From 65bcbdcb0878dca7b5991b364cccd4e816fb3b62 Mon Sep 17 00:00:00 2001
From: Peng Jian
Date: Fri, 1 Sep 2017 22:21:08 +0800
Subject: [PATCH] reuse some subsystems of scylla

---
 cache.hh | 29 +-
 conf/cassandra-rackdc.properties | 15 -
 conf/housekeeping.cfg | 2 -
 conf/scylla.yaml | 798 -
 scylla/db/config.cc => config.cc | 11 +-
 scylla/db/config.hh => config.hh | 33 +-
 db.cc | 33 +-
 db.hh | 8 +-
 ...3_partitioner.cc => default_partitioner.cc | 119 +-
 default_partitioner.hh | 39 +
 {scylla/gms => gms}/application_state.cc | 0
 {scylla/gms => gms}/application_state.hh | 0
 {scylla/gms => gms}/endpoint_state.cc | 0
 {scylla/gms => gms}/endpoint_state.hh | 0
 {scylla/gms => gms}/failure_detector.cc | 0
 {scylla/gms => gms}/failure_detector.hh | 0
 {scylla/gms => gms}/feature.hh | 0
 {scylla/gms => gms}/gossip_digest.hh | 0
 {scylla/gms => gms}/gossip_digest_ack.cc | 0
 {scylla/gms => gms}/gossip_digest_ack.hh | 0
 {scylla/gms => gms}/gossip_digest_ack2.cc | 0
 {scylla/gms => gms}/gossip_digest_ack2.hh | 0
 {scylla/gms => gms}/gossip_digest_syn.cc | 0
 {scylla/gms => gms}/gossip_digest_syn.hh | 0
 {scylla/gms => gms}/gossiper.cc | 5 -
 {scylla/gms => gms}/gossiper.hh | 0
 {scylla/gms => gms}/heart_beat_state.hh | 0
 .../i_endpoint_state_change_subscriber.hh | 0
 .../i_failure_detection_event_listener.hh | 0
 {scylla/gms => gms}/i_failure_detector.hh | 0
 {scylla/gms => gms}/inet_address.cc | 0
 {scylla/gms => gms}/inet_address.hh | 0
 {scylla/gms => gms}/version_generator.cc | 0
 {scylla/gms => gms}/version_generator.hh | 0
 {scylla/gms => gms}/versioned_value.cc | 0
 {scylla/gms => gms}/versioned_value.hh | 0
 {scylla/idl => idl}/gossip_digest.idl.hh | 0
 {scylla/idl => idl}/idl_test.idl.hh | 0
 {scylla/idl => idl}/keys.idl.hh | 0
 {scylla/idl => idl}/result.idl.hh | 0
 {scylla/idl => idl}/streaming.idl.hh | 0
 {scylla/idl => idl}/token.idl.hh | 0
 {scylla/idl => idl}/uuid.idl.hh | 0
 {scylla/io => io}/i_serializer.hh | 0
 {scylla/io => io}/i_versioned_serializer.hh | 0
 {scylla/io => io}/io.cc | 0
 main.cc | 202 +-
 .../message => message}/messaging_service.cc | 227 +-
 .../message => message}/messaging_service.hh | 130 +-
 .../messaging_service_fwd.hh | 0
 redis_storage_proxy.cc => proxy.cc | 16 +-
 redis_storage_proxy.hh => proxy.hh | 40 +-
 redis.cc | 2042 +-
 redis.hh | 201 +-
 redis_service.cc | 89 -
 ring.cc | 12 +
 ring.hh | 12 +
 scylla/Cql.tokens | 197 -
 scylla/SCYLLA-VERSION-GEN | 24 -
 scylla/api/api-doc/cache_service.json | 701 -
 scylla/api/api-doc/collectd.json | 202 -
 scylla/api/api-doc/column_family.json | 2821 --
 scylla/api/api-doc/commitlog.json | 149 -
 scylla/api/api-doc/compaction_manager.json | 285 -
 scylla/api/api-doc/endpoint_snitch_info.json | 75 -
 scylla/api/api-doc/failure_detector.json | 240 -
 scylla/api/api-doc/gossiper.json | 153 -
 scylla/api/api-doc/hinted_handoff.json | 145 -
 scylla/api/api-doc/lsa.json | 29 -
 scylla/api/api-doc/messaging_service.json | 262 -
 scylla/api/api-doc/storage_proxy.json | 1075 -
 scylla/api/api-doc/storage_service.json | 2260 --
 scylla/api/api-doc/stream_manager.json | 271 -
 scylla/api/api-doc/system.json | 114 -
 scylla/api/api-doc/utils.json | 105 -
 scylla/api/api.cc | 151 -
 scylla/api/api.hh | 219 -
 scylla/api/api_init.hh | 52 -
 scylla/api/cache_service.cc | 329 -
 scylla/api/cache_service.hh | 30 -
 scylla/api/collectd.cc | 136 -
 scylla/api/collectd.hh | 30 -
 scylla/api/column_family.cc | 909 -
 scylla/api/column_family.hh | 100 -
 scylla/api/commitlog.cc | 78 -
 scylla/api/commitlog.hh | 30 -
 scylla/api/compaction_manager.cc | 141 -
 scylla/api/compaction_manager.hh | 30 -
 scylla/api/endpoint_snitch.cc | 48 -
 scylla/api/endpoint_snitch.hh | 30 -
 scylla/api/failure_detector.cc | 108 -
 scylla/api/failure_detector.hh | 30 -
 scylla/api/gossiper.cc | 71 -
 scylla/api/gossiper.hh | 30 -
 scylla/api/hinted_handoff.cc | 75 -
 scylla/api/hinted_handoff.hh | 30 -
 scylla/api/lsa.cc | 45 -
 scylla/api/lsa.hh | 30 -
 scylla/api/messaging_service.cc | 155 -
 scylla/api/messaging_service.hh | 30 -
 scylla/api/storage_proxy.cc | 408 -
 scylla/api/storage_proxy.hh | 30 -
 scylla/api/storage_service.cc | 841 -
 scylla/api/storage_service.hh | 30 -
 scylla/api/stream_manager.cc | 163 -
 scylla/api/stream_manager.hh | 30 -
 scylla/api/system.cc | 70 -
 scylla/api/system.hh | 30 -
 scylla/atomic_cell.hh | 396 -
 scylla/atomic_cell_hash.hh | 80 -
 scylla/atomic_cell_or_collection.hh | 74 -
 scylla/auth/auth.cc | 384 -
 scylla/auth/auth.hh | 125 -
 scylla/auth/authenticated_user.cc | 72 -
 scylla/auth/authenticated_user.hh | 83 -
 scylla/auth/authenticator.cc | 127 -
 scylla/auth/authenticator.hh | 200 -
 scylla/auth/authorizer.cc | 104 -
 scylla/auth/authorizer.hh | 173 -
 scylla/auth/data_resource.cc | 171 -
 scylla/auth/data_resource.hh | 159 -
 scylla/auth/default_authorizer.cc | 240 -
 scylla/auth/default_authorizer.hh | 78 -
 scylla/auth/password_authenticator.cc | 351 -
 scylla/auth/password_authenticator.hh | 73 -
 scylla/auth/permission.cc | 104 -
 scylla/auth/permission.hh | 99 -
 scylla/cache_streamed_mutation.hh | 508 -
 scylla/caching_options.hh | 99 -
 scylla/canonical_mutation.cc | 90 -
 scylla/canonical_mutation.hh | 55 -
 scylla/cartesian_product.hh | 110 -
 scylla/cell_locking.hh | 566 -
 scylla/checked-file-impl.hh | 162 -
 scylla/clocks-impl.cc | 24 -
 scylla/clocks-impl.hh | 49 -
 scylla/clustering_bounds_comparator.hh | 167 -
 scylla/clustering_key_filter.hh | 68 -
 scylla/clustering_ranges_walker.hh | 220 -
 scylla/combine.hh | 58 -
 scylla/compaction_strategy.hh | 120 -
 scylla/compatible_ring_position.hh | 67 -
 scylla/compound.hh | 239 -
 scylla/compound_compat.hh | 605 -
 scylla/compress.hh | 155 -
 .../converting_mutation_partition_applier.hh | 130 -
 scylla/counters.cc | 296 -
 scylla/counters.hh | 387 -
 scylla/cpu_controller.hh | 89 -
 scylla/cql3/Cql.g | 1831 -
 scylla/cql3/abstract_marker.cc | 105 -
 scylla/cql3/abstract_marker.hh | 96 -
 scylla/cql3/assignment_testable.hh | 122 -
 scylla/cql3/attributes.cc | 142 -
 scylla/cql3/attributes.hh | 87 -
 scylla/cql3/cf_name.cc | 61 -
 scylla/cql3/cf_name.hh | 65 -
 scylla/cql3/column_condition.cc | 139 -
 scylla/cql3/column_condition.hh | 761 -
 scylla/cql3/column_identifier.cc | 140 -
 scylla/cql3/column_identifier.hh | 169 -
 scylla/cql3/column_specification.cc | 56 -
 scylla/cql3/column_specification.hh | 82 -
 scylla/cql3/constants.cc | 173 -
 scylla/cql3/constants.hh | 255 -
 scylla/cql3/cql3_type.cc | 395 -
 scylla/cql3/cql3_type.hh | 391 -
 scylla/cql3/cql_statement.hh | 119 -
 scylla/cql3/error_collector.hh | 356 -
 scylla/cql3/error_listener.hh | 75 -
 scylla/cql3/functions/abstract_function.hh | 127 -
 scylla/cql3/functions/aggregate_fcts.hh | 293 -
 scylla/cql3/functions/aggregate_function.hh | 98 -
 .../cql3/functions/bytes_conversion_fcts.hh | 107 -
 scylla/cql3/functions/function.hh | 96 -
 scylla/cql3/functions/function_call.hh | 88 -
 scylla/cql3/functions/function_name.hh | 100 -
 scylla/cql3/functions/functions.cc | 478 -
 scylla/cql3/functions/functions.hh | 186 -
 .../functions/native_aggregate_function.hh | 86 -
 scylla/cql3/functions/native_function.hh | 71 -
 .../cql3/functions/native_scalar_function.hh | 98 -
 scylla/cql3/functions/scalar_function.hh | 66 -
 scylla/cql3/functions/time_uuid_fcts.hh | 134 -
 scylla/cql3/functions/token_fct.hh | 73 -
 scylla/cql3/functions/uuid_fcts.hh | 62 -
 scylla/cql3/index_name.cc | 70 -
 scylla/cql3/index_name.hh | 63 -
 scylla/cql3/keyspace_element_name.cc | 74 -
 scylla/cql3/keyspace_element_name.hh | 90 -
 scylla/cql3/lists.cc | 506 -
 scylla/cql3/lists.hh | 210 -
 scylla/cql3/maps.cc | 372 -
 scylla/cql3/maps.hh | 152 -
 scylla/cql3/multi_column_relation.hh | 238 -
 scylla/cql3/operation.cc | 301 -
 scylla/cql3/operation.hh | 266 -
 scylla/cql3/operation_impl.hh | 94 -
 scylla/cql3/operator.cc | 57 -
 scylla/cql3/operator.hh | 115 -
 scylla/cql3/query_options.cc | 210 -
 scylla/cql3/query_options.hh | 188 -
 scylla/cql3/query_options_fwd.hh | 29 -
 scylla/cql3/query_processor.cc | 540 -
 scylla/cql3/query_processor.hh | 559 -
 scylla/cql3/relation.cc | 57 -
 scylla/cql3/relation.hh | 285 -
 .../cql3/restrictions/abstract_restriction.hh | 160 -
 .../forwarding_primary_key_restrictions.hh | 139 -
 .../restrictions/multi_column_restriction.hh | 494 -
 .../restrictions/primary_key_restrictions.hh | 100 -
 scylla/cql3/restrictions/restriction.hh | 137 -
 scylla/cql3/restrictions/restrictions.hh | 115 -
 .../single_column_primary_key_restrictions.hh | 411 -
 .../restrictions/single_column_restriction.hh | 508 -
 .../single_column_restrictions.hh | 239 -
 .../restrictions/statement_restrictions.cc | 687 -
 .../restrictions/statement_restrictions.hh | 407 -
 scylla/cql3/restrictions/term_slice.hh | 172 -
 scylla/cql3/restrictions/token_restriction.hh | 267 -
 scylla/cql3/result_set.cc | 193 -
 scylla/cql3/result_set.hh | 168 -
 .../selection/abstract_function_selector.cc | 107 -
 .../selection/abstract_function_selector.hh | 98 -
 .../selection/aggregate_function_selector.hh | 90 -
 scylla/cql3/selection/field_selector.hh | 127 -
 scylla/cql3/selection/raw_selector.hh | 85 -
 .../selection/scalar_function_selector.hh | 94 -
 scylla/cql3/selection/selectable.cc | 150 -
 scylla/cql3/selection/selectable.hh | 115 -
 .../selectable_with_field_selection.hh | 80 -
 scylla/cql3/selection/selection.cc | 434 -
 scylla/cql3/selection/selection.hh | 280 -
 scylla/cql3/selection/selector.cc | 41 -
 scylla/cql3/selection/selector.hh | 193 -
 scylla/cql3/selection/selector_factories.cc | 103 -
 scylla/cql3/selection/selector_factories.hh | 171 -
 scylla/cql3/selection/simple_selector.cc | 55 -
 scylla/cql3/selection/simple_selector.hh | 123 -
 scylla/cql3/selection/writetime_or_ttl.hh | 79 -
 .../selection/writetime_or_ttl_selector.hh | 141 -
 scylla/cql3/sets.cc | 321 -
 scylla/cql3/sets.hh | 145 -
 scylla/cql3/single_column_relation.cc | 167 -
 scylla/cql3/single_column_relation.hh | 208 -
 .../statements/alter_keyspace_statement.cc | 106 -
 .../statements/alter_keyspace_statement.hh | 69 -
 .../cql3/statements/alter_table_statement.cc | 389 -
 .../cql3/statements/alter_table_statement.hh | 87 -
 .../cql3/statements/alter_type_statement.cc | 243 -
 .../cql3/statements/alter_type_statement.hh | 107 -
 .../cql3/statements/alter_user_statement.cc | 110 -
 .../cql3/statements/alter_user_statement.hh | 72 -
 .../cql3/statements/alter_view_statement.cc | 125 -
 .../cql3/statements/alter_view_statement.hh | 72 -
 .../statements/authentication_statement.cc | 83 -
 .../statements/authentication_statement.hh | 75 -
 .../statements/authorization_statement.cc | 90 -
 .../statements/authorization_statement.hh | 82 -
 scylla/cql3/statements/batch_statement.cc | 454 -
 scylla/cql3/statements/batch_statement.hh | 156 -
 scylla/cql3/statements/bound.hh | 74 -
 scylla/cql3/statements/cf_prop_defs.cc | 248 -
 scylla/cql3/statements/cf_prop_defs.hh | 114 -
 scylla/cql3/statements/cf_properties.hh | 103 -
 scylla/cql3/statements/cf_statement.cc | 88 -
 .../cql3/statements/create_index_statement.cc | 277 -
 .../cql3/statements/create_index_statement.hh | 100 -
 .../statements/create_keyspace_statement.cc | 131 -
 .../statements/create_keyspace_statement.hh | 91 -
 .../cql3/statements/create_table_statement.cc | 390 -
 .../cql3/statements/create_table_statement.hh | 155 -
 .../cql3/statements/create_type_statement.cc | 164 -
 .../cql3/statements/create_type_statement.hh | 80 -
 .../cql3/statements/create_user_statement.cc | 87 -
 .../cql3/statements/create_user_statement.hh | 70 -
 .../cql3/statements/create_view_statement.cc | 353 -
 .../cql3/statements/create_view_statement.hh | 78 -
 scylla/cql3/statements/delete_statement.cc | 127 -
 scylla/cql3/statements/delete_statement.hh | 89 -
 .../cql3/statements/drop_index_statement.cc | 127 -
 .../cql3/statements/drop_index_statement.hh | 77 -
 .../statements/drop_keyspace_statement.cc | 103 -
 .../statements/drop_keyspace_statement.hh | 69 -
 .../cql3/statements/drop_table_statement.cc | 104 -
 .../cql3/statements/drop_table_statement.hh | 68 -
 scylla/cql3/statements/drop_type_statement.cc | 176 -
 scylla/cql3/statements/drop_type_statement.hh | 71 -
 scylla/cql3/statements/drop_user_statement.cc | 92 -
 scylla/cql3/statements/drop_user_statement.hh | 66 -
 scylla/cql3/statements/drop_view_statement.cc | 104 -
 scylla/cql3/statements/drop_view_statement.hh | 72 -
 scylla/cql3/statements/grant_statement.cc | 50 -
 scylla/cql3/statements/grant_statement.hh | 61 -
 scylla/cql3/statements/index_prop_defs.cc | 80 -
 scylla/cql3/statements/index_prop_defs.hh | 70 -
 scylla/cql3/statements/index_target.cc | 109 -
 scylla/cql3/statements/index_target.hh | 92 -
 scylla/cql3/statements/ks_prop_defs.cc | 96 -
 scylla/cql3/statements/ks_prop_defs.hh | 88 -
 .../statements/list_permissions_statement.cc | 131 -
 .../statements/list_permissions_statement.hh | 73 -
 .../cql3/statements/list_users_statement.cc | 63 -
 .../cql3/statements/list_users_statement.hh | 62 -
 .../cql3/statements/modification_statement.cc | 667 -
 .../cql3/statements/modification_statement.hh | 376 -
 scylla/cql3/statements/parsed_statement.cc | 90 -
 .../permission_altering_statement.cc | 96 -
 .../permission_altering_statement.hh | 67 -
 scylla/cql3/statements/prepared_statement.hh | 91 -
 .../cql3/statements/property_definitions.cc | 201 -
 .../cql3/statements/property_definitions.hh | 106 -
 scylla/cql3/statements/raw/batch_statement.hh | 93 -
 scylla/cql3/statements/raw/cf_statement.hh | 81 -
 .../cql3/statements/raw/delete_statement.hh | 76 -
 .../cql3/statements/raw/insert_statement.hh | 88 -
 .../statements/raw/modification_statement.hh | 97 -
 .../cql3/statements/raw/parsed_statement.hh | 83 -
 .../cql3/statements/raw/select_statement.hh | 158 -
 .../cql3/statements/raw/update_statement.hh | 91 -
 scylla/cql3/statements/raw/use_statement.hh | 68 -
 scylla/cql3/statements/request_validations.hh | 173 -
 scylla/cql3/statements/revoke_statement.cc | 50 -
 scylla/cql3/statements/revoke_statement.hh | 61 -
 .../statements/schema_altering_statement.cc | 116 -
 .../statements/schema_altering_statement.hh | 95 -
 scylla/cql3/statements/select_statement.cc | 675 -
 scylla/cql3/statements/select_statement.hh | 425 -
 scylla/cql3/statements/statement_type.hh | 109 -
 scylla/cql3/statements/truncate_statement.cc | 113 -
 scylla/cql3/statements/truncate_statement.hh | 80 -
 scylla/cql3/statements/update_statement.cc | 218 -
 scylla/cql3/statements/update_statement.hh | 79 -
 scylla/cql3/statements/use_statement.cc | 115 -
 scylla/cql3/statements/use_statement.hh | 81 -
 scylla/cql3/stats.hh | 40 -
 scylla/cql3/term.hh | 237 -
 scylla/cql3/token_relation.cc | 138 -
 scylla/cql3/token_relation.hh | 129 -
 scylla/cql3/tuples.hh | 430 -
 scylla/cql3/type_cast.hh | 90 -
 scylla/cql3/untyped_result_set.cc | 98 -
 scylla/cql3/untyped_result_set.hh | 179 -
 scylla/cql3/update_parameters.cc | 74 -
 scylla/cql3/update_parameters.hh | 206 -
 scylla/cql3/user_options.cc | 63 -
 scylla/cql3/user_options.hh | 62 -
 scylla/cql3/user_types.cc | 191 -
 scylla/cql3/user_types.hh | 91 -
 scylla/cql3/ut_name.cc | 76 -
 scylla/cql3/ut_name.hh | 74 -
 scylla/cql3/util.hh | 86 -
 scylla/cql3/values.hh | 185 -
 scylla/cql3/variable_specifications.cc | 98 -
 scylla/cql3/variable_specifications.hh | 78 -
 scylla/cql_serialization_format.hh | 52 -
 scylla/database.cc | 4129 --
 scylla/database.hh | 1385 -
 scylla/database_fwd.hh | 50 -
 scylla/db/batchlog_manager.cc | 409 -
 scylla/db/batchlog_manager.hh | 119 -
 scylla/db/commitlog/commitlog.cc | 1948 -
 scylla/db/commitlog/commitlog.hh | 360 -
 scylla/db/commitlog/commitlog_entry.cc | 65 -
 scylla/db/commitlog/commitlog_entry.hh | 85 -
 scylla/db/commitlog/commitlog_replayer.cc | 398 -
 scylla/db/commitlog/commitlog_replayer.hh | 75 -
 scylla/db/commitlog/replay_position.hh | 136 -
 scylla/db/commitlog/rp_set.hh | 88 -
 scylla/db/consistency_level.cc | 333 -
 scylla/db/consistency_level.hh | 215 -
 scylla/db/consistency_level_type.hh | 64 -
 scylla/db/cql_type_parser.cc | 198 -
 scylla/db/cql_type_parser.hh | 70 -
 scylla/db/heat_load_balance.cc | 475 -
 scylla/db/heat_load_balance.hh | 154 -
 scylla/db/index/secondary_index.cc | 48 -
 scylla/db/index/secondary_index.hh | 389 -
 scylla/db/legacy_schema_migrator.cc | 644 -
 scylla/db/legacy_schema_migrator.hh | 61 -
 scylla/db/marshal/type_parser.cc | 324 -
 scylla/db/marshal/type_parser.hh | 447 -
 scylla/db/query_context.hh | 72 -
 scylla/db/read_repair_decision.hh | 62 -
 scylla/db/schema_tables.cc | 2568 --
 scylla/db/schema_tables.hh | 192 -
 scylla/db/size_estimates_virtual_reader.hh | 285 -
 scylla/db/system_keyspace.cc | 1778 -
 scylla/db/system_keyspace.hh | 651 -
 scylla/db/view/view.cc | 900 -
 scylla/db/view/view.hh | 99 -
 scylla/db/write_type.hh | 62 -
 scylla/db_clock.hh | 67 -
 scylla/debug.hh | 34 -
 scylla/dht/boot_strapper.cc | 117 -
 scylla/dht/boot_strapper.hh | 99 -
 scylla/dht/byte_ordered_partitioner.cc | 187 -
 scylla/dht/byte_ordered_partitioner.hh | 87 -
 scylla/dht/i_partitioner.cc | 564 -
 scylla/dht/i_partitioner.hh | 729 -
 scylla/dht/murmur3_partitioner.hh | 69 -
 scylla/dht/random_partitioner.cc | 272 -
 scylla/dht/random_partitioner.hh | 54 -
 scylla/dht/range_streamer.cc | 268 -
 scylla/dht/range_streamer.hh | 174 -
 scylla/dht/token_range_endpoints.hh | 40 -
 scylla/digest_algorithm.hh | 31 -
 scylla/disk-error-handler.cc | 44 -
 scylla/disk-error-handler.hh | 102 -
 scylla/enum_set.hh | 230 -
 scylla/exceptions/exceptions.cc | 49 -
 scylla/exceptions/exceptions.hh | 268 -
 .../unrecognized_entity_exception.hh | 78 -
 scylla/fix_system_distributed_tables.py | 158 -
 scylla/fnv1a_hasher.hh | 54 -
 scylla/frozen_mutation.cc | 269 -
 scylla/frozen_mutation.hh | 111 -
 scylla/frozen_schema.cc | 60 -
 scylla/frozen_schema.hh | 41 -
 scylla/gc_clock.hh | 59 -
 scylla/hashing.hh | 160 -
 scylla/hashing_partition_visitor.hh | 88 -
 scylla/idl/cache_temperature.idl.hh | 24 -
 scylla/idl/commitlog.idl.hh | 25 -
 scylla/idl/consistency_level.idl.hh | 38 -
 scylla/idl/frozen_mutation.idl.hh | 24 -
 scylla/idl/frozen_schema.idl.hh | 42 -
 scylla/idl/mutation.idl.hh | 155 -
 scylla/idl/paging_state.idl.hh | 9 -
 scylla/idl/partition_checksum.idl.hh | 29 -
 scylla/idl/query.idl.hh | 37 -
 scylla/idl/range.idl.hh | 40 -
 scylla/idl/read_command.idl.hh | 53 -
 scylla/idl/reconcilable_result.idl.hh | 31 -
 scylla/idl/replay_position.idl.hh | 27 -
 scylla/idl/ring_position.idl.hh | 29 -
 scylla/idl/tracing.idl.hh | 43 -
 scylla/idl/truncation_record.idl.hh | 30 -
 scylla/index/secondary_index_manager.cc | 53 -
 scylla/index/secondary_index_manager.hh | 61 -
 scylla/init.cc | 154 -
 scylla/init.hh | 49 -
 scylla/interface/cassandra.thrift | 955 -
 scylla/intrusive_set_external_comparator.hh | 229 -
 scylla/json.hh | 65 -
 scylla/keys.cc | 98 -
 scylla/keys.hh | 779 -
 scylla/lister.cc | 77 -
 scylla/lister.hh | 165 -
 .../locator/abstract_replication_strategy.cc | 215 -
 .../locator/abstract_replication_strategy.hh | 126 -
 scylla/locator/ec2_multi_region_snitch.cc | 132 -
 scylla/locator/ec2_multi_region_snitch.hh | 58 -
 scylla/locator/ec2_snitch.cc | 128 -
 scylla/locator/ec2_snitch.hh | 48 -
 .../everywhere_replication_strategy.cc | 51 -
 .../everywhere_replication_strategy.hh | 64 -
 .../locator/gossiping_property_file_snitch.cc | 349 -
 .../locator/gossiping_property_file_snitch.hh | 143 -
 scylla/locator/local_strategy.cc | 57 -
 scylla/locator/local_strategy.hh | 56 -
 scylla/locator/locator.cc | 27 -
 scylla/locator/network_topology_strategy.cc | 246 -
 scylla/locator/network_topology_strategy.hh | 102 -
 scylla/locator/production_snitch_base.cc | 59 -
 scylla/locator/production_snitch_base.hh | 206 -
 scylla/locator/rack_inferring_snitch.cc | 29 -
 scylla/locator/rack_inferring_snitch.hh | 76 -
 scylla/locator/reconnectable_snitch_helper.hh | 133 -
 scylla/locator/simple_snitch.cc | 30 -
 scylla/locator/simple_snitch.hh | 96 -
 scylla/locator/simple_strategy.cc | 88 -
 scylla/locator/simple_strategy.hh | 44 -
 scylla/locator/snitch_base.cc | 147 -
 scylla/locator/snitch_base.hh | 437 -
 scylla/locator/token_metadata.cc | 672 -
 scylla/locator/token_metadata.hh | 1030 -
 scylla/log.hh | 38 -
 scylla/map_difference.hh | 87 -
 scylla/md5_hasher.hh | 49 -
 scylla/memtable-sstable.hh | 41 -
 scylla/memtable.cc | 576 -
 scylla/memtable.hh | 206 -
 scylla/mutation.cc | 266 -
 scylla/mutation.hh | 191 -
 scylla/mutation_compactor.hh | 262 -
 scylla/mutation_partition.cc | 2159 --
 scylla/mutation_partition.hh | 1063 -
 scylla/mutation_partition_applier.hh | 67 -
 scylla/mutation_partition_serializer.cc | 243 -
 scylla/mutation_partition_serializer.hh | 56 -
 scylla/mutation_partition_view.cc | 232 -
 scylla/mutation_partition_view.hh | 46 -
 scylla/mutation_partition_visitor.hh | 78 -
 scylla/mutation_query.cc | 94 -
 scylla/mutation_query.hh | 148 -
 scylla/mutation_reader.cc | 359 -
 scylla/mutation_reader.hh | 511 -
 scylla/noexcept_traits.hh | 78 -
 scylla/nway_merger.hh | 267 -
 scylla/partition_builder.hh | 75 -
 scylla/partition_range_compat.hh | 175 -
 scylla/partition_slice_builder.cc | 153 -
 scylla/partition_slice_builder.hh | 59 -
 scylla/partition_snapshot_reader.hh | 296 -
 scylla/partition_snapshot_row_cursor.hh | 210 -
 scylla/partition_version.cc | 574 -
 scylla/partition_version.hh | 363 -
 scylla/position_in_partition.hh | 462 -
 scylla/query-request.hh | 197 -
 scylla/query-result-reader.hh | 193 -
 scylla/query-result-set.cc | 201 -
 scylla/query-result-set.hh | 137 -
 scylla/query-result-writer.hh | 196 -
 scylla/query-result.hh | 406 -
 scylla/query.cc | 272 -
 scylla/query_result_merger.hh | 58 -
 scylla/range.hh | 723 -
 scylla/range_tombstone.cc | 91 -
 scylla/range_tombstone.hh | 256 -
 scylla/range_tombstone_list.cc | 415 -
 scylla/range_tombstone_list.hh | 180 -
 scylla/read_context.hh | 243 -
 scylla/release.cc | 36 -
 scylla/release.hh | 26 -
 scylla/repair/range_split.hh | 76 -
 scylla/repair/repair.cc | 1177 -
 scylla/repair/repair.hh | 121 -
 scylla/reversibly_mergeable.hh | 69 -
 scylla/row_cache.cc | 1024 -
 scylla/row_cache.hh | 493 -
 scylla/schema.cc | 1262 -
 scylla/schema.hh | 777 -
 scylla/schema_builder.hh | 273 -
 scylla/schema_mutations.cc | 113 -
 scylla/schema_mutations.hh | 116 -
 scylla/schema_registry.cc | 303 -
 scylla/schema_registry.hh | 170 -
 scylla/schema_upgrader.hh | 68 -
 scylla/scripts/git-archive-all | 494 -
 scylla/scripts/scylla_current_repo | 37 -
 scylla/scripts/scylla_install_pkg | 106 -
 scylla/scripts/update-version | 11 -
 scylla/scylla-blocktune | 109 -
 scylla/scylla-housekeeping | 164 -
 scylla/seastarx.hh | 42 -
 scylla/serialization_visitors.hh | 156 -
 scylla/serializer.hh | 160 -
 scylla/serializer_impl.hh | 506 -
 scylla/service/cache_hitrate_calculator.hh | 46 -
 scylla/service/client_state.cc | 203 -
 scylla/service/client_state.hh | 290 -
 .../service/endpoint_lifecycle_subscriber.hh | 98 -
 scylla/service/load_broadcaster.hh | 100 -
 scylla/service/migration_listener.hh | 78 -
 scylla/service/migration_manager.cc | 925 -
 scylla/service/migration_manager.hh | 175 -
 scylla/service/migration_task.cc | 70 -
 scylla/service/migration_task.hh | 55 -
 scylla/service/misc_services.cc | 182 -
 scylla/service/pager/paging_state.cc | 86 -
 scylla/service/pager/paging_state.hh | 87 -
 scylla/service/pager/query_pager.hh | 116 -
 scylla/service/pager/query_pagers.cc | 363 -
 scylla/service/pager/query_pagers.hh | 75 -
 scylla/service/priority_manager.cc | 27 -
 scylla/service/priority_manager.hh | 108 -
 scylla/service/query_state.hh | 64 -
 scylla/service/storage_proxy.cc | 4169 --
 scylla/service/storage_proxy.hh | 424 -
 scylla/service/storage_service.cc | 3557 --
 scylla/service/storage_service.hh | 2254 --
 scylla/sstable_mutation_readers.hh | 48 -
 scylla/sstables/atomic_deletion.cc | 138 -
 scylla/sstables/atomic_deletion.hh | 94 -
 scylla/sstables/binary_search.hh | 89 -
 scylla/sstables/column_name_helper.hh | 76 -
 scylla/sstables/compaction.cc | 615 -
 scylla/sstables/compaction.hh | 139 -
 scylla/sstables/compaction_manager.cc | 561 -
 scylla/sstables/compaction_manager.hh | 176 -
 scylla/sstables/compaction_strategy.cc | 948 -
 scylla/sstables/compress.cc | 346 -
 scylla/sstables/compress.hh | 205 -
 scylla/sstables/consumer.hh | 323 -
 .../date_tiered_compaction_strategy.hh | 427 -
 scylla/sstables/disk_types.hh | 113 -
 scylla/sstables/downsampling.hh | 205 -
 scylla/sstables/exceptions.hh | 53 -
 scylla/sstables/filter.cc | 65 -
 scylla/sstables/filter.hh | 45 -
 scylla/sstables/hyperloglog.hh | 344 -
 scylla/sstables/index_reader.hh | 633 -
 scylla/sstables/key.hh | 162 -
 scylla/sstables/leveled_manifest.hh | 699 -
 scylla/sstables/metadata_collector.hh | 338 -
 scylla/sstables/partition.cc | 1290 -
 scylla/sstables/remove.hh | 34 -
 scylla/sstables/row.cc | 480 -
 scylla/sstables/row.hh | 98 -
 scylla/sstables/shared_index_lists.hh | 130 -
 scylla/sstables/sstable_set.hh | 67 -
 scylla/sstables/sstables.cc | 2867 --
 scylla/sstables/sstables.hh | 858 -
 scylla/sstables/streaming_histogram.hh | 249 -
 scylla/sstables/types.hh | 431 -
 scylla/sstables/writer.hh | 251 -
 scylla/stdx.hh | 27 -
 scylla/streamed_mutation.cc | 688 -
 scylla/streamed_mutation.hh | 790 -
 scylla/streaming/prepare_message.hh | 68 -
 scylla/streaming/progress_info.cc | 49 -
 scylla/streaming/progress_info.hh | 82 -
 scylla/streaming/session_info.cc | 99 -
 scylla/streaming/session_info.hh | 159 -
 scylla/streaming/stream_coordinator.cc | 107 -
 scylla/streaming/stream_coordinator.hh | 95 -
 scylla/streaming/stream_detail.hh | 59 -
 scylla/streaming/stream_event.hh | 97 -
 scylla/streaming/stream_event_handler.hh | 59 -
 scylla/streaming/stream_exception.hh | 60 -
 scylla/streaming/stream_manager.cc | 294 -
 scylla/streaming/stream_manager.hh | 179 -
 scylla/streaming/stream_plan.cc | 83 -
 scylla/streaming/stream_plan.hh | 147 -
 scylla/streaming/stream_receive_task.cc | 53 -
 scylla/streaming/stream_receive_task.hh | 83 -
 scylla/streaming/stream_request.cc | 52 -
 scylla/streaming/stream_request.hh | 71 -
 scylla/streaming/stream_result_future.cc | 156 -
 scylla/streaming/stream_result_future.hh | 134 -
 scylla/streaming/stream_session.cc | 554 -
 scylla/streaming/stream_session.hh | 344 -
 scylla/streaming/stream_session_state.cc | 63 -
 scylla/streaming/stream_session_state.hh | 56 -
 scylla/streaming/stream_state.hh | 73 -
 scylla/streaming/stream_summary.cc | 50 -
 scylla/streaming/stream_summary.hh | 69 -
 scylla/streaming/stream_task.cc | 51 -
 scylla/streaming/stream_task.hh | 89 -
 scylla/streaming/stream_transfer_task.cc | 193 -
 scylla/streaming/stream_transfer_task.hh | 87 -
 scylla/supervisor.cc | 74 -
 scylla/supervisor.hh | 46 -
 scylla/swagger-ui/.dockerignore | 4 -
 scylla/swagger-ui/.gitattributes | 13 -
 scylla/swagger-ui/.gitignore | 13 -
 scylla/swagger-ui/.jshintignore | 5 -
 scylla/swagger-ui/.jshintrc | 39 -
 scylla/swagger-ui/.npmignore | 9 -
 scylla/swagger-ui/.travis.yml | 10 -
 scylla/swagger-ui/CONTRIBUTING.md | 8 -
 scylla/swagger-ui/Dockerfile | 22 -
 scylla/swagger-ui/LICENSE | 11 -
 scylla/swagger-ui/README.md | 242 -
 scylla/swagger-ui/bower.json | 26 -
 scylla/swagger-ui/dist/css/print.css | 1172 -
 scylla/swagger-ui/dist/css/reset.css | 125 -
 scylla/swagger-ui/dist/css/screen.css | 1273 -
 scylla/swagger-ui/dist/css/style.css | 250 -
 scylla/swagger-ui/dist/css/typography.css | 26 -
 .../dist/fonts/droid-sans-v6-latin-700.eot | Bin 22922 -> 0 bytes
 .../dist/fonts/droid-sans-v6-latin-700.svg | 411 -
 .../dist/fonts/droid-sans-v6-latin-700.ttf | Bin 40513 -> 0 bytes
 .../dist/fonts/droid-sans-v6-latin-700.woff | Bin 25992 -> 0 bytes
 .../dist/fonts/droid-sans-v6-latin-700.woff2 | Bin 11480 -> 0 bytes
 .../fonts/droid-sans-v6-latin-regular.eot | Bin 22008 -> 0 bytes
 .../fonts/droid-sans-v6-latin-regular.svg | 403 -
 .../fonts/droid-sans-v6-latin-regular.ttf | Bin 39069 -> 0 bytes
 .../fonts/droid-sans-v6-latin-regular.woff | Bin 24868 -> 0 bytes
 .../fonts/droid-sans-v6-latin-regular.woff2 | Bin 11304 -> 0 bytes
 .../swagger-ui/dist/images/explorer_icons.png | Bin 5763 -> 0 bytes
 .../swagger-ui/dist/images/favicon-16x16.png | Bin 645 -> 0 bytes
 .../swagger-ui/dist/images/favicon-32x32.png | Bin 1654 -> 0 bytes
 scylla/swagger-ui/dist/images/favicon.ico | Bin 5430 -> 0 bytes
 scylla/swagger-ui/dist/images/logo_small.png | Bin 770 -> 0 bytes
 .../swagger-ui/dist/images/pet_store_api.png | Bin 824 -> 0 bytes
 scylla/swagger-ui/dist/images/throbber.gif | Bin 9257 -> 0 bytes
 scylla/swagger-ui/dist/images/wordnik_api.png | Bin 980 -> 0 bytes
 scylla/swagger-ui/dist/index.html | 102 -
 scylla/swagger-ui/dist/lib/backbone-min.js | 15 -
 .../swagger-ui/dist/lib/handlebars-2.0.0.js | 28 -
 .../swagger-ui/dist/lib/highlight.7.3.pack.js | 1 -
 .../swagger-ui/dist/lib/jquery-1.8.0.min.js | 2 -
 .../swagger-ui/dist/lib/jquery.ba-bbq.min.js | 18 -
 .../swagger-ui/dist/lib/jquery.slideto.min.js | 1 -
 .../swagger-ui/dist/lib/jquery.wiggle.min.js | 8 -
 scylla/swagger-ui/dist/lib/marked.js | 1272 -
 scylla/swagger-ui/dist/lib/swagger-oauth.js | 284 -
 scylla/swagger-ui/dist/lib/underscore-min.js | 6 -
 scylla/swagger-ui/dist/lib/underscore-min.map | 1 -
 scylla/swagger-ui/dist/o2c.html | 20 -
 scylla/swagger-ui/dist/swagger-ui.js | 32152 ----------------
 scylla/swagger-ui/dist/swagger-ui.min.js | 16 -
 scylla/swagger-ui/gulpfile.js | 148 -
 scylla/swagger-ui/index.js | 7 -
 scylla/swagger-ui/lang/en.js | 52 -
 scylla/swagger-ui/lang/ru.js | 52 -
 scylla/swagger-ui/lang/translator.js | 38 -
 scylla/swagger-ui/lib/backbone-min.js | 15 -
 scylla/swagger-ui/lib/handlebars-2.0.0.js | 28 -
 scylla/swagger-ui/lib/highlight.7.3.pack.js | 1 -
 scylla/swagger-ui/lib/jquery-1.8.0.min.js | 2 -
 scylla/swagger-ui/lib/jquery.ba-bbq.min.js | 18 -
 scylla/swagger-ui/lib/jquery.slideto.min.js | 1 -
 scylla/swagger-ui/lib/jquery.wiggle.min.js | 8 -
 scylla/swagger-ui/lib/marked.js | 1272 -
 scylla/swagger-ui/lib/swagger-oauth.js | 284 -
 scylla/swagger-ui/lib/underscore-min.js | 6 -
 scylla/swagger-ui/lib/underscore-min.map | 1 -
 scylla/swagger-ui/package.json | 55 -
 scylla/swagger-ui/src/main/html/css/print.css | 1172 -
 scylla/swagger-ui/src/main/html/css/reset.css | 125 -
 .../swagger-ui/src/main/html/css/screen.css | 1273 -
 scylla/swagger-ui/src/main/html/css/style.css | 250 -
 .../src/main/html/css/typography.css | 26 -
 .../html/fonts/droid-sans-v6-latin-700.eot | Bin 22922 -> 0 bytes
 .../html/fonts/droid-sans-v6-latin-700.svg | 411 -
 .../html/fonts/droid-sans-v6-latin-700.ttf | Bin 40513 -> 0 bytes
 .../html/fonts/droid-sans-v6-latin-700.woff | Bin 25992 -> 0 bytes
 .../html/fonts/droid-sans-v6-latin-700.woff2 | Bin 11480 -> 0 bytes
 .../fonts/droid-sans-v6-latin-regular.eot | Bin 22008 -> 0 bytes
 .../fonts/droid-sans-v6-latin-regular.svg | 403 -
 .../fonts/droid-sans-v6-latin-regular.ttf | Bin 39069 -> 0 bytes
 .../fonts/droid-sans-v6-latin-regular.woff | Bin 24868 -> 0 bytes
 .../fonts/droid-sans-v6-latin-regular.woff2 | Bin 11304 -> 0 bytes
 .../src/main/html/images/explorer_icons.png | Bin 5763 -> 0 bytes
 .../src/main/html/images/favicon-16x16.png | Bin 645 -> 0 bytes
 .../src/main/html/images/favicon-32x32.png | Bin 1654 -> 0 bytes
 .../src/main/html/images/favicon.ico | Bin 5430 -> 0 bytes
 .../src/main/html/images/logo_small.png | Bin 770 -> 0 bytes
 .../src/main/html/images/pet_store_api.png | Bin 824 -> 0 bytes
 .../src/main/html/images/throbber.gif | Bin 9257 -> 0 bytes
 .../src/main/html/images/wordnik_api.png | Bin 980 -> 0 bytes
 scylla/swagger-ui/src/main/html/index.html | 102 -
 scylla/swagger-ui/src/main/html/o2c.html | 20 -
 .../src/main/javascript/SwaggerUi.js | 265 -
 scylla/swagger-ui/src/main/javascript/doc.js | 199 -
 .../src/main/javascript/helpers/handlebars.js | 40 -
 .../src/main/javascript/view/ApiKeyButton.js | 54 -
 .../main/javascript/view/BasicAuthButton.js | 49 -
 .../main/javascript/view/ContentTypeView.js | 11 -
 .../src/main/javascript/view/HeaderView.js | 48 -
 .../src/main/javascript/view/MainView.js | 137 -
 .../src/main/javascript/view/OperationView.js | 718 -
 .../view/ParameterContentTypeView.js | 12 -
 .../src/main/javascript/view/ParameterView.js | 108 -
 .../src/main/javascript/view/ResourceView.js | 73 -
 .../view/ResponseContentTypeView.js | 11 -
 .../src/main/javascript/view/SignatureView.js | 60 -
 .../main/javascript/view/StatusCodeView.js | 26 -
 scylla/swagger-ui/src/main/less/auth.less | 83 -
 .../src/main/less/highlight_default.less | 135 -
 scylla/swagger-ui/src/main/less/print.less | 41 -
 scylla/swagger-ui/src/main/less/reset.less | 51 -
 scylla/swagger-ui/src/main/less/screen.less | 139 -
 scylla/swagger-ui/src/main/less/specs.less | 1107 -
 scylla/swagger-ui/src/main/less/style.less | 311 -
 .../template/apikey_button_view.handlebars | 9 -
 .../basic_auth_button_view.handlebars | 11 -
 .../src/main/template/content_type.handlebars | 10 -
 .../src/main/template/main.handlebars | 30 -
 .../src/main/template/operation.handlebars | 112 -
 .../src/main/template/param.handlebars | 34 -
 .../src/main/template/param_list.handlebars | 19 -
 .../main/template/param_readonly.handlebars | 15 -
 .../param_readonly_required.handlebars | 15 -
 .../main/template/param_required.handlebars | 30 -
 .../parameter_content_type.handlebars | 10 -
 .../src/main/template/resource.handlebars | 28 -
 .../template/response_content_type.handlebars | 10 -
 .../src/main/template/signature.handlebars | 18 -
 .../src/main/template/status_code.handlebars | 16 -
 scylla/swagger-ui/test/.jshintrc | 10 -
 scylla/swagger-ui/test/e2e/driver.js | 10 -
 scylla/swagger-ui/test/e2e/servers.js | 38 -
 scylla/swagger-ui/test/e2e/v1.js | 105 -
 scylla/swagger-ui/test/e2e/v2.js | 120 -
 scylla/swagger-ui/test/mocha.opts | 1 -
 .../test/specs/v1.2/petstore/api-docs.json | 60 -
 .../test/specs/v1.2/petstore/pet.json | 425 -
 .../test/specs/v1.2/petstore/store.json | 145 -
 .../test/specs/v1.2/petstore/user.json | 299 -
 scylla/swagger-ui/test/specs/v2/petstore.json | 953 -
 scylla/tests/UUID_test.cc | 52 -
 scylla/tests/allocation_strategy_test.cc | 34 -
 scylla/tests/anchorless_list_test.cc | 88 -
 scylla/tests/auth_test.cc | 246 -
 scylla/tests/batchlog_manager_test.cc | 86 -
 scylla/tests/bytes_ostream_test.cc | 277 -
 scylla/tests/cache_streamed_mutation_test.cc | 1272 -
 scylla/tests/canonical_mutation_test.cc | 71 -
 scylla/tests/cartesian_product_test.cc | 65 -
 scylla/tests/cell_locker_test.cc | 286 -
 scylla/tests/commitlog_test.cc | 549 -
 scylla/tests/compound_test.cc | 338 -
 scylla/tests/config_test.cc | 881 -
 scylla/tests/counter_test.cc | 436 -
 scylla/tests/cql_assertions.cc | 128 -
 scylla/tests/cql_assertions.hh | 73 -
 scylla/tests/cql_query_test.cc | 2292 --
 scylla/tests/cql_test_env.cc | 377 -
 scylla/tests/cql_test_env.hh | 94 -
 scylla/tests/crc_test.cc | 69 -
 scylla/tests/database_test.cc | 81 -
 scylla/tests/dynamic_bitset_test.cc | 225 -
 scylla/tests/ec2_snitch_test.cc | 104 -
 scylla/tests/flush_queue_test.cc | 204 -
 scylla/tests/frozen_mutation_test.cc | 104 -
 scylla/tests/gossip.cc | 118 -
 scylla/tests/gossip_test.cc | 58 -
 .../gossiping_property_file_snitch_test.cc | 119 -
 scylla/tests/hash_test.cc | 77 -
 scylla/tests/idl_test.cc | 352 -
 scylla/tests/input_stream_test.cc | 91 -
 scylla/tests/keys_test.cc | 162 -
 scylla/tests/log_histogram_test.cc | 103 -
 scylla/tests/logalloc_test.cc | 1196 -
 scylla/tests/lsa_async_eviction_test.cc | 101 -
 scylla/tests/lsa_sync_eviction_test.cc | 110 -
 scylla/tests/make_random_string.hh | 38 -
 scylla/tests/managed_vector_test.cc | 163 -
 scylla/tests/map_difference_test.cc | 136 -
 scylla/tests/memory_footprint.cc | 244 -
 scylla/tests/memtable_snapshot_source.hh | 129 -
 scylla/tests/memtable_test.cc | 320 -
 scylla/tests/message.cc | 228 -
 scylla/tests/murmur_hash_test.cc | 124 -
 scylla/tests/mutation_assertions.hh | 268 -
 scylla/tests/mutation_query_test.cc | 532 -
 scylla/tests/mutation_reader_assertions.hh | 106 -
 scylla/tests/mutation_reader_test.cc | 343 -
 scylla/tests/mutation_source_test.cc | 1226 -
 scylla/tests/mutation_source_test.hh | 60 -
 scylla/tests/mutation_test.cc | 1708 -
 .../tests/network_topology_strategy_test.cc | 306 -
 scylla/tests/nonwrapping_range_test.cc | 386 -
 scylla/tests/partitioner_test.cc | 1046 -
 scylla/tests/perf/perf.hh | 140 -
 scylla/tests/perf/perf_cql_parser.cc | 51 -
 scylla/tests/perf/perf_fast_forward.cc | 900 -
 scylla/tests/perf/perf_hash.cc | 57 -
 scylla/tests/perf/perf_mutation.cc | 57 -
 scylla/tests/perf/perf_simple_query.cc | 231 -
 scylla/tests/perf/perf_sstable.cc | 140 -
 scylla/tests/perf/perf_sstable.hh | 210 -
 scylla/tests/perf_row_cache_update.cc | 108 -
 scylla/tests/query_processor_test.cc | 118 -
 scylla/tests/range_assert.hh | 60 -
 scylla/tests/range_test.cc | 432 -
 scylla/tests/range_tombstone_list_test.cc | 692 -
 scylla/tests/result_set_assertions.cc | 111 -
 scylla/tests/result_set_assertions.hh | 74 -
 scylla/tests/row_cache_alloc_stress.cc | 261 -
 scylla/tests/row_cache_stress_test.cc | 366 -
 scylla/tests/row_cache_test.cc | 1781 -
 scylla/tests/schema_change_test.cc | 379 -
 scylla/tests/schema_registry_test.cc | 134 -
 scylla/tests/simple_schema.hh | 148 -
 .../bad_double_dc.property | 20 -
 .../bad_double_prefer_local.property | 20 -
 .../bad_double_rack.property | 20 -
 .../bad_format_1.property | 19 -
 .../bad_format_2.property | 19 -
 .../bad_format_3.property | 19 -
 .../bad_format_4.property | 19 -
 .../bad_format_5.property | 19 -
 .../bad_format_6.property | 19 -
 .../bad_missing_dc.property | 18 -
 .../bad_missing_rack.property | 18 -
 .../snitch_property_files/good_1.property | 19 -
 .../snitch_property_files/good_2.property | 18 -
 .../good_missing_prefer_local.property | 18 -
 scylla/tests/snitch_reset_test.cc | 159 -
 scylla/tests/sstable_assertions.hh | 74 -
 scylla/tests/sstable_atomic_deletion_test.cc | 170 -
 scylla/tests/sstable_datafile_test.cc | 3767 --
 scylla/tests/sstable_mutation_test.cc | 853 -
 scylla/tests/sstable_resharding_test.cc | 132 -
 scylla/tests/sstable_test.cc | 1336 -
 scylla/tests/sstable_test.hh | 614 -
 scylla/tests/sstables/badcompression/C | Bin 43 -> 0 bytes
 .../la-1-big-CompressionInfo.db | Bin 31 -> 0 bytes
 .../sstables/badcompression/la-1-big-TOC.txt | 8 -
 .../la-2-big-CompressionInfo.db | Bin 43 -> 0 bytes
 .../sstables/badcompression/la-2-big-TOC.txt | 8 -
 scylla/tests/sstables/badtoc/ka-4-big-TOC.txt | 8 -
 scylla/tests/sstables/badtoc/la-1-big-TOC.txt | 0
 scylla/tests/sstables/badtoc/la-2-big-TOC.txt | 9 -
 scylla/tests/sstables/badtoc/la-3-big-TOC.txt | 4 -
 .../bigsummary/la-76-big-CompressionInfo.db | Bin 23579 -> 0 bytes
 .../sstables/bigsummary/la-76-big-Data.db | 0
 .../sstables/bigsummary/la-76-big-Digest.sha1 | 1 -
 .../sstables/bigsummary/la-76-big-Filter.db | Bin 193104 -> 0 bytes
 .../sstables/bigsummary/la-76-big-Index.db | 0
 .../bigsummary/la-76-big-Statistics.db | Bin 9947 -> 0 bytes
 .../sstables/bigsummary/la-76-big-Summary.db | Bin 446038 -> 0 bytes
 .../sstables/bigsummary/la-76-big-TOC.txt | 6 -
 .../sstables/broken_ranges/la-2-big-CRC.db | Bin 60 -> 0 bytes
 .../sstables/broken_ranges/la-2-big-Data.db | Bin 632 -> 0 bytes
 .../broken_ranges/la-2-big-Digest.sha1 | 1 -
 .../sstables/broken_ranges/la-2-big-Filter.db | Bin 16 -> 0 bytes
 .../sstables/broken_ranges/la-2-big-Index.db | Bin 54 -> 0 bytes
 .../broken_ranges/la-2-big-Statistics.db | Bin 4543 -> 0 bytes
 .../broken_ranges/la-2-big-Summary.db | Bin 56 -> 0 bytes
 .../sstables/broken_ranges/la-2-big-TOC.txt | 8 -
 .../sstables/compact_dense/la-1-big-CRC.db | Bin 8 -> 0 bytes
 .../sstables/compact_dense/la-1-big-Data.db | Bin 55 -> 0 bytes
 .../compact_dense/la-1-big-Digest.sha1 | 1 -
 .../sstables/compact_dense/la-1-big-Filter.db | Bin 16 -> 0 bytes
 .../sstables/compact_dense/la-1-big-Index.db | Bin 23 -> 0 bytes
 .../compact_dense/la-1-big-Statistics.db | Bin 4454 -> 0 bytes
 .../compact_dense/la-1-big-Summary.db | Bin 107 -> 0 bytes
 .../sstables/compact_dense/la-1-big-TOC.txt | 8 -
 .../compact_simple_dense/la-1-big-CRC.db | Bin 8 -> 0 bytes
 .../compact_simple_dense/la-1-big-Data.db | Bin 95 -> 0 bytes
 .../compact_simple_dense/la-1-big-Digest.sha1 | 1 -
 .../compact_simple_dense/la-1-big-Filter.db | Bin 16 -> 0 bytes
 .../compact_simple_dense/la-1-big-Index.db | Bin 47 -> 0 bytes
 .../la-1-big-Statistics.db | Bin 4449 -> 0 bytes
 .../compact_simple_dense/la-1-big-Summary.db | Bin 109 -> 0 bytes
 .../compact_simple_dense/la-1-big-TOC.txt | 8 -
 .../sstables/compact_sparse/la-1-big-CRC.db | Bin 8 -> 0 bytes
 .../sstables/compact_sparse/la-1-big-Data.db | Bin 137 -> 0 bytes
 .../compact_sparse/la-1-big-Digest.sha1 | 1 -
 .../compact_sparse/la-1-big-Filter.db | Bin 16 -> 0 bytes
 .../sstables/compact_sparse/la-1-big-Index.db | Bin 47 -> 0 bytes
 .../compact_sparse/la-1-big-Statistics.db | Bin 4438 -> 0 bytes
 .../compact_sparse/la-1-big-Summary.db | Bin 109 -> 0 bytes
 .../sstables/compact_sparse/la-1-big-TOC.txt | 8 -
 .../compaction/la-1-big-CompressionInfo.db | Bin 43 -> 0 bytes
 .../sstables/compaction/la-1-big-Data.db | Bin 80 -> 0 bytes
 .../sstables/compaction/la-1-big-Digest.sha1 | 1 -
 .../sstables/compaction/la-1-big-Filter.db | Bin 16 -> 0 bytes
 .../sstables/compaction/la-1-big-Index.db | Bin 37 -> 0 bytes
 .../compaction/la-1-big-Statistics.db | Bin 4438 -> 0 bytes
 .../sstables/compaction/la-1-big-Summary.db | Bin 81 -> 0 bytes
 .../sstables/compaction/la-1-big-TOC.txt | 8 -
 .../compaction/la-2-big-CompressionInfo.db | Bin 43 -> 0 bytes
 .../sstables/compaction/la-2-big-Data.db | Bin 98 -> 0 bytes
 .../sstables/compaction/la-2-big-Digest.sha1 | 1 -
 .../sstables/compaction/la-2-big-Filter.db | Bin 16 -> 0 bytes
 .../sstables/compaction/la-2-big-Index.db | Bin 38 -> 0 bytes
 .../compaction/la-2-big-Statistics.db | Bin 4438 -> 0 bytes
 .../sstables/compaction/la-2-big-Summary.db | Bin 83 -> 0 bytes
 .../sstables/compaction/la-2-big-TOC.txt | 8 -
 .../compaction/la-3-big-CompressionInfo.db | Bin 43 -> 0 bytes
 .../sstables/compaction/la-3-big-Data.db | Bin 113 -> 0 bytes
 .../sstables/compaction/la-3-big-Digest.sha1 | 1 -
 .../sstables/compaction/la-3-big-Filter.db | Bin 16 -> 0 bytes
 .../sstables/compaction/la-3-big-Index.db | Bin 54 -> 0 bytes
 .../compaction/la-3-big-Statistics.db | Bin 4458 -> 0 bytes
 .../sstables/compaction/la-3-big-Summary.db | Bin 79 -> 0 bytes
 .../sstables/compaction/la-3-big-TOC.txt | 8 -
 scylla/tests/sstables/complex/la-1-big-CRC.db | Bin 8 -> 0 bytes
 .../tests/sstables/complex/la-1-big-Data.db | Bin 1288 -> 0 bytes
 .../sstables/complex/la-1-big-Digest.sha1 | 1 -
 .../tests/sstables/complex/la-1-big-Filter.db | Bin 16 -> 0 bytes
 .../tests/sstables/complex/la-1-big-Index.db | Bin 36 -> 0 bytes
 .../sstables/complex/la-1-big-Statistics.db | Bin 4474 -> 0 bytes
 .../sstables/complex/la-1-big-Summary.db | Bin 92 -> 0 bytes
 .../tests/sstables/complex/la-1-big-TOC.txt | 8 -
 scylla/tests/sstables/complex/la-2-big-CRC.db | Bin 8 -> 0 bytes
 .../tests/sstables/complex/la-2-big-Data.db | Bin 542 -> 0 bytes
 .../sstables/complex/la-2-big-Digest.sha1 | 1 -
 .../tests/sstables/complex/la-2-big-Filter.db | Bin 16 -> 0 bytes
 .../tests/sstables/complex/la-2-big-Index.db | Bin 54 -> 0 bytes
 .../sstables/complex/la-2-big-Statistics.db | Bin 4494 -> 0 bytes
 .../sstables/complex/la-2-big-Summary.db | Bin 92 -> 0 bytes
 .../tests/sstables/complex/la-2-big-TOC.txt | 8 -
 scylla/tests/sstables/complex/la-3-big-CRC.db | Bin 8 -> 0 bytes
 .../tests/sstables/complex/la-3-big-Data.db | Bin 243 -> 0 bytes
 .../sstables/complex/la-3-big-Digest.sha1 | 1 -
 .../tests/sstables/complex/la-3-big-Filter.db | Bin 16 -> 0 bytes
 .../tests/sstables/complex/la-3-big-Index.db | Bin 36 -> 0 bytes
 .../sstables/complex/la-3-big-Statistics.db | Bin 4458 -> 0 bytes
 .../sstables/complex/la-3-big-Summary.db | Bin 92 -> 0 bytes
 .../tests/sstables/complex/la-3-big-TOC.txt | 8 -
 .../composite/la-1-big-CompressionInfo.db | Bin 43 -> 0 bytes
 .../tests/sstables/composite/la-1-big-Data.db | Bin 11486 -> 0 bytes
 .../sstables/composite/la-1-big-Digest.sha1 | 1 -
 .../sstables/composite/la-1-big-Filter.db | Bin 40 -> 0 bytes
 .../sstables/composite/la-1-big-Index.db | Bin 3920 -> 0 bytes
 .../sstables/composite/la-1-big-Statistics.db | Bin 4500 -> 0 bytes
 .../sstables/composite/la-1-big-Summary.db | Bin 614 -> 0 bytes
 .../tests/sstables/composite/la-1-big-TOC.txt | 8 -
 .../compressed/la-1-big-CompressionInfo.db | Bin 43 -> 0 bytes
 .../sstables/compressed/la-1-big-Data.db | Bin 172 -> 0 bytes
 .../sstables/compressed/la-1-big-Digest.sha1 | 1 -
 .../sstables/compressed/la-1-big-Filter.db | Bin 16 -> 0 bytes
 .../sstables/compressed/la-1-big-Index.db | Bin 76 -> 0 bytes
 .../compressed/la-1-big-Statistics.db | Bin 4445 -> 0 bytes
 .../sstables/compressed/la-1-big-Summary.db | Bin 83 -> 0 bytes
 .../sstables/compressed/la-1-big-TOC.txt | 8 -
 .../ks-counter_test-ka-5-CompressionInfo.db | Bin 43 -> 0 bytes
 .../counter_test/ks-counter_test-ka-5-Data.db | Bin 170 -> 0 bytes
 .../ks-counter_test-ka-5-Digest.sha1 | 1 -
 .../ks-counter_test-ka-5-Filter.db | Bin 176 -> 0 bytes
 .../ks-counter_test-ka-5-Index.db | Bin 18 -> 0 bytes
 .../ks-counter_test-ka-5-Statistics.db | Bin 4470 -> 0 bytes
 .../ks-counter_test-ka-5-Summary.db | Bin 80 -> 0 bytes
 .../counter_test/ks-counter_test-ka-5-TOC.txt | 8 -
 .../deleted_cell/la-2-big-CompressionInfo.db | Bin 43 -> 0 bytes
 .../sstables/deleted_cell/la-2-big-Data.db | Bin 52 -> 0 bytes
 .../deleted_cell/la-2-big-Digest.sha1 | 1 -
 .../sstables/deleted_cell/la-2-big-Filter.db | Bin 16 -> 0 bytes
 .../sstables/deleted_cell/la-2-big-Index.db | Bin 19 -> 0 bytes
 .../deleted_cell/la-2-big-Statistics.db | Bin 4450 -> 0 bytes
 .../sstables/deleted_cell/la-2-big-Summary.db | Bin 83 -> 0 bytes
 .../sstables/deleted_cell/la-2-big-TOC.txt | 8 -
 .../try1-data-ka-3-CompressionInfo.db | Bin 123 -> 0 bytes
 .../large_partition/try1-data-ka-3-Data.db | Bin 269525 -> 0 bytes
 .../try1-data-ka-3-Digest.sha1 | 1 -
 .../large_partition/try1-data-ka-3-Filter.db | Bin 176 -> 0 bytes
 .../large_partition/try1-data-ka-3-Index.db | Bin 484 -> 0 bytes
 .../try1-data-ka-3-Statistics.db | Bin 4452 -> 0 bytes
 .../large_partition/try1-data-ka-3-Summary.db | Bin 74 -> 0 bytes
 .../large_partition/try1-data-ka-3-TOC.txt | 8 -
 scylla/tests/sstables/list_pk/la-1-big-CRC.db | Bin 8 -> 0 bytes
 .../tests/sstables/list_pk/la-1-big-Data.db | Bin 207 -> 0 bytes
 .../sstables/list_pk/la-1-big-Digest.sha1 | 1 -
 .../tests/sstables/list_pk/la-1-big-Filter.db | Bin 16 -> 0 bytes
 .../tests/sstables/list_pk/la-1-big-Index.db | Bin 84 -> 0 bytes
 .../sstables/list_pk/la-1-big-Statistics.db | Bin 4442 -> 0 bytes
 .../sstables/list_pk/la-1-big-Summary.db | Bin 117 -> 0 bytes
 .../tests/sstables/list_pk/la-1-big-TOC.txt | 8 -
 scylla/tests/sstables/map_pk/la-1-big-CRC.db | Bin 8 -> 0 bytes
 scylla/tests/sstables/map_pk/la-1-big-Data.db | Bin 207 -> 0 bytes
 .../sstables/map_pk/la-1-big-Digest.sha1 | 1 -
 .../tests/sstables/map_pk/la-1-big-Filter.db | Bin 16 -> 0 bytes
 .../tests/sstables/map_pk/la-1-big-Index.db | Bin 84 -> 0 bytes
 .../sstables/map_pk/la-1-big-Statistics.db | Bin 4442 -> 0 bytes
 .../tests/sstables/map_pk/la-1-big-Summary.db | Bin 122 -> 0 bytes
 scylla/tests/sstables/map_pk/la-1-big-TOC.txt | 8 -
 ...-test_multi_schema-ka-1-CompressionInfo.db | Bin 43 -> 0 bytes
 .../test-test_multi_schema-ka-1-Data.db | Bin 113 -> 0 bytes
 .../test-test_multi_schema-ka-1-Digest.sha1 | 1 -
 .../test-test_multi_schema-ka-1-Filter.db | Bin 16 -> 0 bytes
 .../test-test_multi_schema-ka-1-Index.db | Bin 18 -> 0 bytes
 .../test-test_multi_schema-ka-1-Statistics.db | Bin 4450 -> 0 bytes
 .../test-test_multi_schema-ka-1-Summary.db | Bin 92 -> 0 bytes
 .../test-test_multi_schema-ka-1-TOC.txt | 8 -
 ...kipping_partitions-ka-1-CompressionInfo.db | Bin 43 -> 0 bytes
 .../ks-test_skipping_partitions-ka-1-Data.db | Bin 267 -> 0 bytes
 ...-test_skipping_partitions-ka-1-Digest.sha1 | 1 -
 ...ks-test_skipping_partitions-ka-1-Filter.db | Bin 24 -> 0 bytes
 .../ks-test_skipping_partitions-ka-1-Index.db | Bin 180 -> 0 bytes
 ...est_skipping_partitions-ka-1-Statistics.db | Bin 4468 -> 0 bytes
 ...s-test_skipping_partitions-ka-1-Summary.db | Bin 92 -> 0 bytes
 .../ks-test_skipping_partitions-ka-1-TOC.txt | 8 -
 ...romoted_index_read-ka-1-CompressionInfo.db | Bin 43 -> 0 bytes
 .../ks-promoted_index_read-ka-1-Data.db | Bin 135 -> 0 bytes
 .../ks-promoted_index_read-ka-1-Digest.sha1 | 1 -
 .../ks-promoted_index_read-ka-1-Filter.db | Bin 16 -> 0 bytes
 .../ks-promoted_index_read-ka-1-Index.db | Bin 288 -> 0 bytes
 .../ks-promoted_index_read-ka-1-Statistics.db | Bin 4474 -> 0 bytes
 .../ks-promoted_index_read-ka-1-Summary.db | Bin 92 -> 0 bytes
 .../ks-promoted_index_read-ka-1-TOC.txt | 8 -
 .../sstables/set/la-1-big-CompressionInfo.db | Bin 43 -> 0 bytes
 scylla/tests/sstables/set/la-1-big-Data.db | Bin 104 -> 0 bytes
 .../tests/sstables/set/la-1-big-Digest.sha1 | 1 -
 scylla/tests/sstables/set/la-1-big-Filter.db | Bin 16 -> 0 bytes
 scylla/tests/sstables/set/la-1-big-Index.db | Bin 19 -> 0 bytes
 .../tests/sstables/set/la-1-big-Statistics.db | Bin 4450 -> 0 bytes
 scylla/tests/sstables/set/la-1-big-Summary.db | Bin 83 -> 0 bytes
 scylla/tests/sstables/set/la-1-big-TOC.txt | 8 -
 scylla/tests/sstables/set_pk/la-1-big-CRC.db | Bin 8 -> 0 bytes
 scylla/tests/sstables/set_pk/la-1-big-Data.db | Bin 207 -> 0 bytes
 .../sstables/set_pk/la-1-big-Digest.sha1 | 1 -
 .../tests/sstables/set_pk/la-1-big-Filter.db | Bin 16 -> 0 bytes
 .../tests/sstables/set_pk/la-1-big-Index.db | Bin 84 -> 0 bytes
 .../sstables/set_pk/la-1-big-Statistics.db | Bin 4442 -> 0 bytes
 .../tests/sstables/set_pk/la-1-big-Summary.db | Bin 117 -> 0 bytes
 scylla/tests/sstables/set_pk/la-1-big-TOC.txt | 8 -
 ...utation_reads_test-ka-1-CompressionInfo.db | Bin 43 -> 0 bytes
 ...ks-sliced_mutation_reads_test-ka-1-Data.db | Bin 280 -> 0 bytes
 ...liced_mutation_reads_test-ka-1-Digest.sha1 | 1 -
 ...-sliced_mutation_reads_test-ka-1-Filter.db | Bin 16 -> 0 bytes
 ...s-sliced_mutation_reads_test-ka-1-Index.db | Bin 36 -> 0 bytes
 ...ced_mutation_reads_test-ka-1-Statistics.db | Bin 4466 -> 0 bytes
 ...sliced_mutation_reads_test-ka-1-Summary.db | Bin 92 -> 0 bytes
 ...ks-sliced_mutation_reads_test-ka-1-TOC.txt | 8 -
 .../test-summary_test-ka-1-CompressionInfo.db | Bin 43 -> 0 bytes
 .../test-summary_test-ka-1-Data.db | Bin 1539 -> 0 bytes
 .../test-summary_test-ka-1-Digest.sha1 | 1 -
 .../test-summary_test-ka-1-Filter.db | Bin 176 -> 0 bytes
 .../test-summary_test-ka-1-Index.db | Bin 2340 -> 0 bytes
 .../test-summary_test-ka-1-Statistics.db | Bin 4821 -> 0 bytes
 .../test-summary_test-ka-1-Summary.db | Bin 80 -> 0 bytes
 .../test-summary_test-ka-1-TOC.txt | 8 -
 .../try1-tab-ka-1-CompressionInfo.db | Bin 43 -> 0 bytes
 .../tombstone_overlap/try1-tab-ka-1-Data.db | Bin 83 -> 0 bytes
 .../try1-tab-ka-1-Digest.sha1 | 1 -
 .../tombstone_overlap/try1-tab-ka-1-Filter.db | Bin 16 -> 0 bytes
 .../tombstone_overlap/try1-tab-ka-1-Index.db | Bin 16 -> 0 bytes
 .../try1-tab-ka-1-Statistics.db | Bin 4470 -> 0 bytes
 .../try1-tab-ka-1-Summary.db | Bin 74 -> 0 bytes
 .../tombstone_overlap/try1-tab-ka-1-TOC.txt | 8 -
 .../try1-tab-ka-4-CompressionInfo.db | Bin 43 -> 0 bytes
 .../tombstone_overlap/try1-tab-ka-4-Data.db | Bin 97 -> 0 bytes
 .../try1-tab-ka-4-Digest.sha1 | 1 -
 .../tombstone_overlap/try1-tab-ka-4-Filter.db | Bin 16 -> 0 bytes
 .../tombstone_overlap/try1-tab-ka-4-Index.db | Bin 16 -> 0 bytes
 .../try1-tab-ka-4-Statistics.db | Bin 4470 -> 0 bytes
 .../try1-tab-ka-4-Summary.db | Bin 74 -> 0 bytes
 .../tombstone_overlap/try1-tab-ka-4-TOC.txt | 8 -
 .../try1-tab2-ka-3-CompressionInfo.db | Bin 43 -> 0 bytes
 .../tombstone_overlap/try1-tab2-ka-3-Data.db | Bin 156 -> 0 bytes
 .../try1-tab2-ka-3-Digest.sha1 | 1 -
 .../try1-tab2-ka-3-Filter.db | Bin 16 -> 0 bytes
 .../tombstone_overlap/try1-tab2-ka-3-Index.db | Bin 16 -> 0 bytes
 .../try1-tab2-ka-3-Statistics.db | Bin 4480 -> 0 bytes
 .../try1-tab2-ka-3-Summary.db | Bin 74 -> 0 bytes
 .../tombstone_overlap/try1-tab2-ka-3-TOC.txt | 8 -
 .../sstables/ttl/la-1-big-CompressionInfo.db | Bin 43 -> 0 bytes
 scylla/tests/sstables/ttl/la-1-big-Data.db | Bin 67 -> 0 bytes
 .../tests/sstables/ttl/la-1-big-Digest.sha1 | 1 -
 scylla/tests/sstables/ttl/la-1-big-Filter.db | Bin 16 -> 0 bytes
 scylla/tests/sstables/ttl/la-1-big-Index.db | Bin 19 -> 0 bytes
 .../tests/sstables/ttl/la-1-big-Statistics.db | Bin 4450 -> 0 bytes
 scylla/tests/sstables/ttl/la-1-big-Summary.db | Bin 83 -> 0 bytes
 scylla/tests/sstables/ttl/la-1-big-TOC.txt | 8 -
 .../sstables/uncompressed/la-1-big-CRC.db | Bin 8 -> 0 bytes
 .../sstables/uncompressed/la-1-big-Data.db | Bin 370 -> 0 bytes
 .../uncompressed/la-1-big-Digest.sha1 | 1 -
 .../sstables/uncompressed/la-1-big-Filter.db | Bin 16 -> 0 bytes
 .../sstables/uncompressed/la-1-big-Index.db | Bin 76 -> 0 bytes
 .../uncompressed/la-1-big-Statistics.db | Bin 4445 -> 0 bytes
 .../sstables/uncompressed/la-1-big-Summary.db | Bin 95 -> 0 bytes
 .../sstables/uncompressed/la-1-big-TOC.txt | 8 -
 .../sstables/uncompressed/la-2-big-CRC.db | Bin 8 -> 0 bytes
 .../sstables/uncompressed/la-2-big-Data.db | Bin 370 -> 0 bytes
 .../uncompressed/la-2-big-Digest.sha1 | 1 -
 .../sstables/uncompressed/la-2-big-Filter.db | Bin 16 -> 0 bytes
 .../sstables/uncompressed/la-2-big-Index.db | Bin 76 -> 0 bytes
 .../uncompressed/la-2-big-Statistics.db | Bin 4445 -> 0 bytes
 .../sstables/uncompressed/la-2-big-TOC.txt | 7 -
 .../unknown_component/la-1-big-CRC.db | Bin 8 -> 0 bytes
 .../unknown_component/la-1-big-Data.db | Bin 370 -> 0 bytes
 .../unknown_component/la-1-big-Digest.sha1 | 1 -
 .../unknown_component/la-1-big-Filter.db | Bin 16 -> 0 bytes
 .../unknown_component/la-1-big-Index.db | Bin 76 -> 0 bytes
 .../unknown_component/la-1-big-Statistics.db | Bin 4445 -> 0 bytes
 .../unknown_component/la-1-big-Summary.db | Bin 95 -> 0 bytes
 .../unknown_component/la-1-big-TOC.txt | 9 -
 .../unknown_component/la-1-big-UNKNOWN.txt | 0
 ...ge_tombstone_order-ka-1-CompressionInfo.db | Bin 66 -> 0 bytes
 ...s-wrong_range_tombstone_order-ka-1-Data.db | Bin 246 -> 0 bytes
 ...ong_range_tombstone_order-ka-1-Digest.sha1 | 1 -
 ...wrong_range_tombstone_order-ka-1-Filter.db | Bin 16 -> 0 bytes
 ...-wrong_range_tombstone_order-ka-1-Index.db | Bin 18 -> 0 bytes
 ...g_range_tombstone_order-ka-1-Statistics.db | Bin 4503 -> 0 bytes
 ...rong_range_tombstone_order-ka-1-Summary.db | Bin 92 -> 0 bytes
 ...s-wrong_range_tombstone_order-ka-1-TOC.txt | 8 -
 .../wrongrange/la-114-big-CompressionInfo.db | Bin 43 -> 0 bytes
 .../sstables/wrongrange/la-114-big-Data.db | Bin 583 -> 0 bytes
 .../wrongrange/la-114-big-Digest.sha1 | 1 -
 .../sstables/wrongrange/la-114-big-Filter.db | Bin 176 -> 0 bytes
 .../sstables/wrongrange/la-114-big-Index.db | Bin 20 -> 0 bytes
 .../wrongrange/la-114-big-Statistics.db | Bin 4551 -> 0 bytes
 .../sstables/wrongrange/la-114-big-Summary.db | Bin 86 -> 0 bytes
 .../sstables/wrongrange/la-114-big-TOC.txt | 8 -
 scylla/tests/storage_proxy_test.cc | 118 -
 scylla/tests/streamed_mutation_test.cc | 561 -
 scylla/tests/test-serialization.cc | 185 -
 scylla/tests/test_services.hh | 47 -
 scylla/tests/tmpdir.hh | 58 -
 scylla/tests/total_order_check.hh | 75 -
 scylla/tests/types_test.cc | 797 -
 scylla/tests/view_schema_test.cc | 2540 --
 scylla/tests/virtual_reader_test.cc | 221 -
 scylla/thrift/handler.cc | 1895 -
 scylla/thrift/handler.hh | 33 -
 scylla/thrift/server.cc | 245 -
 scylla/thrift/server.hh | 110 -
 scylla/thrift/thrift_validation.cc | 123 -
 scylla/thrift/thrift_validation.hh | 674 -
 scylla/thrift/utils.hh | 34 -
 scylla/timestamp.hh | 59 -
 scylla/to_string.hh | 115 -
 scylla/tombstone.hh | 104 -
 scylla/tracing/trace_keyspace_helper.cc | 513 -
 scylla/tracing/trace_keyspace_helper.hh | 322 -
 scylla/tracing/trace_state.cc | 148 -
 scylla/tracing/trace_state.hh | 676 -
 scylla/tracing/tracing.cc | 215 -
 scylla/tracing/tracing.hh | 680 -
 scylla/transport/event.cc | 105 -
 scylla/transport/event.hh | 112 -
 scylla/transport/event_notifier.cc | 297 -
 scylla/transport/messages/result_message.hh | 191 -
 .../transport/messages/result_message_base.hh | 48 -
 scylla/transport/messages_fwd.hh | 32 -
 scylla/transport/server.cc | 1925 -
 scylla/transport/server.hh | 285 -
 scylla/types.cc | 3208 --
 scylla/types.hh | 1638 -
 scylla/unimplemented.cc | 81 -
 scylla/unimplemented.hh | 72 -
 scylla/validation.cc | 116 -
 scylla/validation.hh | 59 -
 scylla/version.hh | 72 -
 scylla/view_info.hh | 73 -
 redis_server.cc => server.cc | 2 +-
 redis_server.hh => server.hh | 0
 storage_service.cc | 325 +
 redis_service.hh => storage_service.hh | 38 +-
 .../bits_operation.cc | 0
 .../bits_operation.hh | 0
 dict_lsa.cc => structures/dict_lsa.cc | 0
 dict_lsa.hh => structures/dict_lsa.hh | 0
 geo.cc => structures/geo.cc | 0
 geo.hh => structures/geo.hh | 0
 hll.cc => structures/hll.cc | 0
 hll.hh => structures/hll.hh | 0
 list_lsa.cc => structures/list_lsa.cc | 0
 list_lsa.hh => structures/list_lsa.hh | 0
 sset_lsa.cc => structures/sset_lsa.cc | 0
 sset_lsa.hh => structures/sset_lsa.hh | 0
 token.cc | 21 +
 token.hh | 51 +
 {scylla/utils => utils}/UUID.hh | 0
 {scylla/utils => utils}/UUID_gen.cc | 0
 {scylla/utils => utils}/UUID_gen.hh | 0
 .../utils => utils}/allocation_strategy.hh | 0
 {scylla/utils => utils}/anchorless_list.hh | 0
 {scylla/utils => utils}/big_decimal.cc | 0
 {scylla/utils => utils}/big_decimal.hh | 0
 {scylla/utils => utils}/bloom_calculations.cc | 0
 {scylla/utils => utils}/bloom_calculations.hh | 0
 {scylla/utils => utils}/bloom_filter.cc | 0
 {scylla/utils => utils}/bloom_filter.hh | 0
 .../utils => utils}/bounded_stats_deque.hh | 0
 {scylla => utils}/bytes.cc | 0
 {scylla => utils}/bytes.hh | 0
 {scylla => utils}/bytes_ostream.hh | 0
 {scylla/utils => utils}/class_registrator.hh | 0
 {scylla/utils => utils}/crc.hh | 0
 {scylla/utils => utils}/data_input.hh | 0
 {scylla/utils => utils}/data_output.hh | 0
 {scylla/utils => utils}/date.h | 0
 {scylla/utils => utils}/div_ceil.hh | 0
 {scylla/utils => utils}/dynamic_bitset.cc | 0
 {scylla/utils => utils}/dynamic_bitset.hh | 0
 .../utils => utils}/estimated_histogram.hh | 0
 {scylla/utils => utils}/exceptions.cc | 0
 {scylla/utils => utils}/exceptions.hh | 0
 .../exponential_backoff_retry.hh | 0
 {scylla/utils => utils}/fb_utilities.hh | 0
 {scylla/utils => utils}/file_lock.cc | 0
 {scylla/utils => utils}/file_lock.hh | 0
 {scylla/utils => utils}/flush_queue.hh | 0
 {scylla/utils => utils}/hash.hh | 0
 {scylla/utils => utils}/histogram.hh | 0
 {scylla/utils => utils}/i_filter.cc | 0
 {scylla/utils => utils}/i_filter.hh | 0
 {scylla/utils => utils}/input_stream.hh | 0
 {scylla/utils => utils}/int_range.hh | 0
 {scylla/utils => utils}/joinpoint.hh | 0
 {scylla/utils => utils}/large_bitset.cc | 0
 {scylla/utils => utils}/large_bitset.hh | 0
 {scylla/utils => utils}/latency.hh | 0
 {scylla/utils => utils}/loading_cache.hh | 0
 {scylla/utils => utils}/log_histogram.hh | 0
 {scylla/utils => utils}/logalloc.cc | 0
 {scylla/utils => utils}/logalloc.hh | 0
 {scylla/utils => utils}/managed_bytes.cc | 0
 {scylla/utils => utils}/managed_bytes.hh | 0
 {scylla/utils => utils}/managed_ref.hh | 0
 {scylla/utils => utils}/managed_vector.hh | 0
 {scylla/utils => utils}/move.hh | 0
 {scylla/utils => utils}/murmur_hash.cc | 0
 {scylla/utils => utils}/murmur_hash.hh | 0
 {scylla/utils => utils}/mutable_view.hh | 0
 {scylla/utils => utils}/optimized_optional.hh | 0
 {scylla/utils => utils}/phased_barrier.hh | 0
 {scylla/utils => utils}/rate_limiter.cc | 0
 {scylla/utils => utils}/rate_limiter.hh | 0
 {scylla/utils => utils}/runtime.cc | 0
 {scylla/utils => utils}/runtime.hh | 0
 {scylla/utils => utils}/sequenced_set.hh | 0
 {scylla/utils => utils}/serialization.hh | 0
 {scylla/utils => utils}/to_boost_visitor.hh | 0
 {scylla/utils => utils}/utils.cc | 0
 {scylla/utils => utils}/uuid.cc | 0
 .../with_relational_operators.hh | 0
 1286 files changed, 604 insertions(+), 246457 deletions(-)
 delete mode 100644 conf/cassandra-rackdc.properties
 delete mode 100644 conf/housekeeping.cfg
 delete mode 100644 conf/scylla.yaml
 rename scylla/db/config.cc => config.cc (98%)
 rename scylla/db/config.hh => config.hh (97%)
 rename scylla/dht/murmur3_partitioner.cc => default_partitioner.cc (63%)
 create mode 100644 default_partitioner.hh
 rename {scylla/gms => gms}/application_state.cc (100%)
 rename {scylla/gms => gms}/application_state.hh (100%)
 rename {scylla/gms => gms}/endpoint_state.cc (100%)
 rename {scylla/gms => gms}/endpoint_state.hh (100%)
 rename {scylla/gms => gms}/failure_detector.cc (100%)
 rename {scylla/gms => gms}/failure_detector.hh (100%)
 rename {scylla/gms => gms}/feature.hh (100%)
 rename {scylla/gms => gms}/gossip_digest.hh (100%)
 rename {scylla/gms => gms}/gossip_digest_ack.cc (100%)
 rename {scylla/gms => gms}/gossip_digest_ack.hh (100%)
 rename {scylla/gms => gms}/gossip_digest_ack2.cc (100%)
 rename {scylla/gms => gms}/gossip_digest_ack2.hh (100%)
 rename {scylla/gms => gms}/gossip_digest_syn.cc (100%)
 rename {scylla/gms => gms}/gossip_digest_syn.hh (100%)
 rename {scylla/gms => gms}/gossiper.cc (99%)
 rename {scylla/gms => gms}/gossiper.hh (100%)
 rename {scylla/gms => gms}/heart_beat_state.hh (100%)
 rename {scylla/gms => gms}/i_endpoint_state_change_subscriber.hh (100%)
 rename {scylla/gms => gms}/i_failure_detection_event_listener.hh (100%)
 rename {scylla/gms => gms}/i_failure_detector.hh (100%)
 rename {scylla/gms => gms}/inet_address.cc (100%)
 rename {scylla/gms => gms}/inet_address.hh (100%)
 rename {scylla/gms => gms}/version_generator.cc (100%)
 rename {scylla/gms => gms}/version_generator.hh (100%)
 rename {scylla/gms => gms}/versioned_value.cc (100%)
 rename {scylla/gms => gms}/versioned_value.hh (100%)
 rename {scylla/idl => idl}/gossip_digest.idl.hh (100%)
 rename {scylla/idl => idl}/idl_test.idl.hh (100%)
 rename {scylla/idl => idl}/keys.idl.hh (100%)
 rename {scylla/idl => idl}/result.idl.hh (100%)
 rename {scylla/idl => idl}/streaming.idl.hh (100%)
 rename {scylla/idl => idl}/token.idl.hh (100%)
 rename {scylla/idl => idl}/uuid.idl.hh (100%)
 rename {scylla/io => io}/i_serializer.hh (100%)
 rename {scylla/io => io}/i_versioned_serializer.hh (100%)
 rename {scylla/io => io}/io.cc (100%)
 rename {scylla/message => message}/messaging_service.cc (68%)
 rename {scylla/message => message}/messaging_service.hh (51%)
 rename {scylla/message => message}/messaging_service_fwd.hh (100%)
 rename redis_storage_proxy.cc => proxy.cc (90%)
 rename redis_storage_proxy.hh => proxy.hh (58%)
 delete mode 100644 redis_service.cc
 create mode 100644 ring.cc
 create mode 100644 ring.hh
 delete mode 100644 scylla/Cql.tokens
 delete mode 100755 scylla/SCYLLA-VERSION-GEN
 delete mode 100644 scylla/api/api-doc/cache_service.json
 delete mode 100644 scylla/api/api-doc/collectd.json
 delete mode 100644 scylla/api/api-doc/column_family.json
 delete mode 100644 scylla/api/api-doc/commitlog.json
 delete mode 100644 scylla/api/api-doc/compaction_manager.json
 delete mode 100644 scylla/api/api-doc/endpoint_snitch_info.json
 delete mode 100644 scylla/api/api-doc/failure_detector.json
 delete mode 100644 scylla/api/api-doc/gossiper.json
 delete mode 100644 scylla/api/api-doc/hinted_handoff.json
 delete mode 100644 scylla/api/api-doc/lsa.json
 delete mode 100644 scylla/api/api-doc/messaging_service.json
 delete mode 100644 scylla/api/api-doc/storage_proxy.json
 delete mode 100644 scylla/api/api-doc/storage_service.json
 delete mode 100644 scylla/api/api-doc/stream_manager.json
 delete mode 100644 scylla/api/api-doc/system.json
 delete mode 100644 scylla/api/api-doc/utils.json
 delete mode 100644 scylla/api/api.cc
 delete mode 100644 scylla/api/api.hh
 delete mode 100644 scylla/api/api_init.hh
 delete mode 100644 scylla/api/cache_service.cc
 delete mode 100644 scylla/api/cache_service.hh
 delete mode 100644 scylla/api/collectd.cc
 delete mode 100644 scylla/api/collectd.hh
 delete mode 100644 scylla/api/column_family.cc
 delete mode 100644 scylla/api/column_family.hh
 delete mode 100644 scylla/api/commitlog.cc
 delete mode 100644 scylla/api/commitlog.hh
 delete mode 100644 scylla/api/compaction_manager.cc
 delete mode 100644 scylla/api/compaction_manager.hh
 delete mode 100644 scylla/api/endpoint_snitch.cc
 delete mode 100644 scylla/api/endpoint_snitch.hh
 delete mode 100644 scylla/api/failure_detector.cc
 delete mode 100644 scylla/api/failure_detector.hh
 delete mode 100644 scylla/api/gossiper.cc
 delete mode 100644 scylla/api/gossiper.hh
 delete mode 100644 scylla/api/hinted_handoff.cc
 delete mode 100644 scylla/api/hinted_handoff.hh
 delete mode 100644 scylla/api/lsa.cc
 delete mode 100644 scylla/api/lsa.hh
 delete mode 100644 scylla/api/messaging_service.cc
 delete mode 100644 scylla/api/messaging_service.hh
 delete mode 100644 scylla/api/storage_proxy.cc
 delete mode 100644 scylla/api/storage_proxy.hh
 delete mode 100644 scylla/api/storage_service.cc
 delete mode 100644 scylla/api/storage_service.hh
 delete mode 100644 scylla/api/stream_manager.cc
 delete mode 100644 scylla/api/stream_manager.hh
 delete mode 100644 scylla/api/system.cc
 delete mode 100644 scylla/api/system.hh
 delete mode 100644 scylla/atomic_cell.hh
 delete mode 100644 scylla/atomic_cell_hash.hh
 delete mode 100644 scylla/atomic_cell_or_collection.hh
 delete mode 100644 scylla/auth/auth.cc
 delete mode 100644 scylla/auth/auth.hh
 delete mode 100644 scylla/auth/authenticated_user.cc
 delete mode 100644 scylla/auth/authenticated_user.hh
 delete mode 100644 scylla/auth/authenticator.cc
 delete mode 100644 scylla/auth/authenticator.hh
 delete mode 100644 scylla/auth/authorizer.cc
 delete mode 100644 scylla/auth/authorizer.hh
 delete mode 100644 scylla/auth/data_resource.cc
 delete mode 100644 scylla/auth/data_resource.hh
 delete mode 100644 scylla/auth/default_authorizer.cc
 delete mode 100644 scylla/auth/default_authorizer.hh
 delete mode 100644 scylla/auth/password_authenticator.cc
 delete mode 100644 scylla/auth/password_authenticator.hh
 delete mode 100644 scylla/auth/permission.cc
 delete mode 100644 scylla/auth/permission.hh
 delete mode 100644 scylla/cache_streamed_mutation.hh
 delete mode 100644 scylla/caching_options.hh
 delete mode 100644 scylla/canonical_mutation.cc
 delete mode 100644 scylla/canonical_mutation.hh
 delete mode 100644 scylla/cartesian_product.hh
 delete mode 100644 scylla/cell_locking.hh
 delete mode 100644 scylla/checked-file-impl.hh
 delete mode 100644 scylla/clocks-impl.cc
 delete mode 100644 scylla/clocks-impl.hh
 delete mode 100644 scylla/clustering_bounds_comparator.hh
 delete mode 100644 scylla/clustering_key_filter.hh
 delete mode 100644 scylla/clustering_ranges_walker.hh
 delete mode 100644 scylla/combine.hh
 delete mode 100644 scylla/compaction_strategy.hh
 delete mode 100644 scylla/compatible_ring_position.hh
 delete mode 100644 scylla/compound.hh
 delete mode 100644 scylla/compound_compat.hh
 delete mode 100644 scylla/compress.hh
 delete mode 100644 scylla/converting_mutation_partition_applier.hh
 delete mode 100644 scylla/counters.cc
 delete mode 100644 scylla/counters.hh
 delete mode 100644 scylla/cpu_controller.hh
 delete mode 100644 scylla/cql3/Cql.g
 delete mode 100644 scylla/cql3/abstract_marker.cc
 delete mode 100644 scylla/cql3/abstract_marker.hh
 delete mode 100644 scylla/cql3/assignment_testable.hh
 delete mode 100644 scylla/cql3/attributes.cc
 delete mode 100644 scylla/cql3/attributes.hh
 delete mode 100644 scylla/cql3/cf_name.cc
 delete mode 100644 scylla/cql3/cf_name.hh
 delete mode 100644 scylla/cql3/column_condition.cc
 delete mode 100644 scylla/cql3/column_condition.hh
 delete mode 100644 scylla/cql3/column_identifier.cc
 delete mode 100644 scylla/cql3/column_identifier.hh
 delete mode 100644 scylla/cql3/column_specification.cc
 delete mode 100644 scylla/cql3/column_specification.hh
 delete mode 100644 scylla/cql3/constants.cc
 delete mode 100644 scylla/cql3/constants.hh
 delete mode 100644 scylla/cql3/cql3_type.cc
 delete mode 100644 scylla/cql3/cql3_type.hh
 delete mode 100644 scylla/cql3/cql_statement.hh
 delete mode 100644 scylla/cql3/error_collector.hh
 delete mode 100644 scylla/cql3/error_listener.hh
 delete mode 100644 scylla/cql3/functions/abstract_function.hh
 delete mode 100644 scylla/cql3/functions/aggregate_fcts.hh
 delete mode 100644 scylla/cql3/functions/aggregate_function.hh
 delete mode 100644 scylla/cql3/functions/bytes_conversion_fcts.hh
 delete mode 100644 scylla/cql3/functions/function.hh
 delete mode 100644 scylla/cql3/functions/function_call.hh
 delete mode 100644 scylla/cql3/functions/function_name.hh
 delete mode 100644
scylla/cql3/functions/functions.cc delete mode 100644 scylla/cql3/functions/functions.hh delete mode 100644 scylla/cql3/functions/native_aggregate_function.hh delete mode 100644 scylla/cql3/functions/native_function.hh delete mode 100644 scylla/cql3/functions/native_scalar_function.hh delete mode 100644 scylla/cql3/functions/scalar_function.hh delete mode 100644 scylla/cql3/functions/time_uuid_fcts.hh delete mode 100644 scylla/cql3/functions/token_fct.hh delete mode 100644 scylla/cql3/functions/uuid_fcts.hh delete mode 100644 scylla/cql3/index_name.cc delete mode 100644 scylla/cql3/index_name.hh delete mode 100644 scylla/cql3/keyspace_element_name.cc delete mode 100644 scylla/cql3/keyspace_element_name.hh delete mode 100644 scylla/cql3/lists.cc delete mode 100644 scylla/cql3/lists.hh delete mode 100644 scylla/cql3/maps.cc delete mode 100644 scylla/cql3/maps.hh delete mode 100644 scylla/cql3/multi_column_relation.hh delete mode 100644 scylla/cql3/operation.cc delete mode 100644 scylla/cql3/operation.hh delete mode 100644 scylla/cql3/operation_impl.hh delete mode 100644 scylla/cql3/operator.cc delete mode 100644 scylla/cql3/operator.hh delete mode 100644 scylla/cql3/query_options.cc delete mode 100644 scylla/cql3/query_options.hh delete mode 100644 scylla/cql3/query_options_fwd.hh delete mode 100644 scylla/cql3/query_processor.cc delete mode 100644 scylla/cql3/query_processor.hh delete mode 100644 scylla/cql3/relation.cc delete mode 100644 scylla/cql3/relation.hh delete mode 100644 scylla/cql3/restrictions/abstract_restriction.hh delete mode 100644 scylla/cql3/restrictions/forwarding_primary_key_restrictions.hh delete mode 100644 scylla/cql3/restrictions/multi_column_restriction.hh delete mode 100644 scylla/cql3/restrictions/primary_key_restrictions.hh delete mode 100644 scylla/cql3/restrictions/restriction.hh delete mode 100644 scylla/cql3/restrictions/restrictions.hh delete mode 100644 scylla/cql3/restrictions/single_column_primary_key_restrictions.hh delete mode 100644 scylla/cql3/restrictions/single_column_restriction.hh delete mode 100644 scylla/cql3/restrictions/single_column_restrictions.hh delete mode 100644 scylla/cql3/restrictions/statement_restrictions.cc delete mode 100644 scylla/cql3/restrictions/statement_restrictions.hh delete mode 100644 scylla/cql3/restrictions/term_slice.hh delete mode 100644 scylla/cql3/restrictions/token_restriction.hh delete mode 100644 scylla/cql3/result_set.cc delete mode 100644 scylla/cql3/result_set.hh delete mode 100644 scylla/cql3/selection/abstract_function_selector.cc delete mode 100644 scylla/cql3/selection/abstract_function_selector.hh delete mode 100644 scylla/cql3/selection/aggregate_function_selector.hh delete mode 100644 scylla/cql3/selection/field_selector.hh delete mode 100644 scylla/cql3/selection/raw_selector.hh delete mode 100644 scylla/cql3/selection/scalar_function_selector.hh delete mode 100644 scylla/cql3/selection/selectable.cc delete mode 100644 scylla/cql3/selection/selectable.hh delete mode 100644 scylla/cql3/selection/selectable_with_field_selection.hh delete mode 100644 scylla/cql3/selection/selection.cc delete mode 100644 scylla/cql3/selection/selection.hh delete mode 100644 scylla/cql3/selection/selector.cc delete mode 100644 scylla/cql3/selection/selector.hh delete mode 100644 scylla/cql3/selection/selector_factories.cc delete mode 100644 scylla/cql3/selection/selector_factories.hh delete mode 100644 scylla/cql3/selection/simple_selector.cc delete mode 100644 scylla/cql3/selection/simple_selector.hh delete mode 100644 
scylla/cql3/selection/writetime_or_ttl.hh delete mode 100644 scylla/cql3/selection/writetime_or_ttl_selector.hh delete mode 100644 scylla/cql3/sets.cc delete mode 100644 scylla/cql3/sets.hh delete mode 100644 scylla/cql3/single_column_relation.cc delete mode 100644 scylla/cql3/single_column_relation.hh delete mode 100644 scylla/cql3/statements/alter_keyspace_statement.cc delete mode 100644 scylla/cql3/statements/alter_keyspace_statement.hh delete mode 100644 scylla/cql3/statements/alter_table_statement.cc delete mode 100644 scylla/cql3/statements/alter_table_statement.hh delete mode 100644 scylla/cql3/statements/alter_type_statement.cc delete mode 100644 scylla/cql3/statements/alter_type_statement.hh delete mode 100644 scylla/cql3/statements/alter_user_statement.cc delete mode 100644 scylla/cql3/statements/alter_user_statement.hh delete mode 100644 scylla/cql3/statements/alter_view_statement.cc delete mode 100644 scylla/cql3/statements/alter_view_statement.hh delete mode 100644 scylla/cql3/statements/authentication_statement.cc delete mode 100644 scylla/cql3/statements/authentication_statement.hh delete mode 100644 scylla/cql3/statements/authorization_statement.cc delete mode 100644 scylla/cql3/statements/authorization_statement.hh delete mode 100644 scylla/cql3/statements/batch_statement.cc delete mode 100644 scylla/cql3/statements/batch_statement.hh delete mode 100644 scylla/cql3/statements/bound.hh delete mode 100644 scylla/cql3/statements/cf_prop_defs.cc delete mode 100644 scylla/cql3/statements/cf_prop_defs.hh delete mode 100644 scylla/cql3/statements/cf_properties.hh delete mode 100644 scylla/cql3/statements/cf_statement.cc delete mode 100644 scylla/cql3/statements/create_index_statement.cc delete mode 100644 scylla/cql3/statements/create_index_statement.hh delete mode 100644 scylla/cql3/statements/create_keyspace_statement.cc delete mode 100644 scylla/cql3/statements/create_keyspace_statement.hh delete mode 100644 scylla/cql3/statements/create_table_statement.cc delete mode 100644 scylla/cql3/statements/create_table_statement.hh delete mode 100644 scylla/cql3/statements/create_type_statement.cc delete mode 100644 scylla/cql3/statements/create_type_statement.hh delete mode 100644 scylla/cql3/statements/create_user_statement.cc delete mode 100644 scylla/cql3/statements/create_user_statement.hh delete mode 100644 scylla/cql3/statements/create_view_statement.cc delete mode 100644 scylla/cql3/statements/create_view_statement.hh delete mode 100644 scylla/cql3/statements/delete_statement.cc delete mode 100644 scylla/cql3/statements/delete_statement.hh delete mode 100644 scylla/cql3/statements/drop_index_statement.cc delete mode 100644 scylla/cql3/statements/drop_index_statement.hh delete mode 100644 scylla/cql3/statements/drop_keyspace_statement.cc delete mode 100644 scylla/cql3/statements/drop_keyspace_statement.hh delete mode 100644 scylla/cql3/statements/drop_table_statement.cc delete mode 100644 scylla/cql3/statements/drop_table_statement.hh delete mode 100644 scylla/cql3/statements/drop_type_statement.cc delete mode 100644 scylla/cql3/statements/drop_type_statement.hh delete mode 100644 scylla/cql3/statements/drop_user_statement.cc delete mode 100644 scylla/cql3/statements/drop_user_statement.hh delete mode 100644 scylla/cql3/statements/drop_view_statement.cc delete mode 100644 scylla/cql3/statements/drop_view_statement.hh delete mode 100644 scylla/cql3/statements/grant_statement.cc delete mode 100644 scylla/cql3/statements/grant_statement.hh delete mode 100644 
scylla/cql3/statements/index_prop_defs.cc delete mode 100644 scylla/cql3/statements/index_prop_defs.hh delete mode 100644 scylla/cql3/statements/index_target.cc delete mode 100644 scylla/cql3/statements/index_target.hh delete mode 100644 scylla/cql3/statements/ks_prop_defs.cc delete mode 100644 scylla/cql3/statements/ks_prop_defs.hh delete mode 100644 scylla/cql3/statements/list_permissions_statement.cc delete mode 100644 scylla/cql3/statements/list_permissions_statement.hh delete mode 100644 scylla/cql3/statements/list_users_statement.cc delete mode 100644 scylla/cql3/statements/list_users_statement.hh delete mode 100644 scylla/cql3/statements/modification_statement.cc delete mode 100644 scylla/cql3/statements/modification_statement.hh delete mode 100644 scylla/cql3/statements/parsed_statement.cc delete mode 100644 scylla/cql3/statements/permission_altering_statement.cc delete mode 100644 scylla/cql3/statements/permission_altering_statement.hh delete mode 100644 scylla/cql3/statements/prepared_statement.hh delete mode 100644 scylla/cql3/statements/property_definitions.cc delete mode 100644 scylla/cql3/statements/property_definitions.hh delete mode 100644 scylla/cql3/statements/raw/batch_statement.hh delete mode 100644 scylla/cql3/statements/raw/cf_statement.hh delete mode 100644 scylla/cql3/statements/raw/delete_statement.hh delete mode 100644 scylla/cql3/statements/raw/insert_statement.hh delete mode 100644 scylla/cql3/statements/raw/modification_statement.hh delete mode 100644 scylla/cql3/statements/raw/parsed_statement.hh delete mode 100644 scylla/cql3/statements/raw/select_statement.hh delete mode 100644 scylla/cql3/statements/raw/update_statement.hh delete mode 100644 scylla/cql3/statements/raw/use_statement.hh delete mode 100644 scylla/cql3/statements/request_validations.hh delete mode 100644 scylla/cql3/statements/revoke_statement.cc delete mode 100644 scylla/cql3/statements/revoke_statement.hh delete mode 100644 scylla/cql3/statements/schema_altering_statement.cc delete mode 100644 scylla/cql3/statements/schema_altering_statement.hh delete mode 100644 scylla/cql3/statements/select_statement.cc delete mode 100644 scylla/cql3/statements/select_statement.hh delete mode 100644 scylla/cql3/statements/statement_type.hh delete mode 100644 scylla/cql3/statements/truncate_statement.cc delete mode 100644 scylla/cql3/statements/truncate_statement.hh delete mode 100644 scylla/cql3/statements/update_statement.cc delete mode 100644 scylla/cql3/statements/update_statement.hh delete mode 100644 scylla/cql3/statements/use_statement.cc delete mode 100644 scylla/cql3/statements/use_statement.hh delete mode 100644 scylla/cql3/stats.hh delete mode 100644 scylla/cql3/term.hh delete mode 100644 scylla/cql3/token_relation.cc delete mode 100644 scylla/cql3/token_relation.hh delete mode 100644 scylla/cql3/tuples.hh delete mode 100644 scylla/cql3/type_cast.hh delete mode 100644 scylla/cql3/untyped_result_set.cc delete mode 100644 scylla/cql3/untyped_result_set.hh delete mode 100644 scylla/cql3/update_parameters.cc delete mode 100644 scylla/cql3/update_parameters.hh delete mode 100644 scylla/cql3/user_options.cc delete mode 100644 scylla/cql3/user_options.hh delete mode 100644 scylla/cql3/user_types.cc delete mode 100644 scylla/cql3/user_types.hh delete mode 100644 scylla/cql3/ut_name.cc delete mode 100644 scylla/cql3/ut_name.hh delete mode 100644 scylla/cql3/util.hh delete mode 100644 scylla/cql3/values.hh delete mode 100644 scylla/cql3/variable_specifications.cc delete mode 100644 
scylla/cql3/variable_specifications.hh delete mode 100644 scylla/cql_serialization_format.hh delete mode 100644 scylla/database.cc delete mode 100644 scylla/database.hh delete mode 100644 scylla/database_fwd.hh delete mode 100644 scylla/db/batchlog_manager.cc delete mode 100644 scylla/db/batchlog_manager.hh delete mode 100644 scylla/db/commitlog/commitlog.cc delete mode 100644 scylla/db/commitlog/commitlog.hh delete mode 100644 scylla/db/commitlog/commitlog_entry.cc delete mode 100644 scylla/db/commitlog/commitlog_entry.hh delete mode 100644 scylla/db/commitlog/commitlog_replayer.cc delete mode 100644 scylla/db/commitlog/commitlog_replayer.hh delete mode 100644 scylla/db/commitlog/replay_position.hh delete mode 100644 scylla/db/commitlog/rp_set.hh delete mode 100644 scylla/db/consistency_level.cc delete mode 100644 scylla/db/consistency_level.hh delete mode 100644 scylla/db/consistency_level_type.hh delete mode 100644 scylla/db/cql_type_parser.cc delete mode 100644 scylla/db/cql_type_parser.hh delete mode 100644 scylla/db/heat_load_balance.cc delete mode 100644 scylla/db/heat_load_balance.hh delete mode 100644 scylla/db/index/secondary_index.cc delete mode 100644 scylla/db/index/secondary_index.hh delete mode 100644 scylla/db/legacy_schema_migrator.cc delete mode 100644 scylla/db/legacy_schema_migrator.hh delete mode 100644 scylla/db/marshal/type_parser.cc delete mode 100644 scylla/db/marshal/type_parser.hh delete mode 100644 scylla/db/query_context.hh delete mode 100644 scylla/db/read_repair_decision.hh delete mode 100644 scylla/db/schema_tables.cc delete mode 100644 scylla/db/schema_tables.hh delete mode 100644 scylla/db/size_estimates_virtual_reader.hh delete mode 100644 scylla/db/system_keyspace.cc delete mode 100644 scylla/db/system_keyspace.hh delete mode 100644 scylla/db/view/view.cc delete mode 100644 scylla/db/view/view.hh delete mode 100644 scylla/db/write_type.hh delete mode 100644 scylla/db_clock.hh delete mode 100644 scylla/debug.hh delete mode 100644 scylla/dht/boot_strapper.cc delete mode 100644 scylla/dht/boot_strapper.hh delete mode 100644 scylla/dht/byte_ordered_partitioner.cc delete mode 100644 scylla/dht/byte_ordered_partitioner.hh delete mode 100644 scylla/dht/i_partitioner.cc delete mode 100644 scylla/dht/i_partitioner.hh delete mode 100644 scylla/dht/murmur3_partitioner.hh delete mode 100644 scylla/dht/random_partitioner.cc delete mode 100644 scylla/dht/random_partitioner.hh delete mode 100644 scylla/dht/range_streamer.cc delete mode 100644 scylla/dht/range_streamer.hh delete mode 100644 scylla/dht/token_range_endpoints.hh delete mode 100644 scylla/digest_algorithm.hh delete mode 100644 scylla/disk-error-handler.cc delete mode 100644 scylla/disk-error-handler.hh delete mode 100644 scylla/enum_set.hh delete mode 100644 scylla/exceptions/exceptions.cc delete mode 100644 scylla/exceptions/exceptions.hh delete mode 100644 scylla/exceptions/unrecognized_entity_exception.hh delete mode 100755 scylla/fix_system_distributed_tables.py delete mode 100644 scylla/fnv1a_hasher.hh delete mode 100644 scylla/frozen_mutation.cc delete mode 100644 scylla/frozen_mutation.hh delete mode 100644 scylla/frozen_schema.cc delete mode 100644 scylla/frozen_schema.hh delete mode 100644 scylla/gc_clock.hh delete mode 100644 scylla/hashing.hh delete mode 100644 scylla/hashing_partition_visitor.hh delete mode 100644 scylla/idl/cache_temperature.idl.hh delete mode 100644 scylla/idl/commitlog.idl.hh delete mode 100644 scylla/idl/consistency_level.idl.hh delete mode 100644 
scylla/idl/frozen_mutation.idl.hh delete mode 100644 scylla/idl/frozen_schema.idl.hh delete mode 100644 scylla/idl/mutation.idl.hh delete mode 100644 scylla/idl/paging_state.idl.hh delete mode 100644 scylla/idl/partition_checksum.idl.hh delete mode 100644 scylla/idl/query.idl.hh delete mode 100644 scylla/idl/range.idl.hh delete mode 100644 scylla/idl/read_command.idl.hh delete mode 100644 scylla/idl/reconcilable_result.idl.hh delete mode 100644 scylla/idl/replay_position.idl.hh delete mode 100644 scylla/idl/ring_position.idl.hh delete mode 100644 scylla/idl/tracing.idl.hh delete mode 100644 scylla/idl/truncation_record.idl.hh delete mode 100644 scylla/index/secondary_index_manager.cc delete mode 100644 scylla/index/secondary_index_manager.hh delete mode 100644 scylla/init.cc delete mode 100644 scylla/init.hh delete mode 100644 scylla/interface/cassandra.thrift delete mode 100644 scylla/intrusive_set_external_comparator.hh delete mode 100644 scylla/json.hh delete mode 100644 scylla/keys.cc delete mode 100644 scylla/keys.hh delete mode 100644 scylla/lister.cc delete mode 100644 scylla/lister.hh delete mode 100644 scylla/locator/abstract_replication_strategy.cc delete mode 100644 scylla/locator/abstract_replication_strategy.hh delete mode 100644 scylla/locator/ec2_multi_region_snitch.cc delete mode 100644 scylla/locator/ec2_multi_region_snitch.hh delete mode 100644 scylla/locator/ec2_snitch.cc delete mode 100644 scylla/locator/ec2_snitch.hh delete mode 100644 scylla/locator/everywhere_replication_strategy.cc delete mode 100644 scylla/locator/everywhere_replication_strategy.hh delete mode 100644 scylla/locator/gossiping_property_file_snitch.cc delete mode 100644 scylla/locator/gossiping_property_file_snitch.hh delete mode 100644 scylla/locator/local_strategy.cc delete mode 100644 scylla/locator/local_strategy.hh delete mode 100644 scylla/locator/locator.cc delete mode 100644 scylla/locator/network_topology_strategy.cc delete mode 100644 scylla/locator/network_topology_strategy.hh delete mode 100644 scylla/locator/production_snitch_base.cc delete mode 100644 scylla/locator/production_snitch_base.hh delete mode 100644 scylla/locator/rack_inferring_snitch.cc delete mode 100644 scylla/locator/rack_inferring_snitch.hh delete mode 100644 scylla/locator/reconnectable_snitch_helper.hh delete mode 100644 scylla/locator/simple_snitch.cc delete mode 100644 scylla/locator/simple_snitch.hh delete mode 100644 scylla/locator/simple_strategy.cc delete mode 100644 scylla/locator/simple_strategy.hh delete mode 100644 scylla/locator/snitch_base.cc delete mode 100644 scylla/locator/snitch_base.hh delete mode 100644 scylla/locator/token_metadata.cc delete mode 100644 scylla/locator/token_metadata.hh delete mode 100644 scylla/log.hh delete mode 100644 scylla/map_difference.hh delete mode 100644 scylla/md5_hasher.hh delete mode 100644 scylla/memtable-sstable.hh delete mode 100644 scylla/memtable.cc delete mode 100644 scylla/memtable.hh delete mode 100644 scylla/mutation.cc delete mode 100644 scylla/mutation.hh delete mode 100644 scylla/mutation_compactor.hh delete mode 100644 scylla/mutation_partition.cc delete mode 100644 scylla/mutation_partition.hh delete mode 100644 scylla/mutation_partition_applier.hh delete mode 100644 scylla/mutation_partition_serializer.cc delete mode 100644 scylla/mutation_partition_serializer.hh delete mode 100644 scylla/mutation_partition_view.cc delete mode 100644 scylla/mutation_partition_view.hh delete mode 100644 scylla/mutation_partition_visitor.hh delete mode 100644 
scylla/mutation_query.cc delete mode 100644 scylla/mutation_query.hh delete mode 100644 scylla/mutation_reader.cc delete mode 100644 scylla/mutation_reader.hh delete mode 100644 scylla/noexcept_traits.hh delete mode 100644 scylla/nway_merger.hh delete mode 100644 scylla/partition_builder.hh delete mode 100644 scylla/partition_range_compat.hh delete mode 100644 scylla/partition_slice_builder.cc delete mode 100644 scylla/partition_slice_builder.hh delete mode 100644 scylla/partition_snapshot_reader.hh delete mode 100644 scylla/partition_snapshot_row_cursor.hh delete mode 100644 scylla/partition_version.cc delete mode 100644 scylla/partition_version.hh delete mode 100644 scylla/position_in_partition.hh delete mode 100644 scylla/query-request.hh delete mode 100644 scylla/query-result-reader.hh delete mode 100644 scylla/query-result-set.cc delete mode 100644 scylla/query-result-set.hh delete mode 100644 scylla/query-result-writer.hh delete mode 100644 scylla/query-result.hh delete mode 100644 scylla/query.cc delete mode 100644 scylla/query_result_merger.hh delete mode 100644 scylla/range.hh delete mode 100644 scylla/range_tombstone.cc delete mode 100644 scylla/range_tombstone.hh delete mode 100644 scylla/range_tombstone_list.cc delete mode 100644 scylla/range_tombstone_list.hh delete mode 100644 scylla/read_context.hh delete mode 100644 scylla/release.cc delete mode 100644 scylla/release.hh delete mode 100644 scylla/repair/range_split.hh delete mode 100644 scylla/repair/repair.cc delete mode 100644 scylla/repair/repair.hh delete mode 100644 scylla/reversibly_mergeable.hh delete mode 100644 scylla/row_cache.cc delete mode 100644 scylla/row_cache.hh delete mode 100644 scylla/schema.cc delete mode 100644 scylla/schema.hh delete mode 100644 scylla/schema_builder.hh delete mode 100644 scylla/schema_mutations.cc delete mode 100644 scylla/schema_mutations.hh delete mode 100644 scylla/schema_registry.cc delete mode 100644 scylla/schema_registry.hh delete mode 100644 scylla/schema_upgrader.hh delete mode 100755 scylla/scripts/git-archive-all delete mode 100755 scylla/scripts/scylla_current_repo delete mode 100755 scylla/scripts/scylla_install_pkg delete mode 100755 scylla/scripts/update-version delete mode 100755 scylla/scylla-blocktune delete mode 100755 scylla/scylla-housekeeping delete mode 100644 scylla/seastarx.hh delete mode 100644 scylla/serialization_visitors.hh delete mode 100644 scylla/serializer.hh delete mode 100644 scylla/serializer_impl.hh delete mode 100644 scylla/service/cache_hitrate_calculator.hh delete mode 100644 scylla/service/client_state.cc delete mode 100644 scylla/service/client_state.hh delete mode 100644 scylla/service/endpoint_lifecycle_subscriber.hh delete mode 100644 scylla/service/load_broadcaster.hh delete mode 100644 scylla/service/migration_listener.hh delete mode 100644 scylla/service/migration_manager.cc delete mode 100644 scylla/service/migration_manager.hh delete mode 100644 scylla/service/migration_task.cc delete mode 100644 scylla/service/migration_task.hh delete mode 100644 scylla/service/misc_services.cc delete mode 100644 scylla/service/pager/paging_state.cc delete mode 100644 scylla/service/pager/paging_state.hh delete mode 100644 scylla/service/pager/query_pager.hh delete mode 100644 scylla/service/pager/query_pagers.cc delete mode 100644 scylla/service/pager/query_pagers.hh delete mode 100644 scylla/service/priority_manager.cc delete mode 100644 scylla/service/priority_manager.hh delete mode 100644 scylla/service/query_state.hh delete mode 100644 
scylla/service/storage_proxy.cc delete mode 100644 scylla/service/storage_proxy.hh delete mode 100644 scylla/service/storage_service.cc delete mode 100644 scylla/service/storage_service.hh delete mode 100644 scylla/sstable_mutation_readers.hh delete mode 100644 scylla/sstables/atomic_deletion.cc delete mode 100644 scylla/sstables/atomic_deletion.hh delete mode 100644 scylla/sstables/binary_search.hh delete mode 100644 scylla/sstables/column_name_helper.hh delete mode 100644 scylla/sstables/compaction.cc delete mode 100644 scylla/sstables/compaction.hh delete mode 100644 scylla/sstables/compaction_manager.cc delete mode 100644 scylla/sstables/compaction_manager.hh delete mode 100644 scylla/sstables/compaction_strategy.cc delete mode 100644 scylla/sstables/compress.cc delete mode 100644 scylla/sstables/compress.hh delete mode 100644 scylla/sstables/consumer.hh delete mode 100644 scylla/sstables/date_tiered_compaction_strategy.hh delete mode 100644 scylla/sstables/disk_types.hh delete mode 100644 scylla/sstables/downsampling.hh delete mode 100644 scylla/sstables/exceptions.hh delete mode 100644 scylla/sstables/filter.cc delete mode 100644 scylla/sstables/filter.hh delete mode 100644 scylla/sstables/hyperloglog.hh delete mode 100644 scylla/sstables/index_reader.hh delete mode 100644 scylla/sstables/key.hh delete mode 100644 scylla/sstables/leveled_manifest.hh delete mode 100644 scylla/sstables/metadata_collector.hh delete mode 100644 scylla/sstables/partition.cc delete mode 100644 scylla/sstables/remove.hh delete mode 100644 scylla/sstables/row.cc delete mode 100644 scylla/sstables/row.hh delete mode 100644 scylla/sstables/shared_index_lists.hh delete mode 100644 scylla/sstables/sstable_set.hh delete mode 100644 scylla/sstables/sstables.cc delete mode 100644 scylla/sstables/sstables.hh delete mode 100644 scylla/sstables/streaming_histogram.hh delete mode 100644 scylla/sstables/types.hh delete mode 100644 scylla/sstables/writer.hh delete mode 100644 scylla/stdx.hh delete mode 100644 scylla/streamed_mutation.cc delete mode 100644 scylla/streamed_mutation.hh delete mode 100644 scylla/streaming/prepare_message.hh delete mode 100644 scylla/streaming/progress_info.cc delete mode 100644 scylla/streaming/progress_info.hh delete mode 100644 scylla/streaming/session_info.cc delete mode 100644 scylla/streaming/session_info.hh delete mode 100644 scylla/streaming/stream_coordinator.cc delete mode 100644 scylla/streaming/stream_coordinator.hh delete mode 100644 scylla/streaming/stream_detail.hh delete mode 100644 scylla/streaming/stream_event.hh delete mode 100644 scylla/streaming/stream_event_handler.hh delete mode 100644 scylla/streaming/stream_exception.hh delete mode 100644 scylla/streaming/stream_manager.cc delete mode 100644 scylla/streaming/stream_manager.hh delete mode 100644 scylla/streaming/stream_plan.cc delete mode 100644 scylla/streaming/stream_plan.hh delete mode 100644 scylla/streaming/stream_receive_task.cc delete mode 100644 scylla/streaming/stream_receive_task.hh delete mode 100644 scylla/streaming/stream_request.cc delete mode 100644 scylla/streaming/stream_request.hh delete mode 100644 scylla/streaming/stream_result_future.cc delete mode 100644 scylla/streaming/stream_result_future.hh delete mode 100644 scylla/streaming/stream_session.cc delete mode 100644 scylla/streaming/stream_session.hh delete mode 100644 scylla/streaming/stream_session_state.cc delete mode 100644 scylla/streaming/stream_session_state.hh delete mode 100644 scylla/streaming/stream_state.hh delete mode 100644 
scylla/streaming/stream_summary.cc delete mode 100644 scylla/streaming/stream_summary.hh delete mode 100644 scylla/streaming/stream_task.cc delete mode 100644 scylla/streaming/stream_task.hh delete mode 100644 scylla/streaming/stream_transfer_task.cc delete mode 100644 scylla/streaming/stream_transfer_task.hh delete mode 100644 scylla/supervisor.cc delete mode 100644 scylla/supervisor.hh delete mode 100644 scylla/swagger-ui/.dockerignore delete mode 100644 scylla/swagger-ui/.gitattributes delete mode 100644 scylla/swagger-ui/.gitignore delete mode 100644 scylla/swagger-ui/.jshintignore delete mode 100644 scylla/swagger-ui/.jshintrc delete mode 100644 scylla/swagger-ui/.npmignore delete mode 100644 scylla/swagger-ui/.travis.yml delete mode 100644 scylla/swagger-ui/CONTRIBUTING.md delete mode 100644 scylla/swagger-ui/Dockerfile delete mode 100644 scylla/swagger-ui/LICENSE delete mode 100644 scylla/swagger-ui/README.md delete mode 100644 scylla/swagger-ui/bower.json delete mode 100644 scylla/swagger-ui/dist/css/print.css delete mode 100644 scylla/swagger-ui/dist/css/reset.css delete mode 100644 scylla/swagger-ui/dist/css/screen.css delete mode 100644 scylla/swagger-ui/dist/css/style.css delete mode 100644 scylla/swagger-ui/dist/css/typography.css delete mode 100644 scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-700.eot delete mode 100644 scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-700.svg delete mode 100644 scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-700.ttf delete mode 100644 scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-700.woff delete mode 100644 scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-700.woff2 delete mode 100644 scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-regular.eot delete mode 100644 scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-regular.svg delete mode 100644 scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-regular.ttf delete mode 100644 scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-regular.woff delete mode 100644 scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-regular.woff2 delete mode 100644 scylla/swagger-ui/dist/images/explorer_icons.png delete mode 100755 scylla/swagger-ui/dist/images/favicon-16x16.png delete mode 100755 scylla/swagger-ui/dist/images/favicon-32x32.png delete mode 100755 scylla/swagger-ui/dist/images/favicon.ico delete mode 100644 scylla/swagger-ui/dist/images/logo_small.png delete mode 100644 scylla/swagger-ui/dist/images/pet_store_api.png delete mode 100644 scylla/swagger-ui/dist/images/throbber.gif delete mode 100644 scylla/swagger-ui/dist/images/wordnik_api.png delete mode 100644 scylla/swagger-ui/dist/index.html delete mode 100644 scylla/swagger-ui/dist/lib/backbone-min.js delete mode 100644 scylla/swagger-ui/dist/lib/handlebars-2.0.0.js delete mode 100644 scylla/swagger-ui/dist/lib/highlight.7.3.pack.js delete mode 100644 scylla/swagger-ui/dist/lib/jquery-1.8.0.min.js delete mode 100644 scylla/swagger-ui/dist/lib/jquery.ba-bbq.min.js delete mode 100644 scylla/swagger-ui/dist/lib/jquery.slideto.min.js delete mode 100644 scylla/swagger-ui/dist/lib/jquery.wiggle.min.js delete mode 100644 scylla/swagger-ui/dist/lib/marked.js delete mode 100644 scylla/swagger-ui/dist/lib/swagger-oauth.js delete mode 100644 scylla/swagger-ui/dist/lib/underscore-min.js delete mode 100644 scylla/swagger-ui/dist/lib/underscore-min.map delete mode 100644 scylla/swagger-ui/dist/o2c.html delete mode 100644 scylla/swagger-ui/dist/swagger-ui.js delete mode 100644 scylla/swagger-ui/dist/swagger-ui.min.js delete mode 100644 scylla/swagger-ui/gulpfile.js 
delete mode 100644 scylla/swagger-ui/index.js delete mode 100644 scylla/swagger-ui/lang/en.js delete mode 100644 scylla/swagger-ui/lang/ru.js delete mode 100644 scylla/swagger-ui/lang/translator.js delete mode 100644 scylla/swagger-ui/lib/backbone-min.js delete mode 100644 scylla/swagger-ui/lib/handlebars-2.0.0.js delete mode 100644 scylla/swagger-ui/lib/highlight.7.3.pack.js delete mode 100644 scylla/swagger-ui/lib/jquery-1.8.0.min.js delete mode 100644 scylla/swagger-ui/lib/jquery.ba-bbq.min.js delete mode 100644 scylla/swagger-ui/lib/jquery.slideto.min.js delete mode 100644 scylla/swagger-ui/lib/jquery.wiggle.min.js delete mode 100644 scylla/swagger-ui/lib/marked.js delete mode 100644 scylla/swagger-ui/lib/swagger-oauth.js delete mode 100644 scylla/swagger-ui/lib/underscore-min.js delete mode 100644 scylla/swagger-ui/lib/underscore-min.map delete mode 100644 scylla/swagger-ui/package.json delete mode 100644 scylla/swagger-ui/src/main/html/css/print.css delete mode 100644 scylla/swagger-ui/src/main/html/css/reset.css delete mode 100644 scylla/swagger-ui/src/main/html/css/screen.css delete mode 100644 scylla/swagger-ui/src/main/html/css/style.css delete mode 100644 scylla/swagger-ui/src/main/html/css/typography.css delete mode 100644 scylla/swagger-ui/src/main/html/fonts/droid-sans-v6-latin-700.eot delete mode 100644 scylla/swagger-ui/src/main/html/fonts/droid-sans-v6-latin-700.svg delete mode 100644 scylla/swagger-ui/src/main/html/fonts/droid-sans-v6-latin-700.ttf delete mode 100644 scylla/swagger-ui/src/main/html/fonts/droid-sans-v6-latin-700.woff delete mode 100644 scylla/swagger-ui/src/main/html/fonts/droid-sans-v6-latin-700.woff2 delete mode 100644 scylla/swagger-ui/src/main/html/fonts/droid-sans-v6-latin-regular.eot delete mode 100644 scylla/swagger-ui/src/main/html/fonts/droid-sans-v6-latin-regular.svg delete mode 100644 scylla/swagger-ui/src/main/html/fonts/droid-sans-v6-latin-regular.ttf delete mode 100644 scylla/swagger-ui/src/main/html/fonts/droid-sans-v6-latin-regular.woff delete mode 100644 scylla/swagger-ui/src/main/html/fonts/droid-sans-v6-latin-regular.woff2 delete mode 100644 scylla/swagger-ui/src/main/html/images/explorer_icons.png delete mode 100755 scylla/swagger-ui/src/main/html/images/favicon-16x16.png delete mode 100755 scylla/swagger-ui/src/main/html/images/favicon-32x32.png delete mode 100755 scylla/swagger-ui/src/main/html/images/favicon.ico delete mode 100644 scylla/swagger-ui/src/main/html/images/logo_small.png delete mode 100644 scylla/swagger-ui/src/main/html/images/pet_store_api.png delete mode 100644 scylla/swagger-ui/src/main/html/images/throbber.gif delete mode 100644 scylla/swagger-ui/src/main/html/images/wordnik_api.png delete mode 100644 scylla/swagger-ui/src/main/html/index.html delete mode 100644 scylla/swagger-ui/src/main/html/o2c.html delete mode 100644 scylla/swagger-ui/src/main/javascript/SwaggerUi.js delete mode 100644 scylla/swagger-ui/src/main/javascript/doc.js delete mode 100644 scylla/swagger-ui/src/main/javascript/helpers/handlebars.js delete mode 100644 scylla/swagger-ui/src/main/javascript/view/ApiKeyButton.js delete mode 100644 scylla/swagger-ui/src/main/javascript/view/BasicAuthButton.js delete mode 100644 scylla/swagger-ui/src/main/javascript/view/ContentTypeView.js delete mode 100644 scylla/swagger-ui/src/main/javascript/view/HeaderView.js delete mode 100644 scylla/swagger-ui/src/main/javascript/view/MainView.js delete mode 100644 scylla/swagger-ui/src/main/javascript/view/OperationView.js delete mode 100644 
scylla/swagger-ui/src/main/javascript/view/ParameterContentTypeView.js delete mode 100644 scylla/swagger-ui/src/main/javascript/view/ParameterView.js delete mode 100644 scylla/swagger-ui/src/main/javascript/view/ResourceView.js delete mode 100644 scylla/swagger-ui/src/main/javascript/view/ResponseContentTypeView.js delete mode 100644 scylla/swagger-ui/src/main/javascript/view/SignatureView.js delete mode 100644 scylla/swagger-ui/src/main/javascript/view/StatusCodeView.js delete mode 100644 scylla/swagger-ui/src/main/less/auth.less delete mode 100644 scylla/swagger-ui/src/main/less/highlight_default.less delete mode 100644 scylla/swagger-ui/src/main/less/print.less delete mode 100644 scylla/swagger-ui/src/main/less/reset.less delete mode 100644 scylla/swagger-ui/src/main/less/screen.less delete mode 100644 scylla/swagger-ui/src/main/less/specs.less delete mode 100644 scylla/swagger-ui/src/main/less/style.less delete mode 100644 scylla/swagger-ui/src/main/template/apikey_button_view.handlebars delete mode 100644 scylla/swagger-ui/src/main/template/basic_auth_button_view.handlebars delete mode 100644 scylla/swagger-ui/src/main/template/content_type.handlebars delete mode 100644 scylla/swagger-ui/src/main/template/main.handlebars delete mode 100644 scylla/swagger-ui/src/main/template/operation.handlebars delete mode 100644 scylla/swagger-ui/src/main/template/param.handlebars delete mode 100644 scylla/swagger-ui/src/main/template/param_list.handlebars delete mode 100644 scylla/swagger-ui/src/main/template/param_readonly.handlebars delete mode 100644 scylla/swagger-ui/src/main/template/param_readonly_required.handlebars delete mode 100644 scylla/swagger-ui/src/main/template/param_required.handlebars delete mode 100644 scylla/swagger-ui/src/main/template/parameter_content_type.handlebars delete mode 100644 scylla/swagger-ui/src/main/template/resource.handlebars delete mode 100644 scylla/swagger-ui/src/main/template/response_content_type.handlebars delete mode 100644 scylla/swagger-ui/src/main/template/signature.handlebars delete mode 100644 scylla/swagger-ui/src/main/template/status_code.handlebars delete mode 100644 scylla/swagger-ui/test/.jshintrc delete mode 100644 scylla/swagger-ui/test/e2e/driver.js delete mode 100644 scylla/swagger-ui/test/e2e/servers.js delete mode 100644 scylla/swagger-ui/test/e2e/v1.js delete mode 100644 scylla/swagger-ui/test/e2e/v2.js delete mode 100644 scylla/swagger-ui/test/mocha.opts delete mode 100644 scylla/swagger-ui/test/specs/v1.2/petstore/api-docs.json delete mode 100644 scylla/swagger-ui/test/specs/v1.2/petstore/pet.json delete mode 100644 scylla/swagger-ui/test/specs/v1.2/petstore/store.json delete mode 100644 scylla/swagger-ui/test/specs/v1.2/petstore/user.json delete mode 100644 scylla/swagger-ui/test/specs/v2/petstore.json delete mode 100644 scylla/tests/UUID_test.cc delete mode 100644 scylla/tests/allocation_strategy_test.cc delete mode 100644 scylla/tests/anchorless_list_test.cc delete mode 100644 scylla/tests/auth_test.cc delete mode 100644 scylla/tests/batchlog_manager_test.cc delete mode 100644 scylla/tests/bytes_ostream_test.cc delete mode 100644 scylla/tests/cache_streamed_mutation_test.cc delete mode 100644 scylla/tests/canonical_mutation_test.cc delete mode 100644 scylla/tests/cartesian_product_test.cc delete mode 100644 scylla/tests/cell_locker_test.cc delete mode 100644 scylla/tests/commitlog_test.cc delete mode 100644 scylla/tests/compound_test.cc delete mode 100644 scylla/tests/config_test.cc delete mode 100644 scylla/tests/counter_test.cc 
delete mode 100644 scylla/tests/cql_assertions.cc delete mode 100644 scylla/tests/cql_assertions.hh delete mode 100644 scylla/tests/cql_query_test.cc delete mode 100644 scylla/tests/cql_test_env.cc delete mode 100644 scylla/tests/cql_test_env.hh delete mode 100644 scylla/tests/crc_test.cc delete mode 100644 scylla/tests/database_test.cc delete mode 100644 scylla/tests/dynamic_bitset_test.cc delete mode 100644 scylla/tests/ec2_snitch_test.cc delete mode 100644 scylla/tests/flush_queue_test.cc delete mode 100644 scylla/tests/frozen_mutation_test.cc delete mode 100644 scylla/tests/gossip.cc delete mode 100644 scylla/tests/gossip_test.cc delete mode 100644 scylla/tests/gossiping_property_file_snitch_test.cc delete mode 100644 scylla/tests/hash_test.cc delete mode 100644 scylla/tests/idl_test.cc delete mode 100644 scylla/tests/input_stream_test.cc delete mode 100644 scylla/tests/keys_test.cc delete mode 100644 scylla/tests/log_histogram_test.cc delete mode 100644 scylla/tests/logalloc_test.cc delete mode 100644 scylla/tests/lsa_async_eviction_test.cc delete mode 100644 scylla/tests/lsa_sync_eviction_test.cc delete mode 100644 scylla/tests/make_random_string.hh delete mode 100644 scylla/tests/managed_vector_test.cc delete mode 100644 scylla/tests/map_difference_test.cc delete mode 100644 scylla/tests/memory_footprint.cc delete mode 100644 scylla/tests/memtable_snapshot_source.hh delete mode 100644 scylla/tests/memtable_test.cc delete mode 100644 scylla/tests/message.cc delete mode 100644 scylla/tests/murmur_hash_test.cc delete mode 100644 scylla/tests/mutation_assertions.hh delete mode 100644 scylla/tests/mutation_query_test.cc delete mode 100644 scylla/tests/mutation_reader_assertions.hh delete mode 100644 scylla/tests/mutation_reader_test.cc delete mode 100644 scylla/tests/mutation_source_test.cc delete mode 100644 scylla/tests/mutation_source_test.hh delete mode 100644 scylla/tests/mutation_test.cc delete mode 100644 scylla/tests/network_topology_strategy_test.cc delete mode 100644 scylla/tests/nonwrapping_range_test.cc delete mode 100644 scylla/tests/partitioner_test.cc delete mode 100644 scylla/tests/perf/perf.hh delete mode 100644 scylla/tests/perf/perf_cql_parser.cc delete mode 100644 scylla/tests/perf/perf_fast_forward.cc delete mode 100644 scylla/tests/perf/perf_hash.cc delete mode 100644 scylla/tests/perf/perf_mutation.cc delete mode 100644 scylla/tests/perf/perf_simple_query.cc delete mode 100644 scylla/tests/perf/perf_sstable.cc delete mode 100644 scylla/tests/perf/perf_sstable.hh delete mode 100644 scylla/tests/perf_row_cache_update.cc delete mode 100644 scylla/tests/query_processor_test.cc delete mode 100644 scylla/tests/range_assert.hh delete mode 100644 scylla/tests/range_test.cc delete mode 100644 scylla/tests/range_tombstone_list_test.cc delete mode 100644 scylla/tests/result_set_assertions.cc delete mode 100644 scylla/tests/result_set_assertions.hh delete mode 100644 scylla/tests/row_cache_alloc_stress.cc delete mode 100644 scylla/tests/row_cache_stress_test.cc delete mode 100644 scylla/tests/row_cache_test.cc delete mode 100644 scylla/tests/schema_change_test.cc delete mode 100644 scylla/tests/schema_registry_test.cc delete mode 100644 scylla/tests/simple_schema.hh delete mode 100644 scylla/tests/snitch_property_files/bad_double_dc.property delete mode 100644 scylla/tests/snitch_property_files/bad_double_prefer_local.property delete mode 100644 scylla/tests/snitch_property_files/bad_double_rack.property delete mode 100644 
scylla/tests/snitch_property_files/bad_format_1.property delete mode 100644 scylla/tests/snitch_property_files/bad_format_2.property delete mode 100644 scylla/tests/snitch_property_files/bad_format_3.property delete mode 100644 scylla/tests/snitch_property_files/bad_format_4.property delete mode 100644 scylla/tests/snitch_property_files/bad_format_5.property delete mode 100644 scylla/tests/snitch_property_files/bad_format_6.property delete mode 100644 scylla/tests/snitch_property_files/bad_missing_dc.property delete mode 100644 scylla/tests/snitch_property_files/bad_missing_rack.property delete mode 100644 scylla/tests/snitch_property_files/good_1.property delete mode 100644 scylla/tests/snitch_property_files/good_2.property delete mode 100644 scylla/tests/snitch_property_files/good_missing_prefer_local.property delete mode 100644 scylla/tests/snitch_reset_test.cc delete mode 100644 scylla/tests/sstable_assertions.hh delete mode 100644 scylla/tests/sstable_atomic_deletion_test.cc delete mode 100644 scylla/tests/sstable_datafile_test.cc delete mode 100644 scylla/tests/sstable_mutation_test.cc delete mode 100644 scylla/tests/sstable_resharding_test.cc delete mode 100644 scylla/tests/sstable_test.cc delete mode 100644 scylla/tests/sstable_test.hh delete mode 100644 scylla/tests/sstables/badcompression/C delete mode 100644 scylla/tests/sstables/badcompression/la-1-big-CompressionInfo.db delete mode 100644 scylla/tests/sstables/badcompression/la-1-big-TOC.txt delete mode 100644 scylla/tests/sstables/badcompression/la-2-big-CompressionInfo.db delete mode 100644 scylla/tests/sstables/badcompression/la-2-big-TOC.txt delete mode 100644 scylla/tests/sstables/badtoc/ka-4-big-TOC.txt delete mode 100644 scylla/tests/sstables/badtoc/la-1-big-TOC.txt delete mode 100644 scylla/tests/sstables/badtoc/la-2-big-TOC.txt delete mode 100644 scylla/tests/sstables/badtoc/la-3-big-TOC.txt delete mode 100644 scylla/tests/sstables/bigsummary/la-76-big-CompressionInfo.db delete mode 100644 scylla/tests/sstables/bigsummary/la-76-big-Data.db delete mode 100644 scylla/tests/sstables/bigsummary/la-76-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/bigsummary/la-76-big-Filter.db delete mode 100644 scylla/tests/sstables/bigsummary/la-76-big-Index.db delete mode 100644 scylla/tests/sstables/bigsummary/la-76-big-Statistics.db delete mode 100644 scylla/tests/sstables/bigsummary/la-76-big-Summary.db delete mode 100644 scylla/tests/sstables/bigsummary/la-76-big-TOC.txt delete mode 100755 scylla/tests/sstables/broken_ranges/la-2-big-CRC.db delete mode 100755 scylla/tests/sstables/broken_ranges/la-2-big-Data.db delete mode 100755 scylla/tests/sstables/broken_ranges/la-2-big-Digest.sha1 delete mode 100755 scylla/tests/sstables/broken_ranges/la-2-big-Filter.db delete mode 100755 scylla/tests/sstables/broken_ranges/la-2-big-Index.db delete mode 100755 scylla/tests/sstables/broken_ranges/la-2-big-Statistics.db delete mode 100755 scylla/tests/sstables/broken_ranges/la-2-big-Summary.db delete mode 100755 scylla/tests/sstables/broken_ranges/la-2-big-TOC.txt delete mode 100644 scylla/tests/sstables/compact_dense/la-1-big-CRC.db delete mode 100644 scylla/tests/sstables/compact_dense/la-1-big-Data.db delete mode 100644 scylla/tests/sstables/compact_dense/la-1-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/compact_dense/la-1-big-Filter.db delete mode 100644 scylla/tests/sstables/compact_dense/la-1-big-Index.db delete mode 100644 scylla/tests/sstables/compact_dense/la-1-big-Statistics.db delete mode 100644 
scylla/tests/sstables/compact_dense/la-1-big-Summary.db delete mode 100644 scylla/tests/sstables/compact_dense/la-1-big-TOC.txt delete mode 100644 scylla/tests/sstables/compact_simple_dense/la-1-big-CRC.db delete mode 100644 scylla/tests/sstables/compact_simple_dense/la-1-big-Data.db delete mode 100644 scylla/tests/sstables/compact_simple_dense/la-1-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/compact_simple_dense/la-1-big-Filter.db delete mode 100644 scylla/tests/sstables/compact_simple_dense/la-1-big-Index.db delete mode 100644 scylla/tests/sstables/compact_simple_dense/la-1-big-Statistics.db delete mode 100644 scylla/tests/sstables/compact_simple_dense/la-1-big-Summary.db delete mode 100644 scylla/tests/sstables/compact_simple_dense/la-1-big-TOC.txt delete mode 100644 scylla/tests/sstables/compact_sparse/la-1-big-CRC.db delete mode 100644 scylla/tests/sstables/compact_sparse/la-1-big-Data.db delete mode 100644 scylla/tests/sstables/compact_sparse/la-1-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/compact_sparse/la-1-big-Filter.db delete mode 100644 scylla/tests/sstables/compact_sparse/la-1-big-Index.db delete mode 100644 scylla/tests/sstables/compact_sparse/la-1-big-Statistics.db delete mode 100644 scylla/tests/sstables/compact_sparse/la-1-big-Summary.db delete mode 100644 scylla/tests/sstables/compact_sparse/la-1-big-TOC.txt delete mode 100644 scylla/tests/sstables/compaction/la-1-big-CompressionInfo.db delete mode 100644 scylla/tests/sstables/compaction/la-1-big-Data.db delete mode 100644 scylla/tests/sstables/compaction/la-1-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/compaction/la-1-big-Filter.db delete mode 100644 scylla/tests/sstables/compaction/la-1-big-Index.db delete mode 100644 scylla/tests/sstables/compaction/la-1-big-Statistics.db delete mode 100644 scylla/tests/sstables/compaction/la-1-big-Summary.db delete mode 100644 scylla/tests/sstables/compaction/la-1-big-TOC.txt delete mode 100644 scylla/tests/sstables/compaction/la-2-big-CompressionInfo.db delete mode 100644 scylla/tests/sstables/compaction/la-2-big-Data.db delete mode 100644 scylla/tests/sstables/compaction/la-2-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/compaction/la-2-big-Filter.db delete mode 100644 scylla/tests/sstables/compaction/la-2-big-Index.db delete mode 100644 scylla/tests/sstables/compaction/la-2-big-Statistics.db delete mode 100644 scylla/tests/sstables/compaction/la-2-big-Summary.db delete mode 100644 scylla/tests/sstables/compaction/la-2-big-TOC.txt delete mode 100644 scylla/tests/sstables/compaction/la-3-big-CompressionInfo.db delete mode 100644 scylla/tests/sstables/compaction/la-3-big-Data.db delete mode 100644 scylla/tests/sstables/compaction/la-3-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/compaction/la-3-big-Filter.db delete mode 100644 scylla/tests/sstables/compaction/la-3-big-Index.db delete mode 100644 scylla/tests/sstables/compaction/la-3-big-Statistics.db delete mode 100644 scylla/tests/sstables/compaction/la-3-big-Summary.db delete mode 100644 scylla/tests/sstables/compaction/la-3-big-TOC.txt delete mode 100644 scylla/tests/sstables/complex/la-1-big-CRC.db delete mode 100644 scylla/tests/sstables/complex/la-1-big-Data.db delete mode 100644 scylla/tests/sstables/complex/la-1-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/complex/la-1-big-Filter.db delete mode 100644 scylla/tests/sstables/complex/la-1-big-Index.db delete mode 100644 scylla/tests/sstables/complex/la-1-big-Statistics.db delete mode 100644 
scylla/tests/sstables/complex/la-1-big-Summary.db delete mode 100644 scylla/tests/sstables/complex/la-1-big-TOC.txt delete mode 100644 scylla/tests/sstables/complex/la-2-big-CRC.db delete mode 100644 scylla/tests/sstables/complex/la-2-big-Data.db delete mode 100644 scylla/tests/sstables/complex/la-2-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/complex/la-2-big-Filter.db delete mode 100644 scylla/tests/sstables/complex/la-2-big-Index.db delete mode 100644 scylla/tests/sstables/complex/la-2-big-Statistics.db delete mode 100644 scylla/tests/sstables/complex/la-2-big-Summary.db delete mode 100644 scylla/tests/sstables/complex/la-2-big-TOC.txt delete mode 100644 scylla/tests/sstables/complex/la-3-big-CRC.db delete mode 100644 scylla/tests/sstables/complex/la-3-big-Data.db delete mode 100644 scylla/tests/sstables/complex/la-3-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/complex/la-3-big-Filter.db delete mode 100644 scylla/tests/sstables/complex/la-3-big-Index.db delete mode 100644 scylla/tests/sstables/complex/la-3-big-Statistics.db delete mode 100644 scylla/tests/sstables/complex/la-3-big-Summary.db delete mode 100644 scylla/tests/sstables/complex/la-3-big-TOC.txt delete mode 100644 scylla/tests/sstables/composite/la-1-big-CompressionInfo.db delete mode 100644 scylla/tests/sstables/composite/la-1-big-Data.db delete mode 100644 scylla/tests/sstables/composite/la-1-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/composite/la-1-big-Filter.db delete mode 100644 scylla/tests/sstables/composite/la-1-big-Index.db delete mode 100644 scylla/tests/sstables/composite/la-1-big-Statistics.db delete mode 100644 scylla/tests/sstables/composite/la-1-big-Summary.db delete mode 100644 scylla/tests/sstables/composite/la-1-big-TOC.txt delete mode 100644 scylla/tests/sstables/compressed/la-1-big-CompressionInfo.db delete mode 100644 scylla/tests/sstables/compressed/la-1-big-Data.db delete mode 100644 scylla/tests/sstables/compressed/la-1-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/compressed/la-1-big-Filter.db delete mode 100644 scylla/tests/sstables/compressed/la-1-big-Index.db delete mode 100644 scylla/tests/sstables/compressed/la-1-big-Statistics.db delete mode 100644 scylla/tests/sstables/compressed/la-1-big-Summary.db delete mode 100644 scylla/tests/sstables/compressed/la-1-big-TOC.txt delete mode 100644 scylla/tests/sstables/counter_test/ks-counter_test-ka-5-CompressionInfo.db delete mode 100644 scylla/tests/sstables/counter_test/ks-counter_test-ka-5-Data.db delete mode 100644 scylla/tests/sstables/counter_test/ks-counter_test-ka-5-Digest.sha1 delete mode 100644 scylla/tests/sstables/counter_test/ks-counter_test-ka-5-Filter.db delete mode 100644 scylla/tests/sstables/counter_test/ks-counter_test-ka-5-Index.db delete mode 100644 scylla/tests/sstables/counter_test/ks-counter_test-ka-5-Statistics.db delete mode 100644 scylla/tests/sstables/counter_test/ks-counter_test-ka-5-Summary.db delete mode 100644 scylla/tests/sstables/counter_test/ks-counter_test-ka-5-TOC.txt delete mode 100644 scylla/tests/sstables/deleted_cell/la-2-big-CompressionInfo.db delete mode 100644 scylla/tests/sstables/deleted_cell/la-2-big-Data.db delete mode 100644 scylla/tests/sstables/deleted_cell/la-2-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/deleted_cell/la-2-big-Filter.db delete mode 100644 scylla/tests/sstables/deleted_cell/la-2-big-Index.db delete mode 100644 scylla/tests/sstables/deleted_cell/la-2-big-Statistics.db delete mode 100644 
scylla/tests/sstables/deleted_cell/la-2-big-Summary.db delete mode 100644 scylla/tests/sstables/deleted_cell/la-2-big-TOC.txt delete mode 100644 scylla/tests/sstables/large_partition/try1-data-ka-3-CompressionInfo.db delete mode 100644 scylla/tests/sstables/large_partition/try1-data-ka-3-Data.db delete mode 100644 scylla/tests/sstables/large_partition/try1-data-ka-3-Digest.sha1 delete mode 100644 scylla/tests/sstables/large_partition/try1-data-ka-3-Filter.db delete mode 100644 scylla/tests/sstables/large_partition/try1-data-ka-3-Index.db delete mode 100644 scylla/tests/sstables/large_partition/try1-data-ka-3-Statistics.db delete mode 100644 scylla/tests/sstables/large_partition/try1-data-ka-3-Summary.db delete mode 100644 scylla/tests/sstables/large_partition/try1-data-ka-3-TOC.txt delete mode 100644 scylla/tests/sstables/list_pk/la-1-big-CRC.db delete mode 100644 scylla/tests/sstables/list_pk/la-1-big-Data.db delete mode 100644 scylla/tests/sstables/list_pk/la-1-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/list_pk/la-1-big-Filter.db delete mode 100644 scylla/tests/sstables/list_pk/la-1-big-Index.db delete mode 100644 scylla/tests/sstables/list_pk/la-1-big-Statistics.db delete mode 100644 scylla/tests/sstables/list_pk/la-1-big-Summary.db delete mode 100644 scylla/tests/sstables/list_pk/la-1-big-TOC.txt delete mode 100644 scylla/tests/sstables/map_pk/la-1-big-CRC.db delete mode 100644 scylla/tests/sstables/map_pk/la-1-big-Data.db delete mode 100644 scylla/tests/sstables/map_pk/la-1-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/map_pk/la-1-big-Filter.db delete mode 100644 scylla/tests/sstables/map_pk/la-1-big-Index.db delete mode 100644 scylla/tests/sstables/map_pk/la-1-big-Statistics.db delete mode 100644 scylla/tests/sstables/map_pk/la-1-big-Summary.db delete mode 100644 scylla/tests/sstables/map_pk/la-1-big-TOC.txt delete mode 100644 scylla/tests/sstables/multi_schema_test/test-test_multi_schema-ka-1-CompressionInfo.db delete mode 100644 scylla/tests/sstables/multi_schema_test/test-test_multi_schema-ka-1-Data.db delete mode 100644 scylla/tests/sstables/multi_schema_test/test-test_multi_schema-ka-1-Digest.sha1 delete mode 100644 scylla/tests/sstables/multi_schema_test/test-test_multi_schema-ka-1-Filter.db delete mode 100644 scylla/tests/sstables/multi_schema_test/test-test_multi_schema-ka-1-Index.db delete mode 100644 scylla/tests/sstables/multi_schema_test/test-test_multi_schema-ka-1-Statistics.db delete mode 100644 scylla/tests/sstables/multi_schema_test/test-test_multi_schema-ka-1-Summary.db delete mode 100644 scylla/tests/sstables/multi_schema_test/test-test_multi_schema-ka-1-TOC.txt delete mode 100644 scylla/tests/sstables/partition_skipping/ks-test_skipping_partitions-ka-1-CompressionInfo.db delete mode 100644 scylla/tests/sstables/partition_skipping/ks-test_skipping_partitions-ka-1-Data.db delete mode 100644 scylla/tests/sstables/partition_skipping/ks-test_skipping_partitions-ka-1-Digest.sha1 delete mode 100644 scylla/tests/sstables/partition_skipping/ks-test_skipping_partitions-ka-1-Filter.db delete mode 100644 scylla/tests/sstables/partition_skipping/ks-test_skipping_partitions-ka-1-Index.db delete mode 100644 scylla/tests/sstables/partition_skipping/ks-test_skipping_partitions-ka-1-Statistics.db delete mode 100644 scylla/tests/sstables/partition_skipping/ks-test_skipping_partitions-ka-1-Summary.db delete mode 100644 scylla/tests/sstables/partition_skipping/ks-test_skipping_partitions-ka-1-TOC.txt delete mode 100644 
scylla/tests/sstables/promoted_index_read/ks-promoted_index_read-ka-1-CompressionInfo.db delete mode 100644 scylla/tests/sstables/promoted_index_read/ks-promoted_index_read-ka-1-Data.db delete mode 100644 scylla/tests/sstables/promoted_index_read/ks-promoted_index_read-ka-1-Digest.sha1 delete mode 100644 scylla/tests/sstables/promoted_index_read/ks-promoted_index_read-ka-1-Filter.db delete mode 100644 scylla/tests/sstables/promoted_index_read/ks-promoted_index_read-ka-1-Index.db delete mode 100644 scylla/tests/sstables/promoted_index_read/ks-promoted_index_read-ka-1-Statistics.db delete mode 100644 scylla/tests/sstables/promoted_index_read/ks-promoted_index_read-ka-1-Summary.db delete mode 100644 scylla/tests/sstables/promoted_index_read/ks-promoted_index_read-ka-1-TOC.txt delete mode 100644 scylla/tests/sstables/set/la-1-big-CompressionInfo.db delete mode 100644 scylla/tests/sstables/set/la-1-big-Data.db delete mode 100644 scylla/tests/sstables/set/la-1-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/set/la-1-big-Filter.db delete mode 100644 scylla/tests/sstables/set/la-1-big-Index.db delete mode 100644 scylla/tests/sstables/set/la-1-big-Statistics.db delete mode 100644 scylla/tests/sstables/set/la-1-big-Summary.db delete mode 100644 scylla/tests/sstables/set/la-1-big-TOC.txt delete mode 100644 scylla/tests/sstables/set_pk/la-1-big-CRC.db delete mode 100644 scylla/tests/sstables/set_pk/la-1-big-Data.db delete mode 100644 scylla/tests/sstables/set_pk/la-1-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/set_pk/la-1-big-Filter.db delete mode 100644 scylla/tests/sstables/set_pk/la-1-big-Index.db delete mode 100644 scylla/tests/sstables/set_pk/la-1-big-Statistics.db delete mode 100644 scylla/tests/sstables/set_pk/la-1-big-Summary.db delete mode 100644 scylla/tests/sstables/set_pk/la-1-big-TOC.txt delete mode 100644 scylla/tests/sstables/sliced_mutation_reads/ks-sliced_mutation_reads_test-ka-1-CompressionInfo.db delete mode 100644 scylla/tests/sstables/sliced_mutation_reads/ks-sliced_mutation_reads_test-ka-1-Data.db delete mode 100644 scylla/tests/sstables/sliced_mutation_reads/ks-sliced_mutation_reads_test-ka-1-Digest.sha1 delete mode 100644 scylla/tests/sstables/sliced_mutation_reads/ks-sliced_mutation_reads_test-ka-1-Filter.db delete mode 100644 scylla/tests/sstables/sliced_mutation_reads/ks-sliced_mutation_reads_test-ka-1-Index.db delete mode 100644 scylla/tests/sstables/sliced_mutation_reads/ks-sliced_mutation_reads_test-ka-1-Statistics.db delete mode 100644 scylla/tests/sstables/sliced_mutation_reads/ks-sliced_mutation_reads_test-ka-1-Summary.db delete mode 100644 scylla/tests/sstables/sliced_mutation_reads/ks-sliced_mutation_reads_test-ka-1-TOC.txt delete mode 100644 scylla/tests/sstables/summary_test/test-summary_test-ka-1-CompressionInfo.db delete mode 100644 scylla/tests/sstables/summary_test/test-summary_test-ka-1-Data.db delete mode 100644 scylla/tests/sstables/summary_test/test-summary_test-ka-1-Digest.sha1 delete mode 100644 scylla/tests/sstables/summary_test/test-summary_test-ka-1-Filter.db delete mode 100644 scylla/tests/sstables/summary_test/test-summary_test-ka-1-Index.db delete mode 100644 scylla/tests/sstables/summary_test/test-summary_test-ka-1-Statistics.db delete mode 100644 scylla/tests/sstables/summary_test/test-summary_test-ka-1-Summary.db delete mode 100644 scylla/tests/sstables/summary_test/test-summary_test-ka-1-TOC.txt delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab-ka-1-CompressionInfo.db delete mode 100644 
scylla/tests/sstables/tombstone_overlap/try1-tab-ka-1-Data.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab-ka-1-Digest.sha1 delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab-ka-1-Filter.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab-ka-1-Index.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab-ka-1-Statistics.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab-ka-1-Summary.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab-ka-1-TOC.txt delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab-ka-4-CompressionInfo.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab-ka-4-Data.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab-ka-4-Digest.sha1 delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab-ka-4-Filter.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab-ka-4-Index.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab-ka-4-Statistics.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab-ka-4-Summary.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab-ka-4-TOC.txt delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab2-ka-3-CompressionInfo.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab2-ka-3-Data.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab2-ka-3-Digest.sha1 delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab2-ka-3-Filter.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab2-ka-3-Index.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab2-ka-3-Statistics.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab2-ka-3-Summary.db delete mode 100644 scylla/tests/sstables/tombstone_overlap/try1-tab2-ka-3-TOC.txt delete mode 100644 scylla/tests/sstables/ttl/la-1-big-CompressionInfo.db delete mode 100644 scylla/tests/sstables/ttl/la-1-big-Data.db delete mode 100644 scylla/tests/sstables/ttl/la-1-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/ttl/la-1-big-Filter.db delete mode 100644 scylla/tests/sstables/ttl/la-1-big-Index.db delete mode 100644 scylla/tests/sstables/ttl/la-1-big-Statistics.db delete mode 100644 scylla/tests/sstables/ttl/la-1-big-Summary.db delete mode 100644 scylla/tests/sstables/ttl/la-1-big-TOC.txt delete mode 100644 scylla/tests/sstables/uncompressed/la-1-big-CRC.db delete mode 100644 scylla/tests/sstables/uncompressed/la-1-big-Data.db delete mode 100644 scylla/tests/sstables/uncompressed/la-1-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/uncompressed/la-1-big-Filter.db delete mode 100644 scylla/tests/sstables/uncompressed/la-1-big-Index.db delete mode 100644 scylla/tests/sstables/uncompressed/la-1-big-Statistics.db delete mode 100644 scylla/tests/sstables/uncompressed/la-1-big-Summary.db delete mode 100644 scylla/tests/sstables/uncompressed/la-1-big-TOC.txt delete mode 100644 scylla/tests/sstables/uncompressed/la-2-big-CRC.db delete mode 100644 scylla/tests/sstables/uncompressed/la-2-big-Data.db delete mode 100644 scylla/tests/sstables/uncompressed/la-2-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/uncompressed/la-2-big-Filter.db delete mode 100644 scylla/tests/sstables/uncompressed/la-2-big-Index.db delete mode 100644 scylla/tests/sstables/uncompressed/la-2-big-Statistics.db delete mode 100644 scylla/tests/sstables/uncompressed/la-2-big-TOC.txt delete mode 
100644 scylla/tests/sstables/unknown_component/la-1-big-CRC.db delete mode 100644 scylla/tests/sstables/unknown_component/la-1-big-Data.db delete mode 100644 scylla/tests/sstables/unknown_component/la-1-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/unknown_component/la-1-big-Filter.db delete mode 100644 scylla/tests/sstables/unknown_component/la-1-big-Index.db delete mode 100644 scylla/tests/sstables/unknown_component/la-1-big-Statistics.db delete mode 100644 scylla/tests/sstables/unknown_component/la-1-big-Summary.db delete mode 100644 scylla/tests/sstables/unknown_component/la-1-big-TOC.txt delete mode 100644 scylla/tests/sstables/unknown_component/la-1-big-UNKNOWN.txt delete mode 100644 scylla/tests/sstables/wrong_range_tombstone_order/ks-wrong_range_tombstone_order-ka-1-CompressionInfo.db delete mode 100644 scylla/tests/sstables/wrong_range_tombstone_order/ks-wrong_range_tombstone_order-ka-1-Data.db delete mode 100644 scylla/tests/sstables/wrong_range_tombstone_order/ks-wrong_range_tombstone_order-ka-1-Digest.sha1 delete mode 100644 scylla/tests/sstables/wrong_range_tombstone_order/ks-wrong_range_tombstone_order-ka-1-Filter.db delete mode 100644 scylla/tests/sstables/wrong_range_tombstone_order/ks-wrong_range_tombstone_order-ka-1-Index.db delete mode 100644 scylla/tests/sstables/wrong_range_tombstone_order/ks-wrong_range_tombstone_order-ka-1-Statistics.db delete mode 100644 scylla/tests/sstables/wrong_range_tombstone_order/ks-wrong_range_tombstone_order-ka-1-Summary.db delete mode 100644 scylla/tests/sstables/wrong_range_tombstone_order/ks-wrong_range_tombstone_order-ka-1-TOC.txt delete mode 100644 scylla/tests/sstables/wrongrange/la-114-big-CompressionInfo.db delete mode 100644 scylla/tests/sstables/wrongrange/la-114-big-Data.db delete mode 100644 scylla/tests/sstables/wrongrange/la-114-big-Digest.sha1 delete mode 100644 scylla/tests/sstables/wrongrange/la-114-big-Filter.db delete mode 100644 scylla/tests/sstables/wrongrange/la-114-big-Index.db delete mode 100644 scylla/tests/sstables/wrongrange/la-114-big-Statistics.db delete mode 100644 scylla/tests/sstables/wrongrange/la-114-big-Summary.db delete mode 100644 scylla/tests/sstables/wrongrange/la-114-big-TOC.txt delete mode 100644 scylla/tests/storage_proxy_test.cc delete mode 100644 scylla/tests/streamed_mutation_test.cc delete mode 100644 scylla/tests/test-serialization.cc delete mode 100644 scylla/tests/test_services.hh delete mode 100644 scylla/tests/tmpdir.hh delete mode 100644 scylla/tests/total_order_check.hh delete mode 100644 scylla/tests/types_test.cc delete mode 100644 scylla/tests/view_schema_test.cc delete mode 100644 scylla/tests/virtual_reader_test.cc delete mode 100644 scylla/thrift/handler.cc delete mode 100644 scylla/thrift/handler.hh delete mode 100644 scylla/thrift/server.cc delete mode 100644 scylla/thrift/server.hh delete mode 100644 scylla/thrift/thrift_validation.cc delete mode 100644 scylla/thrift/thrift_validation.hh delete mode 100644 scylla/thrift/utils.hh delete mode 100644 scylla/timestamp.hh delete mode 100644 scylla/to_string.hh delete mode 100644 scylla/tombstone.hh delete mode 100644 scylla/tracing/trace_keyspace_helper.cc delete mode 100644 scylla/tracing/trace_keyspace_helper.hh delete mode 100644 scylla/tracing/trace_state.cc delete mode 100644 scylla/tracing/trace_state.hh delete mode 100644 scylla/tracing/tracing.cc delete mode 100644 scylla/tracing/tracing.hh delete mode 100644 scylla/transport/event.cc delete mode 100644 scylla/transport/event.hh delete mode 100644 
scylla/transport/event_notifier.cc delete mode 100644 scylla/transport/messages/result_message.hh delete mode 100644 scylla/transport/messages/result_message_base.hh delete mode 100644 scylla/transport/messages_fwd.hh delete mode 100644 scylla/transport/server.cc delete mode 100644 scylla/transport/server.hh delete mode 100644 scylla/types.cc delete mode 100644 scylla/types.hh delete mode 100644 scylla/unimplemented.cc delete mode 100644 scylla/unimplemented.hh delete mode 100644 scylla/validation.cc delete mode 100644 scylla/validation.hh delete mode 100644 scylla/version.hh delete mode 100644 scylla/view_info.hh rename redis_server.cc => server.cc (99%) rename redis_server.hh => server.hh (100%) create mode 100644 storage_service.cc rename redis_service.hh => storage_service.hh (63%) rename bits_operation.cc => structures/bits_operation.cc (100%) rename bits_operation.hh => structures/bits_operation.hh (100%) rename dict_lsa.cc => structures/dict_lsa.cc (100%) rename dict_lsa.hh => structures/dict_lsa.hh (100%) rename geo.cc => structures/geo.cc (100%) rename geo.hh => structures/geo.hh (100%) rename hll.cc => structures/hll.cc (100%) rename hll.hh => structures/hll.hh (100%) rename list_lsa.cc => structures/list_lsa.cc (100%) rename list_lsa.hh => structures/list_lsa.hh (100%) rename sset_lsa.cc => structures/sset_lsa.cc (100%) rename sset_lsa.hh => structures/sset_lsa.hh (100%) create mode 100644 token.cc create mode 100644 token.hh rename {scylla/utils => utils}/UUID.hh (100%) rename {scylla/utils => utils}/UUID_gen.cc (100%) rename {scylla/utils => utils}/UUID_gen.hh (100%) rename {scylla/utils => utils}/allocation_strategy.hh (100%) rename {scylla/utils => utils}/anchorless_list.hh (100%) rename {scylla/utils => utils}/big_decimal.cc (100%) rename {scylla/utils => utils}/big_decimal.hh (100%) rename {scylla/utils => utils}/bloom_calculations.cc (100%) rename {scylla/utils => utils}/bloom_calculations.hh (100%) rename {scylla/utils => utils}/bloom_filter.cc (100%) rename {scylla/utils => utils}/bloom_filter.hh (100%) rename {scylla/utils => utils}/bounded_stats_deque.hh (100%) rename {scylla => utils}/bytes.cc (100%) rename {scylla => utils}/bytes.hh (100%) rename {scylla => utils}/bytes_ostream.hh (100%) rename {scylla/utils => utils}/class_registrator.hh (100%) rename {scylla/utils => utils}/crc.hh (100%) rename {scylla/utils => utils}/data_input.hh (100%) rename {scylla/utils => utils}/data_output.hh (100%) rename {scylla/utils => utils}/date.h (100%) rename {scylla/utils => utils}/div_ceil.hh (100%) rename {scylla/utils => utils}/dynamic_bitset.cc (100%) rename {scylla/utils => utils}/dynamic_bitset.hh (100%) rename {scylla/utils => utils}/estimated_histogram.hh (100%) rename {scylla/utils => utils}/exceptions.cc (100%) rename {scylla/utils => utils}/exceptions.hh (100%) rename {scylla/utils => utils}/exponential_backoff_retry.hh (100%) rename {scylla/utils => utils}/fb_utilities.hh (100%) rename {scylla/utils => utils}/file_lock.cc (100%) rename {scylla/utils => utils}/file_lock.hh (100%) rename {scylla/utils => utils}/flush_queue.hh (100%) rename {scylla/utils => utils}/hash.hh (100%) rename {scylla/utils => utils}/histogram.hh (100%) rename {scylla/utils => utils}/i_filter.cc (100%) rename {scylla/utils => utils}/i_filter.hh (100%) rename {scylla/utils => utils}/input_stream.hh (100%) rename {scylla/utils => utils}/int_range.hh (100%) rename {scylla/utils => utils}/joinpoint.hh (100%) rename {scylla/utils => utils}/large_bitset.cc (100%) rename {scylla/utils => 
utils}/large_bitset.hh (100%) rename {scylla/utils => utils}/latency.hh (100%) rename {scylla/utils => utils}/loading_cache.hh (100%) rename {scylla/utils => utils}/log_histogram.hh (100%) rename {scylla/utils => utils}/logalloc.cc (100%) rename {scylla/utils => utils}/logalloc.hh (100%) rename {scylla/utils => utils}/managed_bytes.cc (100%) rename {scylla/utils => utils}/managed_bytes.hh (100%) rename {scylla/utils => utils}/managed_ref.hh (100%) rename {scylla/utils => utils}/managed_vector.hh (100%) rename {scylla/utils => utils}/move.hh (100%) rename {scylla/utils => utils}/murmur_hash.cc (100%) rename {scylla/utils => utils}/murmur_hash.hh (100%) rename {scylla/utils => utils}/mutable_view.hh (100%) rename {scylla/utils => utils}/optimized_optional.hh (100%) rename {scylla/utils => utils}/phased_barrier.hh (100%) rename {scylla/utils => utils}/rate_limiter.cc (100%) rename {scylla/utils => utils}/rate_limiter.hh (100%) rename {scylla/utils => utils}/runtime.cc (100%) rename {scylla/utils => utils}/runtime.hh (100%) rename {scylla/utils => utils}/sequenced_set.hh (100%) rename {scylla/utils => utils}/serialization.hh (100%) rename {scylla/utils => utils}/to_boost_visitor.hh (100%) rename {scylla/utils => utils}/utils.cc (100%) rename {scylla/utils => utils}/uuid.cc (100%) rename {scylla/utils => utils}/with_relational_operators.hh (100%) diff --git a/cache.hh b/cache.hh index 60a47af..e2539c7 100644 --- a/cache.hh +++ b/cache.hh @@ -35,7 +35,6 @@ #include "sset_lsa.hh" #include "hll.hh" #include "log.hh" -#include "dht/i_partitioner.hh" #include "core/timer-set.hh" #include "hll.hh" #include "bytes.hh" @@ -256,11 +255,11 @@ public: const auto& rk = r._key; return (l.key_hash() == r.key_hash()) && (lk == rk); } - inline bool operator () (const dht::decorated_key& dk, const cache_entry& e) const { + inline bool operator () (const decorated_key& dk, const cache_entry& e) const { return std::hash()(dk.key().representation()) == e.key_hash() && dk.key().representation() == e.key(); } - inline bool operator () (const cache_entry& e, const dht::decorated_key& dk) const { + inline bool operator () (const cache_entry& e, const decorated_key& dk) const { return std::hash()(dk.key().representation()) == e.key_hash() && dk.key().representation() == e.key(); } @@ -464,9 +463,9 @@ public: _store.erase_and_dispose(_store.begin(), _store.end(), current_deleter()); } - inline bool erase(const dht::decorated_key& dk) + inline bool erase(const decorated_key& dk) { - auto hash_fn = [] (const dht::decorated_key& dk) -> size_t { + auto hash_fn = [] (const decorated_key& dk) -> size_t { return std::hash()(dk.key().representation()); }; auto it = _store.find(dk, hash_fn, cache_entry::compare()); @@ -562,8 +561,8 @@ public: } template <typename Func> - inline std::result_of_t<Func(cache_entry*)> with_entry_run(const dht::decorated_key& dk, Func&& func) const { - auto hash_fn = [] (const dht::decorated_key& dk) -> size_t { + inline std::result_of_t<Func(cache_entry*)> run_with_entry(const decorated_key& dk, Func&& func) const { + auto hash_fn = [] (const decorated_key& dk) -> size_t { return std::hash()(dk.key().representation()); }; auto it = _store.find(dk, hash_fn, cache_entry::compare()); @@ -577,8 +576,8 @@ public: } template <typename Func> - inline std::result_of_t<Func(cache_entry*)> with_entry_run(const dht::decorated_key& dk, Func&& func) { - auto hash_fn = [] (const dht::decorated_key& dk) -> size_t { + inline std::result_of_t<Func(cache_entry*)> run_with_entry(const decorated_key& dk, Func&& func) { + auto hash_fn = [] (const decorated_key& dk) -> size_t { return std::hash()(dk.key().representation()); };
auto it = _store.find(dk, hash_fn, cache_entry::compare()); @@ -592,9 +591,9 @@ public: } - inline bool exists(const dht::decorated_key& dk) + inline bool exists(const decorated_key& dk) { - auto hash_fn = [] (const dht::decorated_key& dk) -> size_t { + auto hash_fn = [] (const decorated_key& dk) -> size_t { return std::hash()(dk.key().representation()); }; auto it = _store.find(dk, hash_fn, cache_entry::compare()); @@ -627,10 +626,10 @@ public: return _store.empty(); } - bool expire(const dht::decorated_key& dk, long expired) + bool expire(const decorated_key& dk, long expired) { bool result = false; - auto hash_fn = [] (const dht::decorated_key& dk) -> size_t { + auto hash_fn = [] (const decorated_key& dk) -> size_t { return std::hash()(dk.key().representation()); }; auto it = _store.find(dk, hash_fn, cache_entry::compare()); @@ -659,10 +658,10 @@ public: _timer.arm(_alive.get_next_timeout()); } - bool never_expired(const dht::decorated_key& dk) + bool never_expired(const decorated_key& dk) { bool result = false; - auto hash_fn = [] (const dht::decorated_key& dk) -> size_t { + auto hash_fn = [] (const decorated_key& dk) -> size_t { return std::hash()(dk.key().representation()); }; auto it = _store.find(dk, hash_fn, cache_entry::compare()); diff --git a/conf/cassandra-rackdc.properties b/conf/cassandra-rackdc.properties deleted file mode 100644 index 61a1df7..0000000 --- a/conf/cassandra-rackdc.properties +++ /dev/null @@ -1,15 +0,0 @@ -# -# cassandra-rackdc.properties -# -# The lines may include white spaces at the beginning and the end. -# The rack and data center names may also include white spaces. -# All trailing and leading white spaces will be trimmed. -# -# dc=my_data_center -# rack=my_rack -# prefer_local= -# dc_suffix= -# - - - diff --git a/conf/housekeeping.cfg b/conf/housekeeping.cfg deleted file mode 100644 index 817ee14..0000000 --- a/conf/housekeeping.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[housekeeping] -check-version: True diff --git a/conf/scylla.yaml b/conf/scylla.yaml deleted file mode 100644 index 8a73436..0000000 --- a/conf/scylla.yaml +++ /dev/null @@ -1,798 +0,0 @@ -# Scylla storage config YAML - -####################################### -# This file is split to two sections: -# 1. Supported parameters -# 2. Unsupported parameters: reserved for future use or backwards -# compatibility. -# Scylla will only read and use the first segment -####################################### - -### Supported Parameters - -# The name of the cluster. This is mainly used to prevent machines in -# one logical cluster from joining another. -cluster_name: 'Test Cluster' - -# This defines the number of tokens randomly assigned to this node on the ring -# The more tokens, relative to other nodes, the larger the proportion of data -# that this node will store. You probably want all nodes to have the same number -# of tokens assuming they have equal hardware capability. -# -# If you already have a cluster with 1 token per node, and wish to migrate to -# multiple tokens per node, see http://wiki.apache.org/cassandra/Operations -num_tokens: 256 - -# Directory where Scylla should store data on disk. -# If not set, the default directory is $CASSANDRA_HOME/data/data. -data_file_directories: - - /var/lib/scylla/data - -# commit log. when running on magnetic HDD, this should be a -# separate spindle than the data directories. -# If not set, the default directory is $CASSANDRA_HOME/data/commitlog. -commitlog_directory: /var/lib/scylla/commitlog - -# commitlog_sync may be either "periodic" or "batch." 
-# -# When in batch mode, Scylla won't ack writes until the commit log -# has been fsynced to disk. It will wait -# commitlog_sync_batch_window_in_ms milliseconds between fsyncs. -# This window should be kept short because the writer threads will -# be unable to do extra work while waiting. (You may need to increase -# concurrent_writes for the same reason.) -# -# commitlog_sync: batch -# commitlog_sync_batch_window_in_ms: 2 -# -# the other option is "periodic" where writes may be acked immediately -# and the CommitLog is simply synced every commitlog_sync_period_in_ms -# milliseconds. -commitlog_sync: periodic -commitlog_sync_period_in_ms: 10000 - -# The size of the individual commitlog file segments. A commitlog -# segment may be archived, deleted, or recycled once all the data -# in it (potentially from each columnfamily in the system) has been -# flushed to sstables. -# -# The default size is 32, which is almost always fine, but if you are -# archiving commitlog segments (see commitlog_archiving.properties), -# then you probably want a finer granularity of archiving; 8 or 16 MB -# is reasonable. -commitlog_segment_size_in_mb: 32 - -# seed_provider class_name is saved for future use. -# seed addresses are mandatory! -seed_provider: - # Addresses of hosts that are deemed contact points. - # Scylla nodes use this list of hosts to find each other and learn - # the topology of the ring. You must change this if you are running - # multiple nodes! - - class_name: org.apache.cassandra.locator.SimpleSeedProvider - parameters: - # seeds is actually a comma-delimited list of addresses. - # Ex: "<ip1>,<ip2>,<ip3>" - - seeds: "127.0.0.1" - -# Address or interface to bind to and tell other Scylla nodes to connect to. -# You _must_ change this if you want multiple nodes to be able to communicate! -# -# Setting listen_address to 0.0.0.0 is always wrong. -listen_address: localhost - -# Address to broadcast to other Scylla nodes -# Leaving this blank will set it to the same value as listen_address -# broadcast_address: 1.2.3.4 - -# port for the CQL native transport to listen for clients on -# For security reasons, you should not expose this port to the internet. Firewall it if needed. -native_transport_port: 9042 - -# Enabling native transport encryption in client_encryption_options allows you to either use -# encryption for the standard port or to use a dedicated, additional port along with the unencrypted -# standard native_transport_port. -# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption -# for native_transport_port. Setting native_transport_port_ssl to a different value -# from native_transport_port will use encryption for native_transport_port_ssl while -# keeping native_transport_port unencrypted. -#native_transport_port_ssl: 9142 - -# Throttles all outbound streaming file transfers on this node to the -# given total throughput in Mbps. This is necessary because Scylla does -# mostly sequential IO when streaming data during bootstrap or repair, which -# can lead to saturating the network connection and degrading rpc performance. -# When unset, the default is 200 Mbps or 25 MB/s. -# stream_throughput_outbound_megabits_per_sec: 200 - -# How long the coordinator should wait for read operations to complete -read_request_timeout_in_ms: 5000 - -# How long the coordinator should wait for writes to complete -write_request_timeout_in_ms: 2000 - -# phi value that must be reached for a host to be marked down. -# most users should never need to adjust this.
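For context on the phi accrual failure detector that this setting tunes (the gms/failure_detector.cc it relies on moves unchanged in this patch's file list), here is a minimal sketch of the phi computation, assuming exponentially distributed heartbeat inter-arrival times; the class and member names are hypothetical, not code from this patch. The phi_convict_threshold default of 8 follows on the next line of the deleted file.

#include <deque>
#include <numeric>

// Hypothetical sketch of an accrual failure detector, not the gms/ code
// itself: phi grows with the time elapsed since the last heartbeat, scaled
// by the observed mean inter-arrival interval, and the host is convicted
// once phi exceeds the configured threshold (phi_convict_threshold).
class accrual_detector {
    std::deque<double> _intervals;  // recent heartbeat gaps, in seconds
    double _last_beat = -1;         // timestamp of the last heartbeat
public:
    void heartbeat(double now) {
        if (_last_beat >= 0) {
            _intervals.push_back(now - _last_beat);
            if (_intervals.size() > 1000) {
                _intervals.pop_front();  // keep a bounded sliding window
            }
        }
        _last_beat = now;
    }
    double phi(double now) const {
        if (_intervals.empty()) {
            return 0.0;
        }
        double mean = std::accumulate(_intervals.begin(), _intervals.end(), 0.0)
                      / _intervals.size();
        // Exponential model: P(gap > t) = exp(-t / mean), so
        // phi = -log10(P) = (t / mean) * log10(e).
        return (now - _last_beat) / mean * 0.4342944819;
    }
    bool convicted(double now, double threshold = 8.0) const {
        return phi(now) > threshold;
    }
};

int main() {
    accrual_detector d;
    for (double t = 0; t < 10; t += 1.0) d.heartbeat(t);  // steady 1 s beats
    return d.convicted(30.0) ? 0 : 1;  // a long silence drives phi past 8
}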
-# phi_convict_threshold: 8 - -# IEndpointSnitch. The snitch has two functions: -# - it teaches Scylla enough about your network topology to route -# requests efficiently -# - it allows Scylla to spread replicas around your cluster to avoid -# correlated failures. It does this by grouping machines into -# "datacenters" and "racks." Scylla will do its best not to have -# more than one replica on the same "rack" (which may not actually -# be a physical location) -# -# IF YOU CHANGE THE SNITCH AFTER DATA IS INSERTED INTO THE CLUSTER, -# YOU MUST RUN A FULL REPAIR, SINCE THE SNITCH AFFECTS WHERE REPLICAS -# ARE PLACED. -# -# Out of the box, Scylla provides -# - SimpleSnitch: -# Treats Strategy order as proximity. This can improve cache -# locality when disabling read repair. Only appropriate for -# single-datacenter deployments. -# - GossipingPropertyFileSnitch -# This should be your go-to snitch for production use. The rack -# and datacenter for the local node are defined in -# cassandra-rackdc.properties and propagated to other nodes via -# gossip. If cassandra-topology.properties exists, it is used as a -# fallback, allowing migration from the PropertyFileSnitch. -# - PropertyFileSnitch: -# Proximity is determined by rack and data center, which are -# explicitly configured in cassandra-topology.properties. -# - Ec2Snitch: -# Appropriate for EC2 deployments in a single Region. Loads Region -# and Availability Zone information from the EC2 API. The Region is -# treated as the datacenter, and the Availability Zone as the rack. -# Only private IPs are used, so this will not work across multiple -# Regions. -# - Ec2MultiRegionSnitch: -# Uses public IPs as broadcast_address to allow cross-region -# connectivity. (Thus, you should set seed addresses to the public -# IP as well.) You will need to open the storage_port or -# ssl_storage_port on the public IP firewall. (For intra-Region -# traffic, Scylla will switch to the private IP after -# establishing a connection.) -# - RackInferringSnitch: -# Proximity is determined by rack and data center, which are -# assumed to correspond to the 3rd and 2nd octet of each node's IP -# address, respectively. Unless this happens to match your -# deployment conventions, this is best used as an example of -# writing a custom Snitch class and is provided in that spirit. -# -# You can use a custom Snitch by setting this to the full class name -# of the snitch, which will be assumed to be on your classpath. -endpoint_snitch: SimpleSnitch - -# The address or interface to bind the Thrift RPC service and native transport -# server to. -# -# Set rpc_address OR rpc_interface, not both. Interfaces must correspond -# to a single address, IP aliasing is not supported. -# -# Leaving rpc_address blank has the same effect as on listen_address -# (i.e. it will be based on the configured hostname of the node). -# -# Note that unlike listen_address, you can specify 0.0.0.0, but you must also -# set broadcast_rpc_address to a value other than 0.0.0.0. -# -# For security reasons, you should not expose this port to the internet. Firewall it if needed. -# -# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address -# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4 -# address will be used. If true the first ipv6 address will be used. Defaults to false preferring -# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. 
-rpc_address: localhost -# rpc_interface: eth1 -# rpc_interface_prefer_ipv6: false - -# port for Thrift to listen for clients on -rpc_port: 9160 - -# port for REST API server -api_port: 10000 - -# IP for the REST API server -api_address: 127.0.0.1 - -# Log WARN on any batch size exceeding this value. 5kb per batch by default. -# Caution should be taken on increasing the size of this threshold as it can lead to node instability. -batch_size_warn_threshold_in_kb: 5 - -# Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default. -batch_size_fail_threshold_in_kb: 50 - -# Authentication backend, identifying users -# Out of the box, Scylla provides org.apache.cassandra.auth.{AllowAllAuthenticator, -# PasswordAuthenticator}. -# -# - AllowAllAuthenticator performs no checks - set it to disable authentication. -# - PasswordAuthenticator relies on username/password pairs to authenticate -# users. It keeps usernames and hashed passwords in system_auth.credentials table. -# Please increase system_auth keyspace replication factor if you use this authenticator. -# authenticator: AllowAllAuthenticator - -# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions -# Out of the box, Scylla provides org.apache.cassandra.auth.{AllowAllAuthorizer, -# CassandraAuthorizer}. -# -# - AllowAllAuthorizer allows any action to any user - set it to disable authorization. -# - CassandraAuthorizer stores permissions in system_auth.permissions table. Please -# increase system_auth keyspace replication factor if you use this authorizer. -# authorizer: AllowAllAuthorizer - -# initial_token allows you to specify tokens manually. While you can use # it with -# vnodes (num_tokens > 1, above) -- in which case you should provide a -# comma-separated list -- it's primarily used when adding nodes # to legacy clusters -# that do not have vnodes enabled. -# initial_token: - -# RPC address to broadcast to drivers and other Scylla nodes. This cannot -# be set to 0.0.0.0. If left blank, this will be set to the value of -# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must -# be set. -# broadcast_rpc_address: 1.2.3.4 - -# Uncomment to enable experimental features -# experimental: true - -################################################### -## Not currently supported, reserved for future use -################################################### - -# See http://wiki.apache.org/cassandra/HintedHandoff -# May either be "true" or "false" to enable globally, or contain a list -# of data centers to enable per-datacenter. -# hinted_handoff_enabled: DC1,DC2 -# hinted_handoff_enabled: true - -# this defines the maximum amount of time a dead host will have hints -# generated. After it has been dead this long, new hints for it will not be -# created until it has been seen alive and gone down again. -# max_hint_window_in_ms: 10800000 # 3 hours - -# Maximum throttle in KBs per second, per delivery thread. This will be -# reduced proportionally to the number of nodes in the cluster. (If there -# are two nodes in the cluster, each delivery thread will use the maximum -# rate; if there are three, each will throttle to half of the maximum, -# since we expect two nodes to be delivering hints simultaneously.) 
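The proportional-reduction rule just described is simple arithmetic; a small self-contained sketch of it (the function name is mine, the values come from the text above; the hinted_handoff_throttle_in_kb default itself follows below):

#include <cassert>

// Sketch of the documented rule: the per-thread hint delivery rate is the
// configured maximum divided by the number of peer nodes expected to be
// delivering hints to the same endpoint at once (cluster_size - 1).
unsigned effective_hint_throttle_kb(unsigned configured_kb, unsigned cluster_size) {
    assert(cluster_size >= 2);
    return configured_kb / (cluster_size - 1);
}

int main() {
    assert(effective_hint_throttle_kb(1024, 2) == 1024); // two nodes: full rate
    assert(effective_hint_throttle_kb(1024, 3) == 512);  // three nodes: half rate
    return 0;
}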
-# hinted_handoff_throttle_in_kb: 1024 -# Number of threads with which to deliver hints; -# Consider increasing this number when you have multi-dc deployments, since -# cross-dc handoff tends to be slower -# max_hints_delivery_threads: 2 - -# Maximum throttle in KBs per second, total. This will be -# reduced proportionally to the number of nodes in the cluster. -# batchlog_replay_throttle_in_kb: 1024 - -# Validity period for permissions cache (fetching permissions can be an -# expensive operation depending on the authorizer, CassandraAuthorizer is -# one example). Defaults to 2000, set to 0 to disable. -# Will be disabled automatically for AllowAllAuthorizer. -# permissions_validity_in_ms: 2000 - -# Refresh interval for permissions cache (if enabled). -# After this interval, cache entries become eligible for refresh. Upon next -# access, an async reload is scheduled and the old value returned until it -# completes. If permissions_validity_in_ms is non-zero, then this must be -# non-zero as well. -# Defaults to the same value as permissions_validity_in_ms. -# permissions_update_interval_in_ms: 1000 - -# The partitioner is responsible for distributing groups of rows (by -# partition key) across nodes in the cluster. You should leave this -# alone for new clusters. The partitioner can NOT be changed without -# reloading all data, so when upgrading you should set this to the -# same partitioner you were already using. -# -# Besides Murmur3Partitioner, partitioners included for backwards -# compatibility include RandomPartitioner, ByteOrderedPartitioner, and -# OrderPreservingPartitioner. -# -partitioner: org.apache.cassandra.dht.Murmur3Partitioner - -# Maximum size of the key cache in memory. -# -# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the -# minimum, sometimes more. The key cache is fairly tiny for the amount of -# time it saves, so it's worthwhile to use it at large numbers. -# The row cache saves even more time, but must contain the entire row, -# so it is extremely space-intensive. It's best to only use the -# row cache if you have hot rows or static rows. -# -# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup. -# -# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache. -# key_cache_size_in_mb: - -# Duration in seconds after which Scylla should -# save the key cache. Caches are saved to saved_caches_directory as -# specified in this configuration file. -# -# Saved caches greatly improve cold-start speeds, and are relatively cheap in -# terms of I/O for the key cache. Row cache saving is much more expensive and -# has limited use. -# -# Default is 14400 or 4 hours. -# key_cache_save_period: 14400 - -# Number of keys from the key cache to save -# Disabled by default, meaning all keys are going to be saved -# key_cache_keys_to_save: 100 - -# Maximum size of the row cache in memory. -# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup. -# -# Default value is 0, to disable row caching. -# row_cache_size_in_mb: 0 - -# Duration in seconds after which Scylla should -# save the row cache. Caches are saved to saved_caches_directory as specified -# in this configuration file. -# -# Saved caches greatly improve cold-start speeds, and are relatively cheap in -# terms of I/O for the key cache. Row cache saving is much more expensive and -# has limited use. -# -# Default is 0 to disable saving the row cache.
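As an aside before the deleted file continues below: the "auto" sizing rule quoted above for the key cache, min(5% of heap (in MB), 100 MB), works out as in this small illustration (the helper name is mine, not from the configuration code):

#include <algorithm>
#include <cstdio>

// Illustrates the documented "auto" default: min(5% of heap (in MB), 100 MB).
unsigned auto_key_cache_size_mb(unsigned heap_mb) {
    return std::min(heap_mb / 20, 100u);
}

int main() {
    std::printf("%u\n", auto_key_cache_size_mb(1024)); // 51 MB for a 1 GiB heap
    std::printf("%u\n", auto_key_cache_size_mb(8192)); // capped at 100 MB
    return 0;
}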
-# row_cache_save_period: 0 - -# Number of keys from the row cache to save -# Disabled by default, meaning all keys are going to be saved -# row_cache_keys_to_save: 100 - -# Maximum size of the counter cache in memory. -# -# Counter cache helps to reduce counter locks' contention for hot counter cells. -# In case of RF = 1 a counter cache hit will cause Scylla to skip the read before -# write entirely. With RF > 1 a counter cache hit will still help to reduce the duration -# of the lock hold, helping with hot counter cell updates, but will not allow skipping -# the read entirely. Only the local (clock, count) tuple of a counter cell is kept -# in memory, not the whole counter, so it's relatively cheap. -# -# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup. -# -# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache. -# NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache. -# counter_cache_size_in_mb: - -# Duration in seconds after which Scylla should -# save the counter cache (keys only). Caches are saved to saved_caches_directory as -# specified in this configuration file. -# -# Default is 7200 or 2 hours. -# counter_cache_save_period: 7200 - -# Number of keys from the counter cache to save -# Disabled by default, meaning all keys are going to be saved -# counter_cache_keys_to_save: 100 - -# The off-heap memory allocator. Affects storage engine metadata as -# well as caches. Experiments show that JEMAlloc saves more memory -# than the native GCC allocator (i.e., JEMalloc is more -# fragmentation-resistant). -# -# Supported values are: NativeAllocator, JEMallocAllocator -# -# If you intend to use JEMallocAllocator you have to install JEMalloc as a library and -# modify cassandra-env.sh as directed in the file. -# -# Defaults to NativeAllocator -# memory_allocator: NativeAllocator - -# saved caches -# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches. -# saved_caches_directory: /var/lib/scylla/saved_caches - - - -# For workloads with more data than can fit in memory, Scylla's -# bottleneck will be reads that need to fetch data from -# disk. "concurrent_reads" should be set to (16 * number_of_drives) in -# order to allow the operations to enqueue low enough in the stack -# that the OS and drives can reorder them. Same applies to -# "concurrent_counter_writes", since counter writes read the current -# values before incrementing and writing them back. -# -# On the other hand, since writes are almost never IO bound, the ideal -# number of "concurrent_writes" is dependent on the number of cores in -# your system; (8 * number_of_cores) is a good rule of thumb. -# concurrent_reads: 32 -# concurrent_writes: 32 -# concurrent_counter_writes: 32 - -# Total memory to use for sstable-reading buffers. Defaults to -# the smaller of 1/4 of heap or 512MB. -# file_cache_size_in_mb: 512 - -# Total space to use for commitlogs. -# -# If space gets above this value (it will round up to the next nearest -# segment multiple), Scylla will flush every dirty CF in the oldest -# segment and remove it. So a small total commitlog space will tend -# to cause more flush activity on less-active columnfamilies. -# -# A value of -1 (default) will automatically equate it to the total amount of memory -# available for Scylla. -commitlog_total_space_in_mb: -1 - -# A fixed memory pool size in MB for SSTable index summaries.
If left -# empty, this will default to 5% of the heap size. If the memory usage of -# all index summaries exceeds this limit, SSTables with low read rates will -# shrink their index summaries in order to meet this limit. However, this -# is a best-effort process. In extreme conditions Scylla may need to use -# more than this amount of memory. -# index_summary_capacity_in_mb: - -# How frequently index summaries should be resampled. This is done -# periodically to redistribute memory from the fixed-size pool to sstables -# proportional to their recent read rates. Setting to -1 will disable this -# process, leaving existing index summaries at their current sampling level. -# index_summary_resize_interval_in_minutes: 60 - -# Whether to, when doing sequential writing, fsync() at intervals in -# order to force the operating system to flush the dirty -# buffers. Enable this to avoid sudden dirty buffer flushing from -# impacting read latencies. Almost always a good idea on SSDs; not -# necessarily on platters. -# trickle_fsync: false -# trickle_fsync_interval_in_kb: 10240 - -# TCP port, for commands and data -# For security reasons, you should not expose this port to the internet. Firewall it if needed. -# storage_port: 7000 - -# SSL port, for encrypted communication. Unused unless enabled in -# encryption_options -# For security reasons, you should not expose this port to the internet. Firewall it if needed. -# ssl_storage_port: 7001 - -# listen_interface: eth0 -# listen_interface_prefer_ipv6: false - -# Internode authentication backend, implementing IInternodeAuthenticator; -# used to allow/disallow connections from peer nodes. -# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator - -# Whether to start the native transport server. -# Please note that the address on which the native transport is bound is the -# same as the rpc_address. The port however is different and specified below. -# start_native_transport: true - -# The maximum threads for handling requests when the native transport is used. -# This is similar to rpc_max_threads though the default differs slightly (and -# there is no native_transport_min_threads, idle threads will always be stopped -# after 30 seconds). -# native_transport_max_threads: 128 -# -# The maximum size of an allowed frame. Frames (requests) larger than this will -# be rejected as invalid. The default is 256MB. -# native_transport_max_frame_size_in_mb: 256 - -# The maximum number of concurrent client connections. -# The default is -1, which means unlimited. -# native_transport_max_concurrent_connections: -1 - -# The maximum number of concurrent client connections per source ip. -# The default is -1, which means unlimited. -# native_transport_max_concurrent_connections_per_ip: -1 - -# Whether to start the thrift rpc server. -# start_rpc: true - -# enable or disable keepalive on rpc/native connections -# rpc_keepalive: true - -# Scylla provides two out-of-the-box options for the RPC Server: -# -# sync -> One thread per thrift connection. For a very large number of clients, memory -# will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size -# per thread, and that will correspond to your use of virtual memory (but physical memory -# may be limited depending on use of stack space). -# -# hsha -> Stands for "half synchronous, half asynchronous." All thrift clients are handled -# asynchronously using a small number of threads that does not vary with the number -# of thrift clients (and thus scales well to many clients).
The rpc requests are still -# synchronous (one thread per active request). If hsha is selected then it is essential -# that rpc_max_threads is changed from the default value of unlimited. -# -# The default is sync because on Windows hsha is about 30% slower. On Linux, -# sync/hsha performance is about the same, with hsha of course using less memory. -# -# Alternatively, you can provide your own RPC server by providing the fully-qualified class name -# of an o.a.c.t.TServerFactory that can create an instance of it. -# rpc_server_type: sync - -# Uncomment rpc_min|max_thread to set request pool size limits. -# -# Regardless of your choice of RPC server (see above), the number of maximum requests in the -# RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync -# RPC server, it also dictates the number of clients that can be connected at all). -# -# The default is unlimited and thus provides no protection against clients overwhelming the server. You are -# encouraged to set a maximum that makes sense for you in production, but do keep in mind that -# rpc_max_threads represents the maximum number of client requests this server may execute concurrently. -# -# rpc_min_threads: 16 -# rpc_max_threads: 2048 - -# uncomment to set socket buffer sizes on rpc connections -# rpc_send_buff_size_in_bytes: -# rpc_recv_buff_size_in_bytes: - -# Uncomment to set socket buffer size for internode communication -# Note that when setting this, the buffer size is limited by net.core.wmem_max -# and when not setting it, it is defined by net.ipv4.tcp_wmem -# See: -# /proc/sys/net/core/wmem_max -# /proc/sys/net/core/rmem_max -# /proc/sys/net/ipv4/tcp_wmem -# /proc/sys/net/ipv4/tcp_rmem -# and: man tcp -# internode_send_buff_size_in_bytes: -# internode_recv_buff_size_in_bytes: - -# Frame size for thrift (maximum message length). -# thrift_framed_transport_size_in_mb: 15 - -# Set to true to have Scylla create a hard link to each sstable -# flushed or streamed locally in a backups/ subdirectory of the -# keyspace data. Removing these links is the operator's -# responsibility. -# incremental_backups: false - -# Whether or not to take a snapshot before each compaction. Be -# careful using this option, since Scylla won't clean up the -# snapshots for you. Mostly useful if you're paranoid when there -# is a data format change. -# snapshot_before_compaction: false - -# Whether or not a snapshot is taken of the data before keyspace truncation -# or dropping of column families. The STRONGLY advised default of true -# should be used to provide data safety. If you set this flag to false, you will -# lose data on truncation or drop. -# auto_snapshot: true - -# When executing a scan, within or across a partition, we need to keep the -# tombstones seen in memory so we can return them to the coordinator, which -# will use them to make sure other replicas also know about the deleted rows. -# With workloads that generate a lot of tombstones, this can cause performance -# problems and even exhaust the server heap. -# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets) -# Adjust the thresholds here if you understand the dangers and want to -# scan more tombstones anyway. These thresholds may also be adjusted at runtime -# using the StorageService mbean. -# tombstone_warn_threshold: 1000 -# tombstone_failure_threshold: 100000 - -# Granularity of the collation index of rows within a partition.
-# Increase if your rows are large, or if you have a very large -# number of rows per partition. The competing goals are these: -# 1) a smaller granularity means more index entries are generated -# and looking up rows within the partition by collation column -# is faster -# 2) but, Scylla will keep the collation index in memory for hot -# rows (as part of the key cache), so a larger granularity means -# you can cache more hot rows -# column_index_size_in_kb: 64 - - -# Number of simultaneous compactions to allow, NOT including -# validation "compactions" for anti-entropy repair. Simultaneous -# compactions can help preserve read performance in a mixed read/write -# workload, by mitigating the tendency of small sstables to accumulate -# during a single long-running compaction. The default is usually -# fine and if you experience problems with compaction running too -# slowly or too fast, you should look at -# compaction_throughput_mb_per_sec first. -# -# concurrent_compactors defaults to the smaller of (number of disks, -# number of cores), with a minimum of 2 and a maximum of 8. -# -# If your data directories are backed by SSD, you should increase this -# to the number of cores. -#concurrent_compactors: 1 - -# Throttles compaction to the given total throughput across the entire -# system. The faster you insert data, the faster you need to compact in -# order to keep the sstable count down, but in general, setting this to -# 16 to 32 times the rate you are inserting data is more than sufficient. -# Setting this to 0 disables throttling. Note that this accounts for all types -# of compaction, including validation compaction. -# compaction_throughput_mb_per_sec: 16 - -# Log a warning when compacting partitions larger than this value -# compaction_large_partition_warning_threshold_mb: 100 - -# When compacting, the replacement sstable(s) can be opened before they -# are completely written, and used in place of the prior sstables for -# any range that has been written. This helps to smoothly transfer reads -# between the sstables, reducing page cache churn and keeping hot rows hot -# sstable_preemptive_open_interval_in_mb: 50 - -# Throttles all streaming file transfers between the datacenters, -# this setting allows users to throttle inter dc stream throughput in addition -# to throttling all network stream traffic as configured with -# stream_throughput_outbound_megabits_per_sec -# inter_dc_stream_throughput_outbound_megabits_per_sec: - -# How long the coordinator should wait for seq or index scans to complete -# range_request_timeout_in_ms: 10000 -# How long the coordinator should wait for writes to complete -# counter_write_request_timeout_in_ms: 5000 -# How long a coordinator should continue to retry a CAS operation -# that contends with other proposals for the same row -# cas_contention_timeout_in_ms: 1000 -# How long the coordinator should wait for truncates to complete -# (This can be much longer, because unless auto_snapshot is disabled -# we need to flush first so we can snapshot before removing the data.) -# truncate_request_timeout_in_ms: 60000 -# The default timeout for other, miscellaneous operations -# request_timeout_in_ms: 10000 - -# Enable operation timeout information exchange between nodes to accurately -# measure request timeouts. If disabled, replicas will assume that requests -# were forwarded to them instantly by the coordinator, which means that -# under overload conditions we will waste that much extra time processing -# already-timed-out requests.
-# -# Warning: before enabling this property make sure ntp is installed -# and the times are synchronized between the nodes. -# cross_node_timeout: false - -# Enable socket timeout for streaming operations. -# When a timeout occurs during streaming, streaming is retried from the start -# of the current file. This _can_ involve re-streaming a significant amount of -# data, so you should avoid setting the value too low. -# Default value is 0, which means streams never time out. -# streaming_socket_timeout_in_ms: 0 - -# controls how often to perform the more expensive part of host score -# calculation -# dynamic_snitch_update_interval_in_ms: 100 - -# controls how often to reset all host scores, allowing a bad host to -# possibly recover -# dynamic_snitch_reset_interval_in_ms: 600000 - -# if set greater than zero and read_repair_chance is < 1.0, this will allow -# 'pinning' of replicas to hosts in order to increase cache capacity. -# The badness threshold will control how much worse the pinned host has to be -# before the dynamic snitch will prefer other replicas over it. This is -# expressed as a double which represents a percentage. Thus, a value of -# 0.2 means Scylla would continue to prefer the static snitch values -# until the pinned host was 20% worse than the fastest. -# dynamic_snitch_badness_threshold: 0.1 - -# request_scheduler -- Set this to a class that implements -# RequestScheduler, which will schedule incoming client requests -# according to the specific policy. This is useful for multi-tenancy -# with a single Scylla cluster. -# NOTE: This is specifically for requests from the client and does -# not affect inter node communication. -# org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place -# org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of -# client requests to a node with a separate queue for each -# request_scheduler_id. The scheduler is further customized by -# request_scheduler_options as described below. -# request_scheduler: org.apache.cassandra.scheduler.NoScheduler - -# Scheduler Options vary based on the type of scheduler -# NoScheduler - Has no options -# RoundRobin -# - throttle_limit -- The throttle_limit is the number of in-flight -# requests per client. Requests beyond -# that limit are queued up until -# running requests can complete. -# The value of 80 here is twice the number of -# concurrent_reads + concurrent_writes. -# - default_weight -- default_weight is optional and allows for -# overriding the default which is 1. -# - weights -- Weights are optional and will default to 1 or the -# overridden default_weight. The weight translates into how -# many requests are handled during each turn of the -# RoundRobin, based on the scheduler id. -# -# request_scheduler_options: -# throttle_limit: 80 -# default_weight: 5 -# weights: -# Keyspace1: 1 -# Keyspace2: 5 - -# request_scheduler_id -- An identifier based on which to perform -# the request scheduling. Currently the only valid option is keyspace. -# request_scheduler_id: keyspace - -# Enable or disable inter-node encryption. -# You must also generate keys and provide the appropriate key and trust store locations and passwords. -# No custom encryption options are currently enabled.
The available options are: -# -# The available internode options are: all, none, dc, rack -# If set to dc scylla will encrypt the traffic between the DCs -# If set to rack scylla will encrypt the traffic between the racks -# -# server_encryption_options: -# internode_encryption: none -# certificate: conf/scylla.crt -# keyfile: conf/scylla.key -# truststore: -# require_client_auth: False -# priority_string: - -# enable or disable client/server encryption. -# client_encryption_options: -# enabled: false -# certificate: conf/scylla.crt -# keyfile: conf/scylla.key -# truststore: -# require_client_auth: False -# priority_string: - -# internode_compression controls whether traffic between nodes is -# compressed. -# can be: all - all traffic is compressed -# dc - traffic between different datacenters is compressed -# none - nothing is compressed. -# internode_compression: none - -# Enable or disable tcp_nodelay for inter-dc communication. -# Disabling it will result in larger (but fewer) network packets being sent, -# reducing overhead from the TCP protocol itself, at the cost of increasing -# latency if you block for cross-datacenter responses. -# inter_dc_tcp_nodelay: false - -# Relaxation of environment checks. -# -# Scylla places certain requirements on its environment. If these requirements are -# not met, performance and reliability can be degraded. -# -# These requirements include: -# - A filesystem with good support for asynchronous I/O (AIO). Currently, -# this means XFS. -# -# false: strict environment checks are in place; do not start if they are not met. -# true: relaxed environment checks; performance and reliability may degrade. -# -# developer_mode: false - - -# Idle-time background processing -# -# Scylla can perform certain jobs in the background while the system is otherwise idle, -# freeing processor resources when there is other work to be done. -# -# defragment_memory_on_idle: true -# -# prometheus port -# By default, Scylla opens prometheus API port on port 9180 -# setting the port to 0 will disable the prometheus API. -# prometheus_port: 9180 -# -# prometheus address -# By default, Scylla binds all interfaces to the prometheus API -# It is possible to restrict the listening address to a specific one -# prometheus_address: 0.0.0.0 - -# Distribution of data among cores (shards) within a node -# -# Scylla distributes data within a node among shards, using a round-robin -# strategy: -# [shard0] [shard1] ... [shardN-1] [shard0] [shard1] ... [shardN-1] ... -# -# Scylla versions 1.6 and below used just one repetition of the pattern; -# this interfered with data placement among nodes (vnodes). -# -# Scylla versions 1.7 and above use 4096 repetitions of the pattern; this -# provides for better data distribution. -# -# the value below is log (base 2) of the number of repetitions. -# -# Set to 0 to avoid rewriting all data when upgrading from Scylla 1.6 and -# below. -# -# Keep at 12 for new clusters. -murmur3_partitioner_ignore_msb_bits: 12 diff --git a/scylla/db/config.cc b/config.cc similarity index 98% rename from scylla/db/config.cc rename to config.cc index 1ce6793..cf92348 100644 --- a/scylla/db/config.cc +++ b/config.cc @@ -1,10 +1,5 @@ /* - * Copyright (C) 2015 ScyllaDB - * - */ - -/* - * This file is part of Scylla. + * This file is part of Pedis. * * Scylla is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by @@ -17,7 +12,7 @@ * GNU General Public License for more details.
* * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. + * along with Pedis. If not, see <http://www.gnu.org/licenses/>. */ #include @@ -47,7 +42,7 @@ db::config::config() namespace bpo = boost::program_options; -namespace db { +namespace redis { // Special "validator" for boost::program_options to allow reading options // into an unordered_map (we have in config.hh a bunch of // those). This validator allows the parameter of each option to look like diff --git a/scylla/db/config.hh b/config.hh similarity index 97% rename from scylla/db/config.hh rename to config.hh index acf8d18..5016808 100644 --- a/scylla/db/config.hh +++ b/config.hh @@ -1,10 +1,5 @@ /* - * Copyright (C) 2015 ScyllaDB - * - */ - -/* - * This file is part of Scylla. + * This file is part of Pedis. * * Scylla is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by @@ -17,7 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. + * along with Pedis. If not, see <http://www.gnu.org/licenses/>. */ #pragma once @@ -31,7 +26,7 @@ namespace seastar { class file; } -namespace db { +namespace redis { class string_map : public std::unordered_map { public: @@ -248,18 +243,6 @@ public: val(rpc_interface, sstring, "eth1", Unused, \ "The listen address for client connections. Interfaces must correspond to a single address, IP aliasing is not supported. See rpc_address." \ ) \ - val(redis_rpc_address, sstring, "localhost", Used, \ - "The listen address for client connections (Redis RPC service and native transport). Valid values are:\n" \ - "\n" \ - "\tunset: Resolves the address using the hostname configuration of the node. If left unset, the hostname must resolve to the IP address of this node using /etc/hostname, /etc/hosts, or DNS.\n" \ - "\t0.0.0.0 : Listens on all configured interfaces, but you must set the broadcast_rpc_address to a value other than 0.0.0.0.\n" \ - "\tIP address\n" \ - "\thostname\n" \ - "Related information: Network\n" \ - ) \ - val(redis_rpc_interface, sstring, "eth1", Unused, \ - "The listen address for client connections. Interfaces must correspond to a single address, IP aliasing is not supported. See rpc_address." \ - ) \ val(seed_provider, seed_provider_type, seed_provider_type("org.apache.cassandra.locator.SimpleSeedProvider"), Used, \ "The addresses of hosts deemed contact points. Scylla nodes use the -seeds list to find each other and learn the topology of the ring.\n" \ "\n" \ @@ -573,16 +556,6 @@ public: "from native_transport_port will use encryption for native_transport_port_ssl while" \ "keeping native_transport_port unencrypted" \ ) \ - val(native_redis_transport_port, uint16_t, 6379, Used, \ - "Port on which the REDIS transport listens for clients." \ - ) \ - val(native_redis_transport_port_ssl, uint16_t, 6079, Used, \ - "Port on which the REDIS TLS native transport listens for clients." \ - "Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption" \ - "for native_transport_port. Setting native_transport_port_ssl to a different value" \ - "from native_transport_port will use encryption for native_transport_port_ssl while" \ - "keeping native_transport_port unencrypted" \ - ) \ val(native_transport_max_threads, uint32_t, 128, Invalid, \ "The maximum number of threads handling requests.
The meaning is the same as rpc_max_threads.\n" \ "Default is different (128 versus unlimited).\n" \ diff --git a/db.cc b/db.cc index 5a09777..c12f9da 100644 --- a/db.cc +++ b/db.cc @@ -228,7 +228,7 @@ void database::setup_metrics() }); } -future database::set(const dht::decorated_key& dk, bytes& val, long expired, uint32_t flag) +future database::set(const decorated_key& dk, bytes& val, long expired, uint32_t flag) { ++_stat._set; return with_allocator(allocator(), [this, &dk, &val, expired, flag] { @@ -245,38 +245,19 @@ future database::set(const dht::decorated_key& dk, bytes& }); } -future database::del(const dht::decorated_key& dk) +future database::del(const decorated_key& dk) { ++_stat._del; - return current_store().with_entry_run(dk, [this, &dk] (cache_entry* e) { + return current_store().run_with_entry(dk, [this, &dk] (cache_entry* e) { if (!e) return reply_builder::build(msg_zero); - if (e->type_of_bytes()) { - --_stat._total_string_entries; - } - else if (e->type_of_set()) { - --_stat._total_set_entries; - } - else if (e->type_of_list()) { - --_stat._total_list_entries; - } - else if (e->type_of_map()) { - --_stat._total_dict_entries; - } - else if (e->type_of_sset()) { - --_stat._total_zset_entries; - } - else if (e->type_of_hll()) { - --_stat._total_hll_entries; - } - else { - --_stat._total_counter_entries; - } + auto& meta = e->meta(); auto result = current_store().erase(*e); + _store.erase(dk); return reply_builder::build(result ? msg_one : msg_zero); }); } -future database::get(const dht::decorated_key& dk) +future database::get(const decorated_key& dk) { ++_stat._read; ++_stat._get; @@ -285,8 +266,6 @@ future database::get(const dht::decorated_key& dk) return reply_builder::build(msg_type_err); } else { - if (e != nullptr) ++_stat._hit; - //return reply_builder::build(e); return reply_builder::build(msg_type_err); } }); diff --git a/db.hh b/db.hh index 50027ef..7ba28d1 100644 --- a/db.hh +++ b/db.hh @@ -29,8 +29,6 @@ #include #include #include "cache.hh" -#include "geo.hh" -#include "bits_operation.hh" #include "reply_builder.hh" #include #include "bytes.hh" @@ -50,9 +48,9 @@ public: database(); ~database(); - future set(const dht::decorated_key& dk, bytes& val, long expired, uint32_t flag); - future del(const dht::decorated_key& key); - future get(const dht::decorated_key& key); + future set(const decorated_key& dk, bytes& val, long expired, uint32_t flag); + future del(const decorated_key& key); + future get(const decorated_key& key); private: static const int DEFAULT_DB_COUNT = 1; cache _cache_stores[DEFAULT_DB_COUNT]; diff --git a/scylla/dht/murmur3_partitioner.cc b/default_partitioner.cc similarity index 63% rename from scylla/dht/murmur3_partitioner.cc rename to default_partitioner.cc index 642b771..0c516e9 100644 --- a/scylla/dht/murmur3_partitioner.cc +++ b/default_partitioner.cc @@ -1,27 +1,5 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "murmur3_partitioner.hh" +#include "default_partitioner.hh" #include "utils/murmur_hash.hh" -#include "sstables/key.hh" #include "utils/class_registrator.hh" #include #include @@ -30,7 +8,7 @@ namespace dht { inline unsigned -murmur3_partitioner::zero_based_shard_of(uint64_t token, unsigned shards, unsigned sharding_ignore_msb_bits) { +partitioner::zero_based_shard_of(uint64_t token, unsigned shards, unsigned sharding_ignore_msb_bits) { // This is the master function, the inverses have to match it wrt. rounding errors. token <<= sharding_ignore_msb_bits; // Treat "token" as a fraction in the interval [0, 1); compute: @@ -39,7 +17,7 @@ murmur3_partitioner::zero_based_shard_of(uint64_t token, unsigned shards, unsign } std::vector -murmur3_partitioner::init_zero_based_shard_start(unsigned shards, unsigned sharding_ignore_msb_bits) { +partitioner::init_zero_based_shard_start(unsigned shards, unsigned sharding_ignore_msb_bits) { // computes the inverse of zero_based_shard_of(). ret[s] will return the smallest token that belongs to s if (shards == 1) { // Avoid the while loops below getting confused finding the "edge" between two nonexistent shards @@ -60,14 +38,14 @@ murmur3_partitioner::init_zero_based_shard_start(unsigned shards, unsigned shard inline int64_t -murmur3_partitioner::normalize(int64_t in) { +partitioner::normalize(int64_t in) { return in == std::numeric_limits::lowest() ? std::numeric_limits::max() : in; } token -murmur3_partitioner::get_token(bytes_view key) { +partitioner::get_token(bytes_view key) { if (key.empty()) { return minimum_token(); } @@ -77,7 +55,7 @@ murmur3_partitioner::get_token(bytes_view key) { } token -murmur3_partitioner::get_token(uint64_t value) const { +partitioner::get_token(uint64_t value) const { // We don't normalize() the value, since token includes an is-before-everything // indicator. // FIXME: will this require a repair when importing a database? 
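For reference, the token-to-shard mapping that zero_based_shard_of() and unbias() implement above (and that the murmur3_partitioner_ignore_msb_bits comment in scylla.yaml describes) can be read as one self-contained sketch. shard_of_sketch and its parameter names are illustrative only, not part of this patch:

#include <cstdint>
#include <limits>

using uint128_t = unsigned __int128;

// Illustrative sketch: mirrors partitioner::unbias() + zero_based_shard_of().
unsigned shard_of_sketch(int64_t signed_token, unsigned shards, unsigned ignore_msb_bits) {
    // unbias(): translate the signed murmur3 token from [-2^63, 2^63) to [0, 2^64).
    uint64_t token = uint64_t(signed_token) + uint64_t(std::numeric_limits<int64_t>::min());
    // Shift out the high bits reserved for vnode placement; with the default
    // ignore_msb_bits of 12, the round-robin shard pattern repeats 2^12 = 4096 times.
    token <<= ignore_msb_bits;
    // Treat the remaining bits as a fraction in [0, 1) and scale by the shard count.
    return unsigned((uint128_t(token) * shards) >> 64);
}

The multiply-shift avoids a 64-bit modulo and keeps shard boundaries exactly invertible, which is what init_zero_based_shard_start() relies on when it computes the smallest token owned by each shard.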
@@ -87,28 +65,6 @@ murmur3_partitioner::get_token(uint64_t value) const { return token{token::kind::key, std::move(b)}; } -token -murmur3_partitioner::get_token(const sstables::key_view& key) { - return get_token(bytes_view(key)); -} - -token -murmur3_partitioner::get_token(const bytes& key) { - return get_token(bytes_view(key)); -} - -token -murmur3_partitioner::get_token(const schema& s, partition_key_view key) { - std::array hash; - auto&& legacy = key.legacy_form(s); - utils::murmur_hash::hash3_x64_128(legacy.begin(), legacy.size(), 0, hash); - return get_token(hash[0]); -} - -token murmur3_partitioner::get_random_token() { - auto rand = dht::get_random_number(); - return get_token(rand); -} inline int64_t long_token(const token& t) { if (t.is_minimum() || t.is_maximum()) { @@ -125,20 +81,20 @@ inline int64_t long_token(const token& t) { } uint64_t -murmur3_partitioner::unbias(const token& t) const { +partitioner::unbias(const token& t) const { return uint64_t(long_token(t)) + uint64_t(std::numeric_limits::min()); } token -murmur3_partitioner::bias(uint64_t n) const { +partitioner::bias(uint64_t n) const { return get_token(n - uint64_t(std::numeric_limits::min())); } -sstring murmur3_partitioner::to_sstring(const token& t) const { +sstring partitioner::to_sstring(const token& t) const { return seastar::to_sstring(long_token(t)); } -dht::token murmur3_partitioner::from_sstring(const sstring& t) const { +dht::token partitioner::from_sstring(const sstring& t) const { auto lp = boost::lexical_cast(t); if (lp == std::numeric_limits::min()) { return minimum_token(); @@ -147,7 +103,7 @@ dht::token murmur3_partitioner::from_sstring(const sstring& t) const { } } -dht::token murmur3_partitioner::from_bytes(bytes_view bytes) const { +dht::token partitioner::from_bytes(bytes_view bytes) const { if (bytes.size() != sizeof(int64_t)) { throw runtime_exception(sprint("Invalid token. Should have size %ld, has size %ld\n", sizeof(int64_t), bytes.size())); } @@ -162,7 +118,7 @@ dht::token murmur3_partitioner::from_bytes(bytes_view bytes) const { } } -int murmur3_partitioner::tri_compare(const token& t1, const token& t2) const { +int partitioner::tri_compare(const token& t1, const token& t2) const { auto l1 = long_token(t1); auto l2 = long_token(t2); @@ -181,7 +137,7 @@ static std::make_unsigned_t positive_subtract(T x, T y) { return std::make_unsigned_t(x) - std::make_unsigned_t(y); } -token murmur3_partitioner::midpoint(const token& t1, const token& t2) const { +token partitioner::midpoint(const token& t1, const token& t2) const { auto l1 = long_token(t1); auto l2 = long_token(t2); int64_t mid; @@ -211,45 +167,8 @@ static float ratio_helper(int64_t a, int64_t b) { return val/(float)std::numeric_limits::max(); } -std::map -murmur3_partitioner::describe_ownership(const std::vector& sorted_tokens) { - std::map ownerships; - auto i = sorted_tokens.begin(); - - // 0-case - if (i == sorted_tokens.end()) { - throw runtime_exception("No nodes present in the cluster. 
Has this node finished starting up?"); - } - // 1-case - if (sorted_tokens.size() == 1) { - ownerships[sorted_tokens[0]] = 1.0; - // n-case - } else { - const token& start = sorted_tokens[0]; - - int64_t ti = long_token(start); // The first token and its value - int64_t start_long = ti; - int64_t tim1 = ti; // The last token and its value (after loop) - for (i++; i != sorted_tokens.end(); i++) { - ti = long_token(*i); // The next token and its value - ownerships[*i]= ratio_helper(ti, tim1); // save (T(i) -> %age) - tim1 = ti; - } - - // The start token's range extends backward to the last token, which is why both were saved above. - ownerships[start] = ratio_helper(start_long, ti); - } - - return ownerships; -} - -data_type -murmur3_partitioner::get_token_validator() { - return long_type; -} - unsigned -murmur3_partitioner::shard_of(const token& t) const { +partitioner::shard_of(const token& t) const { switch (t._kind) { case token::kind::before_all_keys: return 0; @@ -263,7 +182,7 @@ murmur3_partitioner::shard_of(const token& t) const { } token -murmur3_partitioner::token_for_next_shard(const token& t, shard_id shard, unsigned spans) const { +partitioner::token_for_next_shard(const token& t, shard_id shard, unsigned spans) const { uint64_t n = 0; switch (t._kind) { case token::kind::before_all_keys: @@ -294,12 +213,4 @@ murmur3_partitioner::token_for_next_shard(const token& t, shard_id shard, unsign } return bias(n); } - - -using registry = class_registrator; -static registry registrator("org.apache.cassandra.dht.Murmur3Partitioner"); -static registry registrator_short_name("Murmur3Partitioner"); - } - - diff --git a/default_partitioner.hh b/default_partitioner.hh new file mode 100644 index 0000000..7886f5b --- /dev/null +++ b/default_partitioner.hh @@ -0,0 +1,40 @@ +#pragma once +#include "bytes.hh" +#include + +namespace redis { + +class partitioner { + unsigned _shard_count; + unsigned _sharding_ignore_msb_bits; + std::vector _shard_start = init_zero_based_shard_start(_shard_count, _sharding_ignore_msb_bits); +public: + partitioner(unsigned shard_count = smp::count, unsigned sharding_ignore_msb_bits = 0) + : _shard_count(shard_count) + // if one shard, ignore sharding_ignore_msb_bits as they will just cause needless + // range breaks + , _sharding_ignore_msb_bits(shard_count > 1 ? sharding_ignore_msb_bits : 0) { + } + virtual const sstring name() const { return "default"; } + virtual int tri_compare(const token& t1, const token& t2) const; + virtual token midpoint(const token& t1, const token& t2) const; + virtual sstring to_sstring(const dht::token& t) const; + virtual token from_sstring(const sstring& t) const; + virtual token from_bytes(bytes_view bytes) const; + + virtual unsigned shard_of(const token& t) const; + virtual token token_for_next_shard(const token& t, shard_id shard, unsigned spans) const; +private: + using uint128_t = unsigned __int128; + static int64_t normalize(int64_t in); + token get_token(bytes_view key); + token get_token(uint64_t value) const; + token bias(uint64_t value) const; // translate from a zero-based range + uint64_t unbias(const token& t) const; // translate to a zero-based range + static unsigned zero_based_shard_of(uint64_t zero_based_token, unsigned shards, unsigned sharding_ignore_msb_bits); + static std::vector init_zero_based_shard_start(unsigned shards, unsigned sharding_ignore_msb_bits); +}; + + +} +
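A hypothetical call site for the class declared above, to make its public surface concrete. This is a sketch only: it assumes a running seastar reactor (for smp::count), assumes the tree's token and shard_id types are visible, and the token value is arbitrary:

// Hypothetical usage sketch -- not part of the patch.
#include "default_partitioner.hh"

void route_example() {
    // smp::count shards, 12 ignored MSBs as recommended by scylla.yaml.
    redis::partitioner p(smp::count, 12);
    auto t = p.from_sstring("12345");      // tokens round-trip via their decimal string form
    unsigned owner = p.shard_of(t);        // shard that owns this token
    auto boundary = p.token_for_next_shard(t, (owner + 1) % smp::count, 1);
    (void)boundary;                        // smallest token owned by the next shard
}
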
diff --git a/scylla/gms/application_state.cc b/gms/application_state.cc similarity index 100% rename from scylla/gms/application_state.cc rename to gms/application_state.cc diff --git a/scylla/gms/application_state.hh b/gms/application_state.hh similarity index 100% rename from scylla/gms/application_state.hh rename to gms/application_state.hh diff --git a/scylla/gms/endpoint_state.cc b/gms/endpoint_state.cc similarity index 100% rename from scylla/gms/endpoint_state.cc rename to gms/endpoint_state.cc diff --git a/scylla/gms/endpoint_state.hh b/gms/endpoint_state.hh similarity index 100% rename from scylla/gms/endpoint_state.hh rename to gms/endpoint_state.hh diff --git a/scylla/gms/failure_detector.cc b/gms/failure_detector.cc similarity index 100% rename from scylla/gms/failure_detector.cc rename to gms/failure_detector.cc diff --git a/scylla/gms/failure_detector.hh b/gms/failure_detector.hh similarity index 100% rename from scylla/gms/failure_detector.hh rename to gms/failure_detector.hh diff --git a/scylla/gms/feature.hh b/gms/feature.hh similarity index 100% rename from scylla/gms/feature.hh rename to gms/feature.hh diff --git a/scylla/gms/gossip_digest.hh b/gms/gossip_digest.hh similarity index 100% rename from scylla/gms/gossip_digest.hh rename to gms/gossip_digest.hh diff --git a/scylla/gms/gossip_digest_ack.cc b/gms/gossip_digest_ack.cc similarity index 100% rename from scylla/gms/gossip_digest_ack.cc rename to gms/gossip_digest_ack.cc diff --git a/scylla/gms/gossip_digest_ack.hh b/gms/gossip_digest_ack.hh similarity index 100% rename from scylla/gms/gossip_digest_ack.hh rename to gms/gossip_digest_ack.hh diff --git a/scylla/gms/gossip_digest_ack2.cc b/gms/gossip_digest_ack2.cc similarity index 100% rename from scylla/gms/gossip_digest_ack2.cc rename to gms/gossip_digest_ack2.cc diff --git a/scylla/gms/gossip_digest_ack2.hh b/gms/gossip_digest_ack2.hh similarity index 100% rename from scylla/gms/gossip_digest_ack2.hh rename to gms/gossip_digest_ack2.hh diff --git a/scylla/gms/gossip_digest_syn.cc b/gms/gossip_digest_syn.cc similarity index 100% rename from scylla/gms/gossip_digest_syn.cc rename to gms/gossip_digest_syn.cc diff --git a/scylla/gms/gossip_digest_syn.hh b/gms/gossip_digest_syn.hh similarity index 100% rename from scylla/gms/gossip_digest_syn.hh rename to gms/gossip_digest_syn.hh diff --git a/scylla/gms/gossiper.cc b/gms/gossiper.cc similarity
index 99% rename from scylla/gms/gossiper.cc rename to gms/gossiper.cc index 3586dcb..717117b 100644 --- a/scylla/gms/gossiper.cc +++ b/gms/gossiper.cc @@ -195,11 +195,6 @@ future<> gossiper::handle_syn_msg(msg_addr from, gossip_digest_syn syn_msg) { return make_ready_future<>(); } - if (syn_msg.partioner() != "" && syn_msg.partioner() != get_partitioner_name()) { - logger.warn("Partitioner mismatch from {} {}!={}", from.addr, syn_msg.partioner(), get_partitioner_name()); - return make_ready_future<>(); - } - auto g_digest_list = syn_msg.get_gossip_digests(); do_sort(g_digest_list); std::vector delta_gossip_digest_list; diff --git a/scylla/gms/gossiper.hh b/gms/gossiper.hh similarity index 100% rename from scylla/gms/gossiper.hh rename to gms/gossiper.hh diff --git a/scylla/gms/heart_beat_state.hh b/gms/heart_beat_state.hh similarity index 100% rename from scylla/gms/heart_beat_state.hh rename to gms/heart_beat_state.hh diff --git a/scylla/gms/i_endpoint_state_change_subscriber.hh b/gms/i_endpoint_state_change_subscriber.hh similarity index 100% rename from scylla/gms/i_endpoint_state_change_subscriber.hh rename to gms/i_endpoint_state_change_subscriber.hh diff --git a/scylla/gms/i_failure_detection_event_listener.hh b/gms/i_failure_detection_event_listener.hh similarity index 100% rename from scylla/gms/i_failure_detection_event_listener.hh rename to gms/i_failure_detection_event_listener.hh diff --git a/scylla/gms/i_failure_detector.hh b/gms/i_failure_detector.hh similarity index 100% rename from scylla/gms/i_failure_detector.hh rename to gms/i_failure_detector.hh diff --git a/scylla/gms/inet_address.cc b/gms/inet_address.cc similarity index 100% rename from scylla/gms/inet_address.cc rename to gms/inet_address.cc diff --git a/scylla/gms/inet_address.hh b/gms/inet_address.hh similarity index 100% rename from scylla/gms/inet_address.hh rename to gms/inet_address.hh diff --git a/scylla/gms/version_generator.cc b/gms/version_generator.cc similarity index 100% rename from scylla/gms/version_generator.cc rename to gms/version_generator.cc diff --git a/scylla/gms/version_generator.hh b/gms/version_generator.hh similarity index 100% rename from scylla/gms/version_generator.hh rename to gms/version_generator.hh diff --git a/scylla/gms/versioned_value.cc b/gms/versioned_value.cc similarity index 100% rename from scylla/gms/versioned_value.cc rename to gms/versioned_value.cc diff --git a/scylla/gms/versioned_value.hh b/gms/versioned_value.hh similarity index 100% rename from scylla/gms/versioned_value.hh rename to gms/versioned_value.hh diff --git a/scylla/idl/gossip_digest.idl.hh b/idl/gossip_digest.idl.hh similarity index 100% rename from scylla/idl/gossip_digest.idl.hh rename to idl/gossip_digest.idl.hh diff --git a/scylla/idl/idl_test.idl.hh b/idl/idl_test.idl.hh similarity index 100% rename from scylla/idl/idl_test.idl.hh rename to idl/idl_test.idl.hh diff --git a/scylla/idl/keys.idl.hh b/idl/keys.idl.hh similarity index 100% rename from scylla/idl/keys.idl.hh rename to idl/keys.idl.hh diff --git a/scylla/idl/result.idl.hh b/idl/result.idl.hh similarity index 100% rename from scylla/idl/result.idl.hh rename to idl/result.idl.hh diff --git a/scylla/idl/streaming.idl.hh b/idl/streaming.idl.hh similarity index 100% rename from scylla/idl/streaming.idl.hh rename to idl/streaming.idl.hh diff --git a/scylla/idl/token.idl.hh b/idl/token.idl.hh similarity index 100% rename from scylla/idl/token.idl.hh rename to idl/token.idl.hh diff --git a/scylla/idl/uuid.idl.hh b/idl/uuid.idl.hh similarity index 
100% rename from scylla/idl/uuid.idl.hh rename to idl/uuid.idl.hh diff --git a/scylla/io/i_serializer.hh b/io/i_serializer.hh similarity index 100% rename from scylla/io/i_serializer.hh rename to io/i_serializer.hh diff --git a/scylla/io/i_versioned_serializer.hh b/io/i_versioned_serializer.hh similarity index 100% rename from scylla/io/i_versioned_serializer.hh rename to io/i_versioned_serializer.hh diff --git a/scylla/io/io.cc b/io/io.cc similarity index 100% rename from scylla/io/io.cc rename to io/io.cc diff --git a/main.cc b/main.cc index 0e28cc6..baa0444 100644 --- a/main.cc +++ b/main.cc @@ -1,27 +1,3 @@ -/* - * This work is open source software, licensed under the terms of the - * AGPL license as described in the LICENSE.AGPL file in the top-level directory. - * - * Modified by Peng Jian, pstack at 163.com - */ - -/* - * This file is part of Pedis. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - #include "supervisor.hh" #include "database.hh" #include "core/app-template.hh" @@ -292,26 +268,18 @@ int main(int ac, char** av) { bool help_loggers = false; bool help_version = false; cfg->add_options(opt_add) - // TODO : default, always read? ("options-file", bpo::value(), "configuration file (i.e. 
/conf/redis.yaml)") ("help-loggers", bpo::bool_switch(&help_loggers), "print a list of logger names and exit") ("version", bpo::bool_switch(&help_version), "print version number and exit") ; - distributed db; - seastar::sharded cf_cache_hitrate_calculator; - debug::db = &db; - auto& qp = cql3::get_query_processor(); - auto& proxy = service::get_storage_proxy(); - auto& mm = service::get_migration_manager(); - api::http_context ctx(db, proxy); httpd::http_server_control prometheus_server; prometheus::config pctx; directories dirs; return app.run_deprecated(ac, av, [&] { if (help_version) { - print("%s\n", scylla_version()); + print("%s\n", pedis_version()); engine().exit(0); return make_ready_future<>(); } @@ -320,12 +288,12 @@ int main(int ac, char** av) { engine().exit(1); return make_ready_future<>(); } - print("Redis version %s starting ...\n", scylla_version()); + print("Redis version %s starting ...\n", pedis_version()); auto&& opts = app.configuration(); namespace sm = seastar::metrics; app_metrics.add_group("redis", { - sm::make_gauge("current_version", sm::description("Current Redis version."), { sm::label_instance("version", scylla_version()), sm::shard_label("") }, [] { return 0; }) + sm::make_gauge("current_version", sm::description("Redis version."), { sm::label_instance("version", scylla_version()), sm::shard_label("") }, [] { return 0; }) }); // Do this first once set log applied from command line so for example config @@ -341,19 +309,15 @@ int main(int ac, char** av) { } tcp_syncookies_sanity(); + auto& proxy = redis::get_proxy(); - return seastar::async([cfg, &db, &qp, &proxy, &mm, &ctx, &opts, &dirs, &pctx, &prometheus_server, &return_value, &cf_cache_hitrate_calculator] { + return seastar::async([cfg, &proxy, &ctx, &opts, &dirs, &pctx, &prometheus_server, &return_value] { read_config(opts, *cfg).get(); apply_logger_settings(cfg->default_log_level(), cfg->logger_log_level(), cfg->log_to_stdout(), cfg->log_to_syslog()); verify_rlimit(cfg->developer_mode()); verify_adequate_memory_per_shard(cfg->developer_mode()); - if (cfg->partitioner() != "org.apache.cassandra.dht.Murmur3Partitioner") { - startlog.warn("The partitioner {} is deprecated and will be removed in a future version." 
- " Contact scylladb-users@googlegroups.com if you are using it in production", cfg->partitioner()); - } - dht::set_global_partitioner(cfg->partitioner(), cfg->murmur3_partitioner_ignore_msb_bits()); - auto start_thrift = cfg->start_rpc(); + uint16_t api_port = cfg->api_port(); ctx.api_dir = cfg->api_ui_dir(); ctx.api_doc = cfg->api_doc_dir(); @@ -430,41 +394,18 @@ int main(int ac, char** av) { if (opts.count("developer-mode")) { smp::invoke_on_all([] { engine().set_strict_dma(false); }).get(); } - supervisor::notify("creating tracing"); - tracing::tracing::create_tracing("trace_keyspace_helper").get(); - supervisor::notify("creating snitch"); - i_endpoint_snitch::create_snitch(cfg->endpoint_snitch()).get(); - // #293 - do not stop anything - // engine().at_exit([] { return i_endpoint_snitch::stop_snitch(); }); supervisor::notify("determining DNS name"); auto e = seastar::net::dns::get_host_by_name(api_address).get0(); supervisor::notify("starting API server"); auto ip = e.addr_list.front(); - ctx.http_server.start("API").get(); - api::set_server_init(ctx).get(); - ctx.http_server.listen(ipv4_addr{ip, api_port}).get(); - startlog.info("Redis API server listening on {}:{} ...", api_address, api_port); + supervisor::notify("initializing storage service"); - init_storage_service(db); - supervisor::notify("starting per-shard database core"); - // Note: changed from using a move here, because we want the config object intact. - db.start(std::ref(*cfg)).get(); - engine().at_exit([&db, &return_value] { - // A shared sstable must be compacted by all shards before it can be deleted. - // Since we're stoping, that's not going to happen. Cancel those pending - // deletions to let anyone waiting on them to continue. - sstables::cancel_atomic_deletions(); - // #293 - do not stop anything - not even db (for real) - //return db.stop(); - // call stop on each db instance, but leave the shareded pointers alive. - return db.invoke_on_all([](auto& db) { - return db.stop(); - }).then([] { - return sstables::await_background_jobs_on_all_shards(); - }).then([&return_value] { - ::_exit(return_value); - }); - }); + redis::get_storage_service().start().get(); + auto ss = redis::get_local_storage_service(); + ss.initialize().get(); + + /* initializing redis service */ + //init_storage_service(db); verify_seastar_io_scheduler(opts.count("max-io-requests"), db.local().get_config().developer_mode()).get(); supervisor::notify("creating data directories"); dirs.touch_and_lock(db.local().get_config().data_file_directories()).get(); @@ -479,14 +420,7 @@ int main(int ac, char** av) { return disk_sanity(pathname, db.local().get_config().developer_mode()); }).get(); - // Initialization of a keyspace is done by shard 0 only. For system - // keyspace, the procedure will go through the hardcoded column - // families, and in each of them, it will load the sstables for all - // shards using distributed database object. 
- // Iteration through column family directory for sstable loading is - // done only by shard 0, so we'll no longer face race conditions as - // described here: https://github.com/scylladb/scylla/issues/1014 - distributed_loader::init_system_keyspace(db).get(); + //distributed_loader::init_system_keyspace(db).get(); supervisor::notify("starting gossip"); // Moved local parameters here, esp since with the @@ -505,47 +439,15 @@ int main(int ac, char** av) { auto key = get_or_default(ssl_opts, "keyfile", relative_conf_dir("redis.key").string()); auto prio = get_or_default(ssl_opts, "priority_string", sstring()); auto clauth = is_true(get_or_default(ssl_opts, "require_client_auth", "false")); - init_ms_fd_gossiper(listen_address , storage_port , ssl_storage_port , tcp_nodelay_inter_dc , encrypt_what , trust_store , cert , key , prio , clauth , cfg->internode_compression() , seed_provider , cluster_name , phi , cfg->listen_on_broadcast_address()); - supervisor::notify("starting messaging service"); - supervisor::notify("starting storage proxy"); + + /* initializing message system */ + /* initializing gossiper system */ + /* initializing storage proxy */ proxy.start(std::ref(db)).get(); - // #293 - do not stop anything - // engine().at_exit([&proxy] { return proxy.stop(); }); - supervisor::notify("starting migration manager"); - mm.start().get(); - // #293 - do not stop anything - // engine().at_exit([&mm] { return mm.stop(); }); - supervisor::notify("starting query processor"); - qp.start(std::ref(proxy), std::ref(db)).get(); - // #293 - do not stop anything + /* initializing migration system */ // engine().at_exit([&qp] { return qp.stop(); }); - supervisor::notify("initializing batchlog manager"); - db::get_batchlog_manager().start(std::ref(qp)).get(); - // #293 - do not stop anything - // engine().at_exit([] { return db::get_batchlog_manager().stop(); }); - sstables::init_metrics().get(); - - db::system_keyspace::minimal_setup(db, qp); - - // schema migration, if needed, is also done on shard 0 - db::legacy_schema_migrator::migrate(proxy, qp.local()).get(); supervisor::notify("loading sstables"); - distributed_loader::ensure_system_table_directories(db).get(); supervisor::notify("loading sstables"); @@ -554,79 +456,17 @@ int main(int ac, char** av) { db.invoke_on_all([] (database& db) { db.register_connection_drop_notifier(netw::get_local_messaging_service()); }).get(); - supervisor::notify("setting up system keyspace"); - db::system_keyspace::setup(db, qp).get(); - supervisor::notify("starting commit log"); - auto cl = db.local().commitlog(); - if (cl != nullptr) { - auto paths = cl->get_segments_to_replay(); - if (!paths.empty()) { - supervisor::notify("replaying commit log"); - auto rp = db::commitlog_replayer::create_replayer(qp).get0(); - rp.recover(paths).get(); - supervisor::notify("replaying commit log - flushing memtables"); - db.invoke_on_all([] (database& db) { - return db.flush_all_memtables(); - }).get(); - supervisor::notify("replaying commit log - removing old commitlog segments"); - for (auto& path : paths) { - ::unlink(path.c_str()); - } - } - } - // If the same sstable is shared by several shards, it cannot be - // deleted until all shards decide to compact it. So we want to - // start these compactions now. Note we start compacting only after - // all sstables in this CF were loaded on all shards - otherwise - // we will have races between the compaction and loading processes - // We also want to trigger regular compaction on boot.
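The calls around this hunk (proxy.start(...), db.invoke_on_all(...), the commented-out *.stop() at_exit handlers) all come from one seastar pattern: a distributed<> service instantiated once per shard. A minimal self-contained sketch of that lifecycle, with a hypothetical demo_service in place of the real database/proxy types:

#include "core/distributed.hh"
#include "core/app-template.hh"
#include "core/thread.hh"

struct demo_service {
    void poke() {}
    future<> stop() { return make_ready_future<>(); }  // required by distributed<>
};

int main(int ac, char** av) {
    seastar::app_template app;
    return app.run_deprecated(ac, av, [] {
        return seastar::async([] {
            distributed<demo_service> svc;
            svc.start().get();                                      // one instance per shard
            svc.invoke_on_all([] (demo_service& s) { s.poke(); }).get();
            svc.stop().get();                                       // stop every shard before teardown
        });
    });
}

This is also why proxy.hh below keeps the proxy in a distributed<> and reaches it through get_local_proxy() on each shard.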
- - for (auto& x : db.local().get_column_families()) { - column_family& cf = *(x.second); - distributed_loader::reshard(db, cf.schema()->ks_name(), cf.schema()->cf_name()); - } - db.invoke_on_all([&proxy] (database& db) { - for (auto& x : db.get_column_families()) { - column_family& cf = *(x.second); - cf.trigger_compaction(); - } - }).get(); api::set_server_storage_service(ctx).get(); api::set_server_gossip(ctx).get(); - api::set_server_snitch(ctx).get(); api::set_server_storage_proxy(ctx).get(); - api::set_server_load_sstable(ctx).get(); - supervisor::notify("initializing migration manager RPC verbs"); - service::get_migration_manager().invoke_on_all([] (auto& mm) { - mm.init_messaging_service(); - }).get(); - supervisor::notify("initializing storage proxy RPC verbs"); proxy.invoke_on_all([] (service::storage_proxy& p) { p.init_messaging_service(); }).get(); - supervisor::notify("starting streaming service"); - streaming::stream_session::init_streaming_service(db).get(); - api::set_server_stream_manager(ctx).get(); - // Start handling REPAIR_CHECKSUM_RANGE messages - netw::get_messaging_service().invoke_on_all([&db] (auto& ms) { - ms.register_repair_checksum_range([&db] (sstring keyspace, sstring cf, dht::token_range range, rpc::optional hash_version) { - auto hv = hash_version ? *hash_version : repair_checksum::legacy; - return do_with(std::move(keyspace), std::move(cf), std::move(range), - [&db, hv] (auto& keyspace, auto& cf, auto& range) { - return checksum_range(db, keyspace, cf, range, hv); - }); - }); - }).get(); supervisor::notify("starting storage service", true); auto& ss = service::get_local_storage_service(); ss.init_server().get(); api::set_server_messaging_service(ctx).get(); api::set_server_storage_service(ctx).get(); - supervisor::notify("starting batchlog manager"); - db::get_batchlog_manager().invoke_on_all([] (db::batchlog_manager& b) { - return b.start(); - }).get(); - supervisor::notify("starting load broadcaster"); // should be unique_ptr, but then lambda passed to at_exit will be non copieable and // casting to std::function<> will fail to compile auto lb = make_shared(db, gms::get_local_gossiper()); @@ -640,11 +480,7 @@ int main(int ac, char** av) { gms::get_local_gossiper().wait_for_gossip_to_settle().get(); api::set_server_gossip_settle(ctx).get(); supervisor::notify("starting native transport"); - service::get_local_storage_service().start_native_transport().get(); service::get_local_storage_service().start_redis_transport().get(); - if (start_thrift) { - service::get_local_storage_service().start_rpc_server().get(); - } if (cfg->defragment_memory_on_idle()) { smp::invoke_on_all([] () { engine().set_idle_cpu_handler([] (reactor::work_waiting_on_reactor check_for_work) { diff --git a/scylla/message/messaging_service.cc b/message/messaging_service.cc similarity index 68% rename from scylla/message/messaging_service.cc rename to message/messaging_service.cc index bf9424a..ec6cf85 100644 --- a/scylla/message/messaging_service.cc +++ b/message/messaging_service.cc @@ -394,17 +394,8 @@ static unsigned get_rpc_client_idx(messaging_verb verb) { if (verb == messaging_verb::GOSSIP_DIGEST_SYN || verb == messaging_verb::GOSSIP_DIGEST_ACK2 || verb == messaging_verb::GOSSIP_SHUTDOWN || - verb == messaging_verb::GOSSIP_ECHO || - verb == messaging_verb::GET_SCHEMA_VERSION) { + verb == messaging_verb::GOSSIP_ECHO) { idx = 1; - } else if (verb == messaging_verb::PREPARE_MESSAGE || - verb == messaging_verb::PREPARE_DONE_MESSAGE || - verb == messaging_verb::STREAM_MUTATION || - 
verb == messaging_verb::STREAM_MUTATION_DONE || - verb == messaging_verb::COMPLETE_MESSAGE) { - idx = 2; - } else if (verb == messaging_verb::MUTATION_DONE) { - idx = 3; } return idx; } @@ -714,64 +705,6 @@ static constexpr int streaming_nr_retry = 10; static constexpr std::chrono::seconds streaming_timeout{10*60}; static constexpr std::chrono::seconds streaming_wait_before_retry{30}; -// PREPARE_MESSAGE -void messaging_service::register_prepare_message(std::function (const rpc::client_info& cinfo, - streaming::prepare_message msg, UUID plan_id, sstring description)>&& func) { - register_handler(this, messaging_verb::PREPARE_MESSAGE, std::move(func)); -} -future messaging_service::send_prepare_message(msg_addr id, streaming::prepare_message msg, UUID plan_id, - sstring description) { - return send_message_timeout_and_retry(this, messaging_verb::PREPARE_MESSAGE, id, - streaming_timeout, streaming_nr_retry, streaming_wait_before_retry, - std::move(msg), plan_id, std::move(description)); -} - -// PREPARE_DONE_MESSAGE -void messaging_service::register_prepare_done_message(std::function (const rpc::client_info& cinfo, UUID plan_id, unsigned dst_cpu_id)>&& func) { - register_handler(this, messaging_verb::PREPARE_DONE_MESSAGE, std::move(func)); -} -future<> messaging_service::send_prepare_done_message(msg_addr id, UUID plan_id, unsigned dst_cpu_id) { - return send_message_timeout_and_retry(this, messaging_verb::PREPARE_DONE_MESSAGE, id, - streaming_timeout, streaming_nr_retry, streaming_wait_before_retry, - plan_id, dst_cpu_id); -} - -// STREAM_MUTATION -void messaging_service::register_stream_mutation(std::function (const rpc::client_info& cinfo, UUID plan_id, frozen_mutation fm, unsigned dst_cpu_id, rpc::optional fragmented)>&& func) { - register_handler(this, messaging_verb::STREAM_MUTATION, std::move(func)); -} -future<> messaging_service::send_stream_mutation(msg_addr id, UUID plan_id, frozen_mutation fm, unsigned dst_cpu_id, bool fragmented) { - return send_message_timeout_and_retry(this, messaging_verb::STREAM_MUTATION, id, - streaming_timeout, streaming_nr_retry, streaming_wait_before_retry, - plan_id, std::move(fm), dst_cpu_id, fragmented); -} - -// STREAM_MUTATION_DONE -void messaging_service::register_stream_mutation_done(std::function (const rpc::client_info& cinfo, - UUID plan_id, dht::token_range_vector ranges, UUID cf_id, unsigned dst_cpu_id)>&& func) { - register_handler(this, messaging_verb::STREAM_MUTATION_DONE, - [func = std::move(func)] (const rpc::client_info& cinfo, - UUID plan_id, std::vector> ranges, - UUID cf_id, unsigned dst_cpu_id) mutable { - return func(cinfo, plan_id, compat::unwrap(std::move(ranges)), cf_id, dst_cpu_id); - }); -} -future<> messaging_service::send_stream_mutation_done(msg_addr id, UUID plan_id, dht::token_range_vector ranges, UUID cf_id, unsigned dst_cpu_id) { - return send_message_timeout_and_retry(this, messaging_verb::STREAM_MUTATION_DONE, id, - streaming_timeout, streaming_nr_retry, streaming_wait_before_retry, - plan_id, std::move(ranges), cf_id, dst_cpu_id); -} - -// COMPLETE_MESSAGE -void messaging_service::register_complete_message(std::function (const rpc::client_info& cinfo, UUID plan_id, unsigned dst_cpu_id)>&& func) { - register_handler(this, messaging_verb::COMPLETE_MESSAGE, std::move(func)); -} -future<> messaging_service::send_complete_message(msg_addr id, UUID plan_id, unsigned dst_cpu_id) { - return send_message_timeout_and_retry(this, messaging_verb::COMPLETE_MESSAGE, id, - streaming_timeout, streaming_nr_retry, 
streaming_wait_before_retry, - plan_id, dst_cpu_id); -} - void messaging_service::register_gossip_echo(std::function ()>&& func) { register_handler(this, messaging_verb::GOSSIP_ECHO, std::move(func)); } @@ -824,162 +757,4 @@ void messaging_service::unregister_gossip_digest_ack2() { future<> messaging_service::send_gossip_digest_ack2(msg_addr id, gossip_digest_ack2 msg) { return send_message_oneway(this, messaging_verb::GOSSIP_DIGEST_ACK2, std::move(id), std::move(msg)); } - -void messaging_service::register_definitions_update(std::function fm)>&& func) { - register_handler(this, netw::messaging_verb::DEFINITIONS_UPDATE, std::move(func)); -} -void messaging_service::unregister_definitions_update() { - _rpc->unregister_handler(netw::messaging_verb::DEFINITIONS_UPDATE); -} -future<> messaging_service::send_definitions_update(msg_addr id, std::vector fm) { - return send_message_oneway(this, messaging_verb::DEFINITIONS_UPDATE, std::move(id), std::move(fm)); -} - -void messaging_service::register_migration_request(std::function> (const rpc::client_info&)>&& func) { - register_handler(this, netw::messaging_verb::MIGRATION_REQUEST, std::move(func)); -} -void messaging_service::unregister_migration_request() { - _rpc->unregister_handler(netw::messaging_verb::MIGRATION_REQUEST); -} -future> messaging_service::send_migration_request(msg_addr id) { - return send_message>(this, messaging_verb::MIGRATION_REQUEST, std::move(id)); -} - -void messaging_service::register_mutation(std::function (const rpc::client_info&, rpc::opt_time_point, frozen_mutation fm, std::vector forward, - inet_address reply_to, unsigned shard, response_id_type response_id, rpc::optional> trace_info)>&& func) { - register_handler(this, netw::messaging_verb::MUTATION, std::move(func)); -} -void messaging_service::unregister_mutation() { - _rpc->unregister_handler(netw::messaging_verb::MUTATION); -} -future<> messaging_service::send_mutation(msg_addr id, clock_type::time_point timeout, const frozen_mutation& fm, std::vector forward, - inet_address reply_to, unsigned shard, response_id_type response_id, std::experimental::optional trace_info) { - return send_message_oneway_timeout(this, timeout, messaging_verb::MUTATION, std::move(id), fm, std::move(forward), - std::move(reply_to), std::move(shard), std::move(response_id), std::move(trace_info)); -} - -void messaging_service::register_counter_mutation(std::function (const rpc::client_info&, rpc::opt_time_point, std::vector fms, db::consistency_level cl, stdx::optional trace_info)>&& func) { - register_handler(this, netw::messaging_verb::COUNTER_MUTATION, std::move(func)); -} -void messaging_service::unregister_counter_mutation() { - _rpc->unregister_handler(netw::messaging_verb::COUNTER_MUTATION); -} -future<> messaging_service::send_counter_mutation(msg_addr id, clock_type::time_point timeout, std::vector fms, db::consistency_level cl, stdx::optional trace_info) { - return send_message_timeout(this, messaging_verb::COUNTER_MUTATION, std::move(id), timeout, std::move(fms), cl, std::move(trace_info)); -} - -void messaging_service::register_mutation_done(std::function (const rpc::client_info& cinfo, unsigned shard, response_id_type response_id)>&& func) { - register_handler(this, netw::messaging_verb::MUTATION_DONE, std::move(func)); -} -void messaging_service::unregister_mutation_done() { - _rpc->unregister_handler(netw::messaging_verb::MUTATION_DONE); -} -future<> messaging_service::send_mutation_done(msg_addr id, unsigned shard, response_id_type response_id) { - return 
send_message_oneway(this, messaging_verb::MUTATION_DONE, std::move(id), std::move(shard), std::move(response_id)); -} - -void messaging_service::register_read_data(std::function>, cache_temperature> (const rpc::client_info&, query::read_command cmd, compat::wrapping_partition_range pr, rpc::optional oda)>&& func) { - register_handler(this, netw::messaging_verb::READ_DATA, std::move(func)); -} -void messaging_service::unregister_read_data() { - _rpc->unregister_handler(netw::messaging_verb::READ_DATA); -} -future> messaging_service::send_read_data(msg_addr id, clock_type::time_point timeout, const query::read_command& cmd, const dht::partition_range& pr, query::digest_algorithm da) { - return send_message_timeout>>(this, messaging_verb::READ_DATA, std::move(id), timeout, cmd, pr, da); -} - -void messaging_service::register_get_schema_version(std::function(unsigned, table_schema_version)>&& func) { - register_handler(this, netw::messaging_verb::GET_SCHEMA_VERSION, std::move(func)); -} -void messaging_service::unregister_get_schema_version() { - _rpc->unregister_handler(netw::messaging_verb::GET_SCHEMA_VERSION); -} -future messaging_service::send_get_schema_version(msg_addr dst, table_schema_version v) { - return send_message(this, messaging_verb::GET_SCHEMA_VERSION, dst, static_cast(dst.cpu_id), v); -} - -void messaging_service::register_schema_check(std::function()>&& func) { - register_handler(this, netw::messaging_verb::SCHEMA_CHECK, std::move(func)); -} -void messaging_service::unregister_schema_check() { - _rpc->unregister_handler(netw::messaging_verb::SCHEMA_CHECK); -} -future messaging_service::send_schema_check(msg_addr dst) { - return send_message(this, netw::messaging_verb::SCHEMA_CHECK, dst); -} - -void messaging_service::register_read_mutation_data(std::function>, cache_temperature> (const rpc::client_info&, query::read_command cmd, compat::wrapping_partition_range pr)>&& func) { - register_handler(this, netw::messaging_verb::READ_MUTATION_DATA, std::move(func)); -} -void messaging_service::unregister_read_mutation_data() { - _rpc->unregister_handler(netw::messaging_verb::READ_MUTATION_DATA); -} -future> messaging_service::send_read_mutation_data(msg_addr id, clock_type::time_point timeout, const query::read_command& cmd, const dht::partition_range& pr) { - return send_message_timeout>>(this, messaging_verb::READ_MUTATION_DATA, std::move(id), timeout, cmd, pr); -} - -void messaging_service::register_read_digest(std::function (const rpc::client_info&, query::read_command cmd, compat::wrapping_partition_range pr)>&& func) { - register_handler(this, netw::messaging_verb::READ_DIGEST, std::move(func)); -} -void messaging_service::unregister_read_digest() { - _rpc->unregister_handler(netw::messaging_verb::READ_DIGEST); -} -future, rpc::optional> messaging_service::send_read_digest(msg_addr id, clock_type::time_point timeout, const query::read_command& cmd, const dht::partition_range& pr) { - return send_message_timeout, rpc::optional>>(this, netw::messaging_verb::READ_DIGEST, std::move(id), timeout, cmd, pr); -} - -// Wrapper for TRUNCATE -void messaging_service::register_truncate(std::function (sstring, sstring)>&& func) { - register_handler(this, netw::messaging_verb::TRUNCATE, std::move(func)); -} - -void messaging_service::unregister_truncate() { - _rpc->unregister_handler(netw::messaging_verb::TRUNCATE); -} - -future<> messaging_service::send_truncate(msg_addr id, std::chrono::milliseconds timeout, sstring ks, sstring cf) { - return send_message_timeout(this, 
netw::messaging_verb::TRUNCATE, std::move(id), std::move(timeout), std::move(ks), std::move(cf)); -} - -// Wrapper for REPLICATION_FINISHED -void messaging_service::register_replication_finished(std::function (inet_address)>&& func) { - register_handler(this, messaging_verb::REPLICATION_FINISHED, std::move(func)); -} -void messaging_service::unregister_replication_finished() { - _rpc->unregister_handler(messaging_verb::REPLICATION_FINISHED); -} -future<> messaging_service::send_replication_finished(msg_addr id, inet_address from) { - // FIXME: getRpcTimeout : conf.request_timeout_in_ms - return send_message_timeout(this, messaging_verb::REPLICATION_FINISHED, std::move(id), 10000ms, std::move(from)); -} - -// Wrapper for REPAIR_CHECKSUM_RANGE -void messaging_service::register_repair_checksum_range( - std::function (sstring keyspace, - sstring cf, dht::token_range range, rpc::optional hash_version)>&& f) { - register_handler(this, messaging_verb::REPAIR_CHECKSUM_RANGE, std::move(f)); -} -void messaging_service::unregister_repair_checksum_range() { - _rpc->unregister_handler(messaging_verb::REPAIR_CHECKSUM_RANGE); -} -future messaging_service::send_repair_checksum_range( - msg_addr id, sstring keyspace, sstring cf, ::dht::token_range range, repair_checksum hash_version) -{ - return send_message(this, - messaging_verb::REPAIR_CHECKSUM_RANGE, std::move(id), - std::move(keyspace), std::move(cf), std::move(range), hash_version); -} -/* -// Wrapper for REDIS_INTERNAL_DATA_BUS -void messaging_service::register_redis_internal_data_bus( - std::function(std::function< future (const rpc::client_info&, redis::request_wrapper&)> && func)>) { - register_handler(this, messaging_verb::REDIS_INTERNAL_DATA_BUS, std::move(func)); -} -void messaging_service::unregister_redis_internal_data_bus() { - _rpc->unregister_handler(messaging_verb::REDIS_INTERNAL_DATA_BUS); -} -future messaging_service::edis_internal_data_bus(msg_addr id, redis::request_wrapper&& req) { - return send_message(this, messaging_verb::REDIS_INTERNAL_DATA_BUS, std::move(id), std::move(req)); -} -*/ } // namespace net diff --git a/scylla/message/messaging_service.hh b/message/messaging_service.hh similarity index 51% rename from scylla/message/messaging_service.hh rename to message/messaging_service.hh index f26cdc9..3a6c2a4 100644 --- a/scylla/message/messaging_service.hh +++ b/message/messaging_service.hh @@ -85,37 +85,16 @@ namespace netw { /* All verb handler identifiers */ enum class messaging_verb : int32_t { CLIENT_ID = 0, - MUTATION = 1, - MUTATION_DONE = 2, - READ_DATA = 3, - READ_MUTATION_DATA = 4, - READ_DIGEST = 5, // Used by gossip - GOSSIP_DIGEST_SYN = 6, - GOSSIP_DIGEST_ACK = 7, - GOSSIP_DIGEST_ACK2 = 8, - GOSSIP_ECHO = 9, - GOSSIP_SHUTDOWN = 10, + GOSSIP_DIGEST_SYN = 1, + GOSSIP_DIGEST_ACK = 2, + GOSSIP_DIGEST_ACK2 = 3, + GOSSIP_ECHO = 4, + GOSSIP_SHUTDOWN = 5, // end of gossip verb - DEFINITIONS_UPDATE = 11, - TRUNCATE = 12, - REPLICATION_FINISHED = 13, - MIGRATION_REQUEST = 14, - // Used by streaming - PREPARE_MESSAGE = 15, - PREPARE_DONE_MESSAGE = 16, - STREAM_MUTATION = 17, - STREAM_MUTATION_DONE = 18, - COMPLETE_MESSAGE = 19, - // end of streaming verbs - REPAIR_CHECKSUM_RANGE = 20, - GET_SCHEMA_VERSION = 21, - SCHEMA_CHECK = 22, - COUNTER_MUTATION = 23, - // USED by REDIS - REDIS_INTERNAL_DATA_BUS = 24, - LAST = 25, + REDIS_INTERNAL_DATA_BUS = 6, + LAST = 7, }; } // namespace netw @@ -239,31 +218,6 @@ public: future<> init_local_preferred_ip_cache(); void cache_preferred_ip(gms::inet_address ep, gms::inet_address ip); - // 
Wrapper for PREPARE_MESSAGE verb - void register_prepare_message(std::function (const rpc::client_info& cinfo, - streaming::prepare_message msg, UUID plan_id, sstring description)>&& func); - future send_prepare_message(msg_addr id, streaming::prepare_message msg, UUID plan_id, - sstring description); - - // Wrapper for PREPARE_DONE_MESSAGE verb - void register_prepare_done_message(std::function (const rpc::client_info& cinfo, UUID plan_id, unsigned dst_cpu_id)>&& func); - future<> send_prepare_done_message(msg_addr id, UUID plan_id, unsigned dst_cpu_id); - - // Wrapper for STREAM_MUTATION verb - void register_stream_mutation(std::function (const rpc::client_info& cinfo, UUID plan_id, frozen_mutation fm, unsigned dst_cpu_id, rpc::optional)>&& func); - future<> send_stream_mutation(msg_addr id, UUID plan_id, frozen_mutation fm, unsigned dst_cpu_id, bool fragmented); - - void register_stream_mutation_done(std::function (const rpc::client_info& cinfo, UUID plan_id, dht::token_range_vector ranges, UUID cf_id, unsigned dst_cpu_id)>&& func); - future<> send_stream_mutation_done(msg_addr id, UUID plan_id, dht::token_range_vector ranges, UUID cf_id, unsigned dst_cpu_id); - - void register_complete_message(std::function (const rpc::client_info& cinfo, UUID plan_id, unsigned dst_cpu_id)>&& func); - future<> send_complete_message(msg_addr id, UUID plan_id, unsigned dst_cpu_id); - - // Wrapper for REPAIR_CHECKSUM_RANGE verb - void register_repair_checksum_range(std::function (sstring keyspace, sstring cf, dht::token_range range, rpc::optional hash_version)>&& func); - void unregister_repair_checksum_range(); - future send_repair_checksum_range(msg_addr id, sstring keyspace, sstring cf, dht::token_range range, repair_checksum hash_version); - // Wrapper for GOSSIP_ECHO verb void register_gossip_echo(std::function ()>&& func); void unregister_gossip_echo(); @@ -289,76 +243,6 @@ public: void unregister_gossip_digest_ack2(); future<> send_gossip_digest_ack2(msg_addr id, gms::gossip_digest_ack2 msg); - // Wrapper for DEFINITIONS_UPDATE - void register_definitions_update(std::function fm)>&& func); - void unregister_definitions_update(); - future<> send_definitions_update(msg_addr id, std::vector fm); - - // Wrapper for MIGRATION_REQUEST - void register_migration_request(std::function> (const rpc::client_info&)>&& func); - void unregister_migration_request(); - future> send_migration_request(msg_addr id); - - // FIXME: response_id_type is an alias in service::storage_proxy::response_id_type - using response_id_type = uint64_t; - // Wrapper for MUTATION - void register_mutation(std::function (const rpc::client_info&, rpc::opt_time_point, frozen_mutation fm, std::vector forward, - inet_address reply_to, unsigned shard, response_id_type response_id, rpc::optional> trace_info)>&& func); - void unregister_mutation(); - future<> send_mutation(msg_addr id, clock_type::time_point timeout, const frozen_mutation& fm, std::vector forward, - inet_address reply_to, unsigned shard, response_id_type response_id, std::experimental::optional trace_info = std::experimental::nullopt); - - // Wrapper for COUNTER_MUTATION - void register_counter_mutation(std::function (const rpc::client_info&, rpc::opt_time_point, std::vector fms, db::consistency_level cl, stdx::optional trace_info)>&& func); - void unregister_counter_mutation(); - future<> send_counter_mutation(msg_addr id, clock_type::time_point timeout, std::vector fms, db::consistency_level cl, stdx::optional trace_info = std::experimental::nullopt); - - // Wrapper for 
MUTATION_DONE - void register_mutation_done(std::function (const rpc::client_info& cinfo, unsigned shard, response_id_type response_id)>&& func); - void unregister_mutation_done(); - future<> send_mutation_done(msg_addr id, unsigned shard, response_id_type response_id); - - // Wrapper for READ_DATA - // Note: WTH is future> - void register_read_data(std::function>, cache_temperature> (const rpc::client_info&, query::read_command cmd, compat::wrapping_partition_range pr, rpc::optional digest)>&& func); - void unregister_read_data(); - future> send_read_data(msg_addr id, clock_type::time_point timeout, const query::read_command& cmd, const dht::partition_range& pr, query::digest_algorithm da); - - // Wrapper for GET_SCHEMA_VERSION - void register_get_schema_version(std::function(unsigned, table_schema_version)>&& func); - void unregister_get_schema_version(); - future send_get_schema_version(msg_addr, table_schema_version); - - // Wrapper for SCHEMA_CHECK - void register_schema_check(std::function()>&& func); - void unregister_schema_check(); - future send_schema_check(msg_addr); - - // Wrapper for READ_MUTATION_DATA - void register_read_mutation_data(std::function>, cache_temperature> (const rpc::client_info&, query::read_command cmd, compat::wrapping_partition_range pr)>&& func); - void unregister_read_mutation_data(); - future> send_read_mutation_data(msg_addr id, clock_type::time_point timeout, const query::read_command& cmd, const dht::partition_range& pr); - - // Wrapper for READ_DIGEST - void register_read_digest(std::function (const rpc::client_info&, query::read_command cmd, compat::wrapping_partition_range pr)>&& func); - void unregister_read_digest(); - future, rpc::optional> send_read_digest(msg_addr id, clock_type::time_point timeout, const query::read_command& cmd, const dht::partition_range& pr); - - // Wrapper for TRUNCATE - void register_truncate(std::function(sstring, sstring)>&& func); - void unregister_truncate(); - future<> send_truncate(msg_addr, std::chrono::milliseconds, sstring, sstring); - -/* - // Wrapper for REDIS_INTERNAL_DATA_BUS - void register_redis_internal_data_bus(std::function(std::function< future (const rpc::client_info&, redis::request_wrapper&&)> && func)>); - void unregister_redis_internal_data_bus(); - future redis_internal_data_bus(msg_addr id, redis::request_wrapper&& req); -*/ - // Wrapper for REPLICATION_FINISHED verb - void register_replication_finished(std::function (inet_address from)>&& func); - void unregister_replication_finished(); - future<> send_replication_finished(msg_addr id, inet_address from); void foreach_server_connection_stats(std::function&& f) const; private: bool remove_rpc_client_one(clients_map& clients, msg_addr id, bool dead_only); diff --git a/scylla/message/messaging_service_fwd.hh b/message/messaging_service_fwd.hh similarity index 100% rename from scylla/message/messaging_service_fwd.hh rename to message/messaging_service_fwd.hh diff --git a/redis_storage_proxy.cc b/proxy.cc similarity index 90% rename from redis_storage_proxy.cc rename to proxy.cc index 168baf6..4333fb8 100644 --- a/redis_storage_proxy.cc +++ b/proxy.cc @@ -1,20 +1,10 @@ -//FIXME: Copyright header -#include "partition_range_compat.hh" -#include "db/consistency_level.hh" -#include "db/commitlog/commitlog.hh" -#include "redis_storage_proxy.hh" -#include "unimplemented.hh" -#include "frozen_mutation.hh" -#include "query_result_merger.hh" +#include "proxy.hh" #include "core/do_with.hh" #include "message/messaging_service.hh" #include 
"gms/failure_detector.hh" #include "gms/gossiper.hh" -#include "service/storage_service.hh" #include "core/future-util.hh" -#include "db/read_repair_decision.hh" #include "db/config.hh" -#include "db/batchlog_manager.hh" #include "exceptions/exceptions.hh" #include #include @@ -32,13 +22,11 @@ #include #include #include "utils/latency.hh" -#include "schema.hh" -#include "schema_registry.hh" #include "utils/joinpoint.hh" #include #include "core/metrics.hh" #include -#include "redis_service.hh" +#include "service.hh" #include "request_wrapper.hh" #include "result.hh" #include "reply_builder.hh" diff --git a/redis_storage_proxy.hh b/proxy.hh similarity index 58% rename from redis_storage_proxy.hh rename to proxy.hh index 8355ccc..646bafd 100644 --- a/redis_storage_proxy.hh +++ b/proxy.hh @@ -1,38 +1,22 @@ //FIXME: Copyright header #pragma once -#include "database.hh" -#include "query-request.hh" -#include "query-result.hh" -#include "query-result-set.hh" #include "core/distributed.hh" #include "db/consistency_level.hh" #include "db/write_type.hh" #include "utils/histogram.hh" #include "utils/estimated_histogram.hh" -#include "tracing/trace_state.hh" #include -namespace compat { - -class one_or_two_partition_ranges; - -} -namespace dht { - class decorated_key; -} namespace redis { - class redis_service; + class service; class request_wrapper; class result; -} - -namespace service { -class redis_storage_proxy : public seastar::async_sharded_service { +class proxy : public seastar::async_sharded_service { public: using clock_type = lowres_clock; private: - distributed _redis; + distributed _redis; seastar::metrics::metric_groups _metrics; private: void uninit_messaging_service() {} @@ -46,8 +30,8 @@ private: future<> execute_command_del(const redis::request_wrapper& req, output_stream& out); public: - redis_storage_proxy(); - ~redis_storage_proxy(); + proxy(); + ~proxy(); future<> execute(const redis::request_wrapper& req, output_stream& out); void init_messaging_service(); @@ -56,18 +40,18 @@ public: future<> stop(); }; -extern distributed _the_redis_storage_proxy; +extern distributed _the_proxy; -inline distributed& get_redis_storage_proxy() { - return _the_redis_storage_proxy; +inline distributed& get_proxy() { + return _the_proxy; } -inline redis_storage_proxy& get_local_redis_storage_proxy() { - return _the_redis_storage_proxy.local(); +inline proxy& get_local_proxy() { + return _the_proxy.local(); } -inline shared_ptr get_local_shared_redis_storage_proxy() { - return _the_redis_storage_proxy.local_shared(); +inline shared_ptr get_local_shared_proxy() { + return _the_proxy.local_shared(); } } diff --git a/redis.cc b/redis.cc index 2d87fcd..d6ab878 100644 --- a/redis.cc +++ b/redis.cc @@ -18,7 +18,7 @@ * Copyright (c) 2016-2026, Peng Jian, pstack@163.com. All rights reserved. 
* */ -#include "redis.hh" +#include "service.hh" #include #include #include @@ -36,7 +36,7 @@ #include "core/align.hh" #include "net/api.hh" #include "net/packet-data-source.hh" -#include "util/log.hh" +#include "log.hh" #include #include #include "redis_protocol.hh" @@ -51,2037 +51,39 @@ namespace redis { using logger = seastar::logger; static logger redis_log ("redis"); -namespace stdx = std::experimental; +distributed _the_redis_service; -distributed _the_redis; - -future<> redis_service::start() +future<> service::set(const dht::decorated_key& dk, const sstring& value, output_stream& out) { return make_ready_future<>(); -} - -future<> redis_service::stop() -{ - return make_ready_future<>(); -} - -future redis_service::echo(args_collection& args) -{ - if (args._command_args_count < 1) { - return make_ready_future(""); - } - sstring& message = args._command_args[0]; - return make_ready_future(std::move(message)); -} - -future redis_service::set_impl(sstring& key, sstring& val, long expir, uint8_t flag) -{ - redis_key rk { std::ref(key) }; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::set_direct, std::move(rk), std::ref(val), expir, flag).then([] (auto&& m) { - return m == REDIS_OK; - }); -} - -future<> redis_service::set(args_collection& args, output_stream& out) -{ - // parse args - if (args._command_args_count < 2) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& val = args._command_args[1]; - long expir = 0; - uint8_t flag = FLAG_SET_NO; - // [EX seconds] [PS milliseconds] [NX] [XX] - if (args._command_args_count > 2) { - for (unsigned int i = 2; i < args._command_args_count; ++i) { - sstring* v = (i == args._command_args_count - 1) ? nullptr : &(args._command_args[i + 1]); - sstring& o = args._command_args[i]; - if (o.size() != 2) { - return out.write(msg_syntax_err); - } - if ((o[0] == 'e' || o[0] == 'E') && (o[1] == 'x' || o[1] == 'X') && o[2] == '\0') { - flag |= FLAG_SET_EX; - if (v == nullptr) { - return out.write(msg_syntax_err); - } - try { - expir = std::atol(v->c_str()) * 1000; - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - i++; - } - if ((o[0] == 'p' || o[0] == 'P') && (o[1] == 'x' || o[1] == 'X') && o[2] == '\0') { - flag |= FLAG_SET_PX; - if (v == nullptr) { - return out.write(msg_syntax_err); - } - try { - expir = std::atol(v->c_str()); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - i++; - } - if ((o[0] == 'n' || o[0] == 'N') && (o[1] == 'x' || o[1] == 'X') && o[2] == '\0') { - flag |= FLAG_SET_NX; - } - if ((o[0] == 'x' || o[0] == 'X') && (o[1] == 'x' || o[1] == 'X') && o[2] == '\0') { - flag |= FLAG_SET_XX; - } - } - } - redis_key rk { std::ref(key) }; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::set, std::move(rk), std::ref(val), expir, flag).then([&out] (auto&& m) { + /* + auto cpu = get_cpu(dk); + return _db.invoke_on(cpu, &database::set, std::ref(dk.key()), std::ref(value), 0, 0).then([&out] (auto&& m) { return out.write(std::move(*m)); });; + */ } -future redis_service::remove_impl(sstring& key) { - redis_key rk { std::ref(key) }; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::del_direct, std::move(rk)); -} -
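The helpers being dropped here (set_impl, remove_impl, and friends) all share one routing idiom that the new service keeps: derive the owning shard from the key, then run the operation there through distributed<>::invoke_on. A distilled sketch of the idiom; the modulo hash is a simplification of the real token-based get_cpu() routing, and all names are illustrative:

// Illustrative only -- a stand-in for get_cpu()'s partitioner-based routing.
unsigned owner_shard_sketch(const sstring& key) {
    return std::hash<sstring>()(key) % smp::count;
}

future<bool> del_on_owner(distributed<database>& db, sstring& key) {
    redis_key rk { std::ref(key) };
    // Hop to the shard that owns the key; the lambda-free member-function
    // form of invoke_on forwards the arguments to that shard's instance.
    return db.invoke_on(owner_shard_sketch(key), &database::del_direct, std::move(rk));
}

-future<> redis_service::del(args_collection& args, output_stream& out) +future<> service::del(const dht::decorated_key& dk, output_stream& out) { - if (args._command_args_count <= 0 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - if (args._command_args.size() == 1) { - sstring& key =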
args._command_args[0]; - return remove_impl(key).then([&out] (auto r) { - return out.write( r ? msg_one : msg_zero); - }); - } - else { - struct mdel_state { - std::vector& keys; - size_t success_count; - }; - for (size_t i = 0; i < args._command_args_count; ++i) { - args._tmp_keys.emplace_back(args._command_args[i]); - } - return do_with(mdel_state{args._tmp_keys, 0}, [this, &out] (auto& state) { - return parallel_for_each(std::begin(state.keys), std::end(state.keys), [this, &state] (auto& key) { - return this->remove_impl(key).then([&state] (auto r) { - if (r) state.success_count++; - }); - }).then([&state, &out] { - return reply_builder::build_local(out, state.success_count); - }); - }); - } -} - -future<> redis_service::mset(args_collection& args, output_stream& out) +future<> service::del(const dht::decorated_key& dk, output_stream& out) { - if (args._command_args_count <= 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - if (args._command_args.size() % 2 != 0) { - return out.write(msg_syntax_err); - } - struct mset_state { - std::vector>& key_value_pairs; - size_t success_count; - }; - auto pair_size = args._command_args.size() / 2; - for (size_t i = 0; i < pair_size; ++i) { - args._tmp_key_value_pairs.emplace_back(std::make_pair(std::move(args._command_args[i * 2]), std::move(args._command_args[i * 2 + 1]))); - } - return do_with(mset_state{std::ref(args._tmp_key_value_pairs), 0}, [this, &out] (auto& state) { - return parallel_for_each(std::begin(state.key_value_pairs), std::end(state.key_value_pairs), [this, &state] (auto& entry) { - sstring& key = entry.first; - sstring& value = entry.second; - redis_key rk {std::ref(key)}; - return get_database().invoke_on(this->get_cpu(rk), &database::set_direct, std::move(rk), std::ref(value), 0, FLAG_SET_NO).then([&state] (auto m) { - if (m) state.success_count++ ; - }); - }).then([&state, &out] { - return out.write(state.key_value_pairs.size() == state.success_count ? 
msg_ok : msg_err); - }); - }); -} - -future<> redis_service::get(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - redis_key rk { std::ref(key) }; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::get, std::move(rk)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::mget(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1) { - return out.write(msg_syntax_err); - } - using return_type = foreign_ptr>; - struct mget_state { - std::vector keys; - std::vector values; - }; - for (size_t i = 0; i < args._command_args_count; ++i) { - args._tmp_keys.emplace_back(args._command_args[i]); - } - return do_with(mget_state{std::move(args._tmp_keys), {}}, [this, &out] (auto& state) { - return parallel_for_each(std::begin(state.keys), std::end(state.keys), [this, &state] (sstring& key) { - redis_key rk { std::ref(key) }; - return get_database().invoke_on(this->get_cpu(rk), &database::get_direct, std::move(rk)).then([&state] (auto&& m) { - if (m) { - state.values.emplace_back(std::move(m)); - } - }); - }).then([&state, &out] { - return reply_builder::build_local(out, state.values); - }); - }); -} - -future<> redis_service::strlen(args_collection& args, output_stream& out) -{ - if (args._command_args_count <= 0 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - redis_key rk { std::ref(key) }; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::strlen, std::ref(rk)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future redis_service::exists_impl(sstring& key) -{ - redis_key rk { std::ref(key) }; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::exists_direct, std::move(rk)); -} - -future<> redis_service::exists(args_collection& args, output_stream& out) -{ - if (args._command_args_count <= 0 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - if (args._command_args_count == 1) { - sstring& key = args._command_args[0]; - return exists_impl(key).then([&out] (auto r) { - return out.write( r ? 
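The removed mget/mset/del/exists paths all share one shape: fan out one per-key operation to the shard that owns the key, collect the results in per-request state, then write a single reply. A self-contained illustration of that shape in plain C++, with std::async standing in for Seastar's per-shard invoke_on (all names here are illustrative, not this codebase's API):

    #include <future>
    #include <optional>
    #include <string>
    #include <vector>

    // Scatter-gather read: dispatch one lookup per key, then assemble
    // a single reply once every lookup has resolved.
    std::vector<std::string> mget(const std::vector<std::string>& keys) {
        std::vector<std::future<std::optional<std::string>>> lookups;
        lookups.reserve(keys.size());
        for (const auto& key : keys) {
            lookups.push_back(std::async(std::launch::async, [key] {
                // stand-in for database::get_direct on the key's owning shard
                return std::optional<std::string>{"value-of-" + key};
            }));
        }
        std::vector<std::string> values;
        for (auto& f : lookups) {
            if (auto v = f.get()) {
                values.push_back(std::move(*v));
            }
        }
        return values;
    }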
msg_one : msg_zero); - }); - } - else { - struct mexists_state { - std::vector& keys; - size_t success_count; - }; - for (size_t i = 0; i < args._command_args_count; ++i) { - args._tmp_keys.emplace_back(args._command_args[i]); - } - return do_with(mexists_state{std::ref(args._tmp_keys), 0}, [this, &out] (auto& state) { - return parallel_for_each(std::begin(state.keys), std::end(state.keys), [this, &state] (auto& key) { - return this->exists_impl(key).then([&state] (auto r) { - if (r) state.success_count++; - }); - }).then([&state, &out] { - return reply_builder::build_local(out, state.success_count); - }); - }); - } -} - -future<> redis_service::append(args_collection& args, output_stream& out) -{ - if (args._command_args_count <= 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& val = args._command_args[1]; - redis_key rk { std::ref(key) }; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::append, std::move(rk), std::ref(val)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::push_impl(sstring& key, sstring& val, bool force, bool left, output_stream& out) -{ - redis_key rk {std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::push, std::move(rk), std::ref(val), force, left).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::push_impl(sstring& key, std::vector& vals, bool force, bool left, output_stream& out) -{ - redis_key rk {std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::push_multi, std::move(rk), std::ref(vals), force, left).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::push_impl(args_collection& args, bool force, bool left, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - if (args._command_args_count == 2) { - sstring& value = args._command_args[1]; - return push_impl(key, value, force, left, out); - } - else { - for (size_t i = 1; i < args._command_args.size(); ++i) args._tmp_keys.emplace_back(args._command_args[i]); - return push_impl(key, args._tmp_keys, force, left, out); - } -} - -future<> redis_service::lpush(args_collection& args, output_stream& out) -{ - return push_impl(args, true, false, out); -} - -future<> redis_service::lpushx(args_collection& args, output_stream& out) -{ - return push_impl(args, false, false, out); -} - -future<> redis_service::rpush(args_collection& args, output_stream& out) -{ - return push_impl(args, true, true, out); -} - -future<> redis_service::rpushx(args_collection& args, output_stream& out) -{ - return push_impl(args, false, true, out); -} - -future<> redis_service::lpop(args_collection& args, output_stream& out) -{ - return pop_impl(args, false, out); -} - -future<> redis_service::rpop(args_collection& args, output_stream& out) -{ - return pop_impl(args, true, out); -} - -future<> redis_service::pop_impl(args_collection& args, bool left, output_stream& out) -{ - if (args._command_args_count <= 0 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - redis_key rk {std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::pop, std::move(rk), left).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); 
-} - -future<> redis_service::lindex(args_collection& args, output_stream& out) -{ - if (args._command_args_count <= 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - int idx = std::atoi(args._command_args[1].c_str()); - redis_key rk {std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::lindex, std::move(rk), idx).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::llen(args_collection& args, output_stream& out) -{ - if (args._command_args_count <= 0 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - auto cpu = get_cpu(key); - redis_key rk {std::ref(key)}; - return get_database().invoke_on(cpu, &database::llen, std::move(rk)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::linsert(args_collection& args, output_stream& out) -{ - if (args._command_args_count <= 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& dir = args._command_args[1]; - sstring& pivot = args._command_args[2]; - sstring& value = args._command_args[3]; - std::transform(dir.begin(), dir.end(), dir.begin(), ::toupper); - bool after = true; - if (dir == "BEFORE") after = false; - redis_key rk {std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::linsert, std::move(rk), std::ref(pivot), std::ref(value), after).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::lrange(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& s = args._command_args[1]; - sstring& e = args._command_args[2]; - int start = std::atoi(s.c_str()); - int end = std::atoi(e.c_str()); - redis_key rk {std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::lrange, std::move(rk), start, end).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::lset(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& index = args._command_args[1]; - sstring& value = args._command_args[2]; - int idx = std::atoi(index.c_str()); - redis_key rk {std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::lset, std::move(rk), idx, std::ref(value)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::ltrim(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - int start = std::atoi(args._command_args[1].c_str()); - int stop = std::atoi(args._command_args[2].c_str()); - redis_key rk {std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::ltrim, std::move(rk), start, stop).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::lrem(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = 
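The removed lindex/lrange/ltrim pass raw std::atoi results straight through, while Redis list indices may also be negative, counting from the tail (-1 is the last element); the database side presumably normalizes them. A sketch of that normalization under the standard Redis convention (the empty-range sentinel is an assumption for the sketch):

    #include <algorithm>
    #include <cstddef>
    #include <utility>

    // Normalize Redis-style list indices against the current list size.
    // Returns {1, 0} (begin > end) to signal an empty range.
    std::pair<std::size_t, std::size_t> normalize_range(long start, long end, std::size_t size) {
        long s = start < 0 ? start + static_cast<long>(size) : start;
        long e = end < 0 ? end + static_cast<long>(size) : end;
        s = std::max(s, 0L);
        e = std::min(e, static_cast<long>(size) - 1);
        if (size == 0 || s > e) {
            return {1, 0};
        }
        return {static_cast<std::size_t>(s), static_cast<std::size_t>(e)};
    }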
args._command_args[0]; - int count = std::atoi(args._command_args[1].c_str()); - sstring& value = args._command_args[2]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::lrem, std::move(rk), count, std::ref(value)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::incr(args_collection& args, output_stream& out) -{ - return counter_by(args, true, false, out); -} - -future<> redis_service::incrby(args_collection& args, output_stream& out) -{ - return counter_by(args, true, true, out); -} -future<> redis_service::decr(args_collection& args, output_stream& out) -{ - return counter_by(args, false, false, out); -} -future<> redis_service::decrby(args_collection& args, output_stream& out) -{ - return counter_by(args, false, true, out); -} - -future<> redis_service::counter_by(args_collection& args, bool incr, bool with_step, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty() || (with_step == true && args._command_args_count <= 1)) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - uint64_t step = 1; - if (with_step) { - sstring& s = args._command_args[1]; - try { - step = std::atol(s.c_str()); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - } - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::counter_by, std::move(rk), step, incr).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::hdel(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& field = args._command_args[1]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - if (args._command_args_count == 2) { - return get_database().invoke_on(cpu, &database::hdel, std::move(rk), std::ref(field)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); - } - else { - for (size_t i = 1; i < args._command_args.size(); ++i) args._tmp_keys.emplace_back(args._command_args[i]); - auto& keys = args._tmp_keys; - return get_database().invoke_on(cpu, &database::hdel_multi, std::move(rk), std::ref(keys)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); - } -} - -future<> redis_service::hexists(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& field = args._command_args[1]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::hexists, std::move(rk), std::ref(field)).then([&out] (auto&& m) { - out.write(std::move(*m)); - }); -} - -future<> redis_service::hset(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& field = args._command_args[1]; - sstring& val = args._command_args[2]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::hset, std::move(rk), std::ref(field), std::ref(val)).then([&out] (auto&& m) { - out.write(std::move(*m)); - }); -} - -future<> redis_service::hmset(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 3 || 
args._command_args.empty()) { - return out.write(msg_syntax_err); - } - unsigned int field_count = (args._command_args_count - 1) / 2; - sstring& key = args._command_args[0]; - for (unsigned int i = 0; i < field_count; ++i) { - args._tmp_key_values.emplace(std::make_pair(args._command_args[i], args._command_args[i + 1])); - } - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::hmset, std::move(rk), std::ref(args._tmp_key_values)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::hincrby(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& field = args._command_args[1]; - sstring& val = args._command_args[2]; - int delta = std::atoi(val.c_str()); - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::hincrby, std::move(rk), std::ref(field), delta).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::hincrbyfloat(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& field = args._command_args[1]; - sstring& val = args._command_args[2]; - double delta = std::atof(val.c_str()); - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::hincrbyfloat, std::move(rk), std::ref(field), delta).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::hlen(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::hlen, std::move(rk)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::hstrlen(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& field = args._command_args[1]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::hstrlen, std::move(rk), std::ref(field)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::hget(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& field = args._command_args[1]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::hget, std::move(rk), std::ref(field)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::hgetall(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::hgetall, std::move(rk)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> 
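Note a pairing bug in the removed hmset above: the loop pairs _command_args[i] with _command_args[i + 1], so for "HMSET key f1 v1 f2 v2" it stores (key, f1) and (f1, v1) instead of the field/value pairs. The presumable intent, as a self-contained sketch:

    #include <string>
    #include <unordered_map>
    #include <vector>

    // Fields start after the key at index 0, so pair (2i+1, 2i+2).
    std::unordered_map<std::string, std::string>
    pair_fields(const std::vector<std::string>& command_args) {
        std::unordered_map<std::string, std::string> kv;
        const std::size_t field_count = (command_args.size() - 1) / 2;
        for (std::size_t i = 0; i < field_count; ++i) {
            kv.emplace(command_args[i * 2 + 1], command_args[i * 2 + 2]);
        }
        return kv;
    }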
redis_service::hgetall_keys(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::hgetall_keys, std::move(rk)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::hgetall_values(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::hgetall_values, std::move(rk)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::hmget(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - for (unsigned int i = 1; i < args._command_args_count; ++i) { - args._tmp_keys.emplace_back(std::move(args._command_args[i])); - } - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - auto& keys = args._tmp_keys; - return get_database().invoke_on(cpu, &database::hmget, std::move(rk), std::ref(keys)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::smembers_impl(sstring& key, output_stream& out) -{ - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::smembers, std::move(rk)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::smembers(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - return smembers_impl(key, out); -} - -future<> redis_service::sadds_impl(sstring& key, std::vector& members, output_stream& out) -{ - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::sadds, std::move(rk), std::ref(members)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::sadds_impl_return_keys(sstring& key, std::vector& members, output_stream& out) -{ - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::sadds_direct, std::move(rk), std::ref(members)).then([&out, &members] (auto m) { - if (m) - return reply_builder::build_local(out, members); - return reply_builder::build_local(out, msg_err); - }); -} - -future<> redis_service::sadd(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - for (uint32_t i = 1; i < args._command_args_count; ++i) args._tmp_keys.emplace_back(std::move(args._command_args[i])); - auto& keys = args._tmp_keys; - return sadds_impl(key, std::ref(keys), out); -} - -future<> redis_service::scard(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::scard, std::move(rk)).then([&out] 
(auto&& m) { - return out.write(std::move(*m)); - }); -} -future<> redis_service::sismember(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& member = args._command_args[1]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::sismember, std::move(rk), std::ref(member)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::srem(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - for (uint32_t i = 1; i < args._command_args_count; ++i) args._tmp_keys.emplace_back(std::move(args._command_args[i])); - auto& keys = args._tmp_keys; - return get_database().invoke_on(cpu, &database::srems, std::move(rk), std::ref(keys)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::sdiff_store(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& dest = args._command_args[0]; - for (size_t i = 1; i < args._command_args.size(); ++i) { - args._tmp_keys.emplace_back(std::move(args._command_args[i])); - } - return sdiff_impl(args._tmp_keys, &dest, out); -} - -future<> redis_service::sdiff(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - if (args._command_args_count == 1) { - return smembers(args, out); - } - return sdiff_impl(std::ref(args._command_args), nullptr, out); -} - -future<> redis_service::sdiff_impl(std::vector& keys, sstring* dest, output_stream& out) -{ - using item_unordered_map = std::unordered_map>; - struct sdiff_state { - item_unordered_map items_set; - std::vector& keys; - sstring* dest = nullptr; - std::vector result; - }; - uint32_t count = static_cast(keys.size()); - return do_with(sdiff_state{item_unordered_map{}, std::ref(keys), dest, {}}, [&out, this, count] (auto& state) { - return parallel_for_each(boost::irange(0, count), [this, &state] (unsigned k) { - sstring& key = state.keys[k]; - redis_key rk { std::ref(key) }; - auto cpu = this->get_cpu(rk); - return get_database().invoke_on(cpu, &database::smembers_direct, std::move(rk)).then([&state, index = k] (auto&& members) { - state.items_set[index] = std::move(*members); - }); - }).then([this, &out, &state, count] { - auto& temp = state.items_set[0]; - for (uint32_t i = 1; i < count; ++i) { - auto&& next_items = std::move(state.items_set[i]); - for (auto& item : next_items) { - stdx::erase_if(temp, [&item] (auto& o) { return item == o; }); - } - } - std::vector& result = temp; - if (state.dest) { - return this->sadds_impl_return_keys(*state.dest, result, out); - } - return reply_builder::build_local(out, result); - }); - }); -} - -future<> redis_service::sinter_impl(std::vector& keys, sstring* dest, output_stream& out) -{ - using item_unordered_map = std::unordered_map>; - struct sinter_state { - item_unordered_map items_set; - std::vector& keys; - sstring* dest = nullptr; - }; - uint32_t count = static_cast(keys.size()); - return do_with(sinter_state{item_unordered_map{}, std::ref(keys), dest}, [this, &out, count] (auto& state) { - 
return parallel_for_each(boost::irange(0, count), [this, &state] (unsigned k) { - sstring& key = state.keys[k]; - redis_key rk { std::ref(key) }; - auto cpu = this->get_cpu(rk); - return get_database().invoke_on(cpu, &database::smembers_direct, std::move(rk)).then([&state, index = k] (auto&& members) { - state.items_set[index] = std::move(*members); - }); - }).then([this, &out, &state, count] { - auto& result = state.items_set[0]; - for (uint32_t i = 1; i < count; ++i) { - auto& next_items = state.items_set[i]; - if (result.empty() || next_items.empty()) { - return reply_builder::build_local(out, result); - } - std::vector temp; - for (auto& item : next_items) { - if (std::find_if(result.begin(), result.end(), [&item] (auto& o) { return item == o; }) != result.end()) { - temp.emplace_back(std::move(item)); - } - } - result = std::move(temp); - } - if (state.dest) { - return this->sadds_impl_return_keys(*state.dest, result, out); - } - return reply_builder::build_local(out, result); - }); - }); -} - -future<> redis_service::sinter(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - if (args._command_args_count == 1) { - return smembers(args, out); - } - return sinter_impl(args._command_args, nullptr, out); -} - -future<> redis_service::sinter_store(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& dest = args._command_args[0]; - for (size_t i = 1; i < args._command_args.size(); ++i) { - args._tmp_keys.emplace_back(std::move(args._command_args[i])); - } - return sinter_impl(args._tmp_keys, &dest, out); -} - -future<> redis_service::sunion_impl(std::vector& keys, sstring* dest, output_stream& out) -{ - struct union_state { - std::vector result; - std::vector& keys; - sstring* dest = nullptr; - }; - uint32_t count = static_cast(keys.size()); - return do_with(union_state{{}, std::ref(keys), dest}, [this, &out, count] (auto& state) { - return parallel_for_each(boost::irange(0, count), [this, &out, &state] (unsigned k) { - sstring& key = state.keys[k]; - redis_key rk { std::ref(key) }; - auto cpu = this->get_cpu(rk); - return get_database().invoke_on(cpu, &database::smembers_direct, std::move(rk)).then([&state] (auto&& members) { - auto& result = state.result; - for (auto& item : *members) { - if (std::find_if(result.begin(), result.end(), [&item] (auto& o) { return o == item; }) == result.end()) { - result.emplace_back(std::move(item)); - } - } - }); - }).then([this, &state, &out] { - auto& result = state.result; - if (state.dest) { - return this->sadds_impl_return_keys(*state.dest, result, out); - } - return reply_builder::build_local(out, result); - }); - }); -} - -future<> redis_service::sunion(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - if (args._command_args_count == 1) { - return smembers(args, out); - } - return sunion_impl(args._command_args, nullptr, out); -} - -future<> redis_service::sunion_store(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& dest = args._command_args[0]; - for (size_t i = 1; i < args._command_args.size(); ++i) { - args._tmp_keys.emplace_back(std::move(args._command_args[i])); - } - return sunion_impl(args._tmp_keys, &dest, 
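The removed sdiff_impl/sinter_impl compare element by element with stdx::erase_if and std::find_if, which is O(n*m) per pair of input sets. Hashing one side first makes the intersection linear; a self-contained sketch (plain C++, std::string standing in for sstring):

    #include <string>
    #include <unordered_set>
    #include <vector>

    // O(n + m) set intersection: build a hash lookup from one input,
    // then probe it with the other. erase() returns 1 only on the first
    // match, so duplicates in b are counted once.
    std::vector<std::string> intersect(const std::vector<std::string>& a,
                                       const std::vector<std::string>& b) {
        std::unordered_set<std::string> lookup(a.begin(), a.end());
        std::vector<std::string> out;
        for (const auto& item : b) {
            if (lookup.erase(item)) {
                out.push_back(item);
            }
        }
        return out;
    }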
out); -} - -future redis_service::srem_direct(sstring& key, sstring& member) -{ - redis_key rk {std::ref(key) }; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::srem_direct, rk, std::ref(member)); -} - -future redis_service::sadd_direct(sstring& key, sstring& member) -{ - redis_key rk {std::ref(key) }; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::sadd_direct, rk, std::ref(member)); -} - -future<> redis_service::smove(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& dest = args._command_args[1]; - sstring& member = args._command_args[2]; - struct smove_state { - sstring& src; - sstring& dst; - sstring& member; - }; - return do_with(smove_state{std::ref(key), std::ref(dest), std::ref(member)}, [this, &out] (auto& state) { - return this->srem_direct(state.src, state.member).then([this, &state, &out] (auto u) { - if (u) { - return this->sadd_direct(state.dst, state.member).then([&out] (auto m) { - return out.write(m ? msg_one : msg_zero); - }); - } - return out.write(msg_zero); - }); - }); -} - -future<> redis_service::srandmember(args_collection& args, output_stream& out) -{ - if (args._command_args_count <= 0 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - size_t count = 1; - if (args._command_args_count > 1) { - try { - count = std::stol(args._command_args[1].c_str()); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - } - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::srandmember, rk, count).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::spop(args_collection& args, output_stream& out) -{ - if (args._command_args_count <= 0 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - size_t count = 1; - if (args._command_args_count > 1) { - try { - count = std::stol(args._command_args[1].c_str()); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - } - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::spop, rk, count).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} -future<> redis_service::type(args_collection& args, output_stream& out) -{ - if (args._command_args_count <= 0 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::type, std::move(rk)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::expire(args_collection& args, output_stream& out) -{ - if (args._command_args_count <= 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - long expir = 0; - try { - expir = std::atol(args._command_args[1].c_str()); - expir *= 1000; - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::expire, std::move(rk), expir).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> 
redis_service::pexpire(args_collection& args, output_stream& out) -{ - if (args._command_args_count <= 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - long expir = 0; - try { - expir = std::atol(args._command_args[1].c_str()); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::expire, std::move(rk), expir).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::pttl(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::pttl, std::move(rk)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::ttl(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::ttl, std::move(rk)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::persist(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::persist, std::move(rk)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::zadd(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - std::string un = args._command_args[1]; - std::transform(un.begin(), un.end(), un.begin(), ::tolower); - int zadd_flags = ZADD_CH; - size_t first_score_index = 2; - if (un == "nx") { - zadd_flags |= ZADD_NX; - } - else if (un == "xx") { - zadd_flags |= ZADD_XX; - } - else if (un == "ch") { - zadd_flags |= ZADD_CH; - } - else if (un == "incr") { - zadd_flags |= ZADD_INCR; - } - else { - first_score_index = 1; - } - if (zadd_flags & ZADD_INCR) { - if (args._command_args_count - first_score_index > 2) { - return out.write(msg_syntax_err); - } - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - sstring& member = args._command_args[first_score_index + 1]; - sstring& delta = args._command_args[first_score_index]; - double score = 0; - try { - score = std::stod(delta.c_str()); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - return get_database().invoke_on(cpu, &database::zincrby, std::move(rk), std::ref(member), score).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); - } - else { - if ((args._command_args_count - first_score_index) % 2 != 0 || ((zadd_flags & ZADD_NX) && (zadd_flags & ZADD_XX))) { - return out.write(msg_syntax_err); - } - } - for (size_t i = first_score_index; i < args._command_args_count; i += 2) { - sstring& score_ = args._command_args[i]; - sstring& member = args._command_args[i + 1]; - double score = 0; - try { - score = std::stod(score_.c_str()); - } catch (const 
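The removed zadd recognizes at most one modifier token (it inspects only args[1]), whereas Redis accepts combinations such as "ZADD key NX CH INCR score member". A sketch of a parser that accepts several modifiers and still rejects the invalid NX+XX pair (the bit values are assumptions for the sketch):

    #include <string>

    // Assumed bit values for the ZADD modifiers used above.
    enum { ZADD_CH = 1 << 0, ZADD_NX = 1 << 1, ZADD_XX = 1 << 2, ZADD_INCR = 1 << 3 };

    // Returns true if token was a modifier (and sets its bit); false means
    // the token is already the first score. Caller lowercases the token,
    // as the removed code does.
    bool parse_zadd_flag(const std::string& token, int& flags) {
        if (token == "nx") { flags |= ZADD_NX; return true; }
        if (token == "xx") { flags |= ZADD_XX; return true; }
        if (token == "ch") { flags |= ZADD_CH; return true; }
        if (token == "incr") { flags |= ZADD_INCR; return true; }
        return false;
    }

    // NX and XX are mutually exclusive whatever else is present.
    bool zadd_flags_valid(int flags) {
        return !((flags & ZADD_NX) && (flags & ZADD_XX));
    }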
std::invalid_argument&) { - return out.write(msg_syntax_err); - } - args._tmp_key_scores.emplace(std::pair(member, score)); - } - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::zadds, std::move(rk), std::ref(args._tmp_key_scores), zadd_flags).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::zcard(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::zcard, std::move(rk)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::zrange(args_collection& args, bool reverse, output_stream& out) -{ - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - long begin = 0, end = 0; - try { - begin = std::stoi(args._command_args[1].c_str()); - end = std::stoi(args._command_args[2].c_str()); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - bool with_score = false; - if (args._command_args_count == 4) { - auto ws = args._command_args[3]; - std::transform(ws.begin(), ws.end(), ws.begin(), ::toupper); - if (ws == "WITHSCORES") { - with_score = true; - } - } - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::zrange, std::move(rk), begin, end, reverse, with_score).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::zrangebyscore(args_collection& args, bool reverse, output_stream& out) -{ - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - double min = 0, max = 0; - try { - min = std::stod(args._command_args[1].c_str()); - max = std::stod(args._command_args[2].c_str()); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - bool with_score = false; - if (args._command_args_count == 4) { - auto ws = args._command_args[3]; - std::transform(ws.begin(), ws.end(), ws.begin(), ::toupper); - if (ws == "WITHSCORES") { - with_score = true; - } - } - return get_database().invoke_on(cpu, &database::zrangebyscore, std::move(rk), min, max, reverse, with_score).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::zcount(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - double min = 0, max = 0; - try { - min = std::stod(args._command_args[1].c_str()); - max = std::stod(args._command_args[2].c_str()); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::zcount, std::move(rk), min, max).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::zincrby(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& member = 
args._command_args[2]; - double delta = 0; - try { - delta = std::stod(args._command_args[1].c_str()); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::zincrby, std::move(rk), std::ref(member), delta).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::zrank(args_collection& args, bool reverse, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& member = args._command_args[1]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::zrank, std::move(rk), std::ref(member), reverse).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::zrem(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - for (size_t i = 1; i < args._command_args_count; ++i) { - sstring& member = args._command_args[i]; - args._tmp_keys.emplace_back(std::move(member)); - } - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::zrem, std::move(rk), std::ref(args._tmp_keys)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::zscore(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& member = args._command_args[1]; - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::zscore, std::move(rk), std::ref(member)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -bool redis_service::parse_zset_args(args_collection& args, zset_args& uargs) -{ - if (args._command_args_count < 3 || args._command_args.empty()) { - return false; - } - uargs.dest = std::move(args._command_args[0]); - try { - uargs.numkeys = std::stol(args._command_args[1].c_str()); - } catch(const std::invalid_argument&) { - return false; - } - size_t index = static_cast(uargs.numkeys) + 2; - if (args._command_args_count < index) { - return false; - } - for (size_t i = 2; i < index; ++i) { - uargs.keys.emplace_back(std::move(args._command_args[i])); - } - bool has_weights = false, has_aggregate = false; - if (index < args._command_args_count) { - for (; index < args._command_args_count; ++index) { - sstring& syntax = args._command_args[index]; - if (syntax == "WEIGHTS") { - index ++; - if (index + uargs.numkeys > args._command_args_count) { - return false; - } - has_weights = true; - size_t i = index; - index += uargs.numkeys; - for (; i < index; ++i) { - try { - uargs.weights.push_back(std::stod(args._command_args[i].c_str())); - } catch (const std::invalid_argument&) { - return false; - } - } - } - if (syntax == "AGGREGATE") { - if (index + 1 > args._command_args_count) { - return false; - } - index++; - sstring& aggre = args._command_args[index]; - if (aggre == "SUM") { - uargs.aggregate_flag |= ZAGGREGATE_SUM; - } - else if (aggre == "MIN") { - uargs.aggregate_flag |= ZAGGREGATE_MIN; - } - else if (aggre == "MAX") { - uargs.aggregate_flag |= ZAGGREGATE_MAX; - } - else { - return false; - } - has_aggregate = 
true; - } - } - } - if (has_weights == false) { - for (size_t i = 0; i < uargs.numkeys; ++i) { - uargs.weights.push_back(1); - } - } - if (has_aggregate == false) { - uargs.aggregate_flag = ZAGGREGATE_SUM; - } - return true; -} - -future<> redis_service::zunionstore(args_collection& args, output_stream& out) -{ - zset_args uargs; - if (parse_zset_args(args, uargs) == false) { - return out.write(msg_syntax_err); - } - struct zunion_store_state { - std::vector> wkeys; - sstring dest; - std::unordered_map result; - int aggregate_flag; - }; - std::vector> wkeys; - for (size_t i = 0; i < uargs.numkeys; ++i) { - wkeys.emplace_back(std::pair(std::move(uargs.keys[i]), uargs.weights[i])); - } - return do_with(zunion_store_state{std::move(wkeys), std::move(uargs.dest), {}, uargs.aggregate_flag}, [this, &out] (auto& state) { - return parallel_for_each(std::begin(state.wkeys), std::end(state.wkeys), [this, &state] (auto& entry) { - redis_key rk{std::ref(entry.first)}; - auto cpu = rk.get_cpu(); - return get_database().invoke_on(cpu, &database::zrange_direct, std::move(rk), 0, -1).then([this, weight = entry.second, &state] (auto&& m) { - auto& range_result = *m; - auto& result = state.result; - for (size_t i = 0; i < range_result.size(); ++i) { - auto& key = range_result[i].first; - auto& score = range_result[i].second; - if (result.find(key) != result.end()) { - result[key] = redis_service::score_aggregation(result[key], score * weight, state.aggregate_flag); - } - else { - result[key] = score; - } - } - }); - }).then([this, &state, &out] () { - redis_key rk{std::ref(state.dest)}; - auto cpu = rk.get_cpu(); - return get_database().invoke_on(cpu, &database::zadds, std::move(rk), std::ref(state.result), ZADD_CH).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); - }); - }); -} - -future<> redis_service::zinterstore(args_collection& args, output_stream& out) -{ - zset_args uargs; - if (parse_zset_args(args, uargs) == false) { - return out.write(msg_syntax_err); - } - struct zinter_store_state { - std::vector> wkeys; - sstring dest; - std::unordered_map result; - int aggregate_flag; - }; - std::vector> wkeys; - for (size_t i = 0; i < uargs.numkeys; ++i) { - wkeys.emplace_back(std::pair(std::move(uargs.keys[i]), uargs.weights[i])); - } - return do_with(zinter_store_state{std::move(wkeys), std::move(uargs.dest), {}, uargs.aggregate_flag}, [this, &out] (auto& state) { - redis_key rk{std::ref(state.wkeys[0].first)}; - return get_database().invoke_on(rk.get_cpu(), &database::zrange_direct, std::move(rk), 0, -1).then([this, &state, weight = state.wkeys[0].second] (auto&& m) { - auto& range_result = *m; - auto& result = state.result; - for (size_t i = 0; i < range_result.size(); ++i) { - result[range_result[i].first] = range_result[i].second * weight; - } - return make_ready_future(!result.empty() && state.wkeys.size() > 1); - }).then([this, &state, &out] (bool continue_next) { - if (!continue_next) { - return make_ready_future<>(); - } - else { - return parallel_for_each(boost::irange(1, state.wkeys.size()), [this, &state, &out] (size_t k) { - auto& entry = state.wkeys[k]; - redis_key rk{std::ref(entry.first)}; - auto cpu = rk.get_cpu(); - return get_database().invoke_on(cpu, &database::zrange_direct, std::move(rk), 0, -1).then([this, &state, weight = entry.second] (auto&& m) { - auto& range_result = *m; - auto& result = state.result; - std::unordered_map new_result; - for (size_t i = 0; i < range_result.size(); ++i) { - auto& key = range_result[i].first; - auto& score = range_result[i].second; - 
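The zunionstore/zinterstore paths above merge weighted per-key score maps through score_aggregation under SUM, MIN or MAX. A condensed, self-contained version of that union step (flag bit values and the SUM default are assumptions matching the removed code's observable behavior):

    #include <algorithm>
    #include <string>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    enum { ZAGGREGATE_SUM = 1, ZAGGREGATE_MIN = 2, ZAGGREGATE_MAX = 4 }; // assumed values

    double score_aggregation(double lhs, double rhs, int flag) {
        if (flag & ZAGGREGATE_MIN) { return std::min(lhs, rhs); }
        if (flag & ZAGGREGATE_MAX) { return std::max(lhs, rhs); }
        return lhs + rhs; // SUM, the default
    }

    // Union of weighted score maps, mirroring the removed zunionstore merge.
    std::unordered_map<std::string, double>
    zunion(const std::vector<std::pair<std::unordered_map<std::string, double>, double>>& inputs,
           int flag) {
        std::unordered_map<std::string, double> result;
        for (const auto& [scores, weight] : inputs) {
            for (const auto& [member, score] : scores) {
                auto [it, inserted] = result.try_emplace(member, score * weight);
                if (!inserted) {
                    it->second = score_aggregation(it->second, score * weight, flag);
                }
            }
        }
        return result;
    }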
auto it = result.find(key); - if (it != result.end()) { - auto& old_score = it->second; - new_result[key] = redis_service::score_aggregation(old_score, score * weight, state.aggregate_flag); - } - } - state.result = std::move(new_result); - }); - }).then([this] () { - return make_ready_future<>(); - }); - } - }).then([this, &state, &out] { - redis_key rk{std::ref(state.dest)}; - auto cpu = rk.get_cpu(); - return get_database().invoke_on(cpu, &database::zadds, std::move(rk), std::ref(state.result), ZADD_CH).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); - }); - }); -} - -future<> redis_service::zremrangebyscore(args_collection& args, output_stream& out) -{ - // ZREMRANGEBYSCORE key min max - // Removes all elements in the sorted set stored at key with a score between min and max (inclusive). - // Integer reply: the number of elements removed. - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - double min = 0, max = 0; - try { - min = std::stod(args._command_args[1].c_str()); - max = std::stod(args._command_args[2].c_str()); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::zremrangebyscore, std::move(rk), min, max).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::zremrangebyrank(args_collection& args, output_stream& out) -{ - // ZREMRANGEBYRANK key begin end - // Removes all elements in the sorted set stored at key with a rank between start and end (inclusive). - // Integer reply: the number of elements removed. - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - long begin = 0, end = 0; - try { - begin = std::stol(args._command_args[1].c_str()); - end = std::stol(args._command_args[2].c_str()); - } catch(const std::invalid_argument& e) { - return out.write(msg_syntax_err); - } - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::zremrangebyrank, std::move(rk), begin, end).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::zdiffstore(args_collection&, output_stream& out) -{ - return out.write(msg_syntax_err); -} - -future<> redis_service::zunion(args_collection& args, output_stream& out) -{ - return out.write(msg_syntax_err); -} - -future<> redis_service::zinter(args_collection& args, output_stream& out) -{ - return out.write(msg_syntax_err); -} - -future<> redis_service::zdiff(args_collection& args, output_stream& out) -{ - return out.write(msg_syntax_err); -} - -future<> redis_service::zrangebylex(args_collection& args, output_stream& out) -{ - return out.write(msg_syntax_err); -} - -future<> redis_service::zlexcount(args_collection& args, output_stream& out) -{ - return out.write(msg_syntax_err); -} - -future<> redis_service::select(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 1 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - size_t index = 0; - try { - index = std::stol(args._command_args[0].c_str()); - } catch (const std::invalid_argument&) { - return out.write(msg_err); - } - if (index >= smp::count) { - return out.write(msg_err); - } - return do_with(size_t {0}, [this, index, &out] (auto& count) { - return 
parallel_for_each(boost::irange(0, smp::count), [this, index, &count] (unsigned cpu) { - return get_database().invoke_on(cpu, &database::select, index).then([&count] (auto&& u) { - if (u) { - count++; - } - }); - }).then([&count, &out] () { - return out.write((count == smp::count) ? msg_ok : msg_err); - }); - }); -} - - -future<> redis_service::geoadd(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 4 || (args._command_args_count - 1) % 3 != 0 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - for (size_t i = 1; i < args._command_args_count; i += 3) { - sstring& longitude = args._command_args[i]; - sstring& latitude = args._command_args[i + 1]; - sstring& member = args._command_args[i + 2]; - double longitude_ = 0, latitude_ = 0, score = 0; - try { - longitude_ = std::stod(longitude.c_str()); - latitude_ = std::stod(latitude.c_str()); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - if (geo::encode_to_geohash(longitude_, latitude_, score) == false) { - return out.write(msg_err); - } - args._tmp_key_scores.emplace(std::pair(member, score)); - } - redis_key rk{std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::zadds, std::move(rk), std::ref(args._tmp_key_scores), ZADD_CH).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::geodist(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring& lpos = args._command_args[1]; - sstring& rpos = args._command_args[2]; - int geodist_flag = GEODIST_UNIT_M; - if (args._command_args_count == 4) { - sstring& unit = args._command_args[3]; - if (unit == "km") { - geodist_flag = GEODIST_UNIT_KM; - } - else if (unit == "mi") { - geodist_flag = GEODIST_UNIT_MI; - } - else if (unit == "ft") { - geodist_flag = GEODIST_UNIT_FT; - } - else { - return out.write(msg_syntax_err); - } - } - redis_key rk {std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::geodist, std::move(rk), std::ref(lpos), std::ref(rpos), geodist_flag).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::geohash(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - for (size_t i = 1; i < args._command_args_count; ++i) { - args._tmp_keys.emplace_back(std::move(args._command_args[i])); - } - redis_key rk {std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::geohash, std::move(rk), std::ref(args._tmp_keys)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::geopos(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - std::vector members; - for (size_t i = 1; i < args._command_args_count; ++i) { - args._tmp_keys.emplace_back(std::move(args._command_args[i])); - } - redis_key rk {std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::geopos, std::move(rk), std::ref(members)).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> 
redis_service::georadius(args_collection& args, bool member, output_stream& out) -{ - size_t option_index = member ? 4 : 5; - if (args._command_args_count < option_index || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - sstring unit{}, member_key{}; - double log = 0, lat = 0, radius = 0; - if (!member) { - sstring& longitude = args._command_args[1]; - sstring& latitude = args._command_args[2]; - sstring& rad = args._command_args[3]; - unit = std::move(args._command_args[4]); - try { - log = std::stod(longitude.c_str()); - lat = std::stod(latitude.c_str()); - radius = std::stod(rad.c_str()); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - } - else { - member_key = std::move(args._command_args[1]); - sstring& rad = args._command_args[2]; - unit = std::move(args._command_args[3]); - try { - radius = std::stod(rad.c_str()); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - } - - int flags = 0; - size_t count = 0, stored_key_index = 0; - if (args._command_args_count > option_index) { - for (size_t i = option_index; i < args._command_args_count; ++i) { - sstring& cc = args._command_args[i]; - std::transform(cc.begin(), cc.end(), cc.begin(), ::toupper); - if (cc == "WITHCOORD") { - flags |= GEORADIUS_WITHCOORD; - } - else if (cc == "WITHDIST") { - flags |= GEORADIUS_WITHDIST; - } - else if (cc == "WITHHASH") { - flags |= GEORADIUS_WITHHASH; - } - else if (cc == "COUNT") { - flags |= GEORADIUS_COUNT; - if (i + 1 == args._command_args_count) { - return out.write(msg_syntax_err); - } - sstring& c = args._command_args[++i]; - try { - count = std::stol(c.c_str()); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - } - else if (cc == "ASC") { - flags |= GEORADIUS_ASC; - } - else if (cc == "DESC") { - flags |= GEORADIUS_DESC; - } - else if (cc == "STORE") { - flags |= GEORADIUS_STORE_SCORE; - if (i + 1 == args._command_args_count) { - return out.write(msg_syntax_err); - } - ++i; - stored_key_index = i; - } - else if (cc == "STOREDIST") { - flags |= GEORADIUS_STORE_DIST; - if (i + 1 == args._command_args_count) { - return out.write(msg_syntax_err); - } - ++i; - stored_key_index = i; - } - else { - return out.write(msg_syntax_err); - } - } - } - if (((flags & GEORADIUS_STORE_SCORE) || (flags & GEORADIUS_STORE_DIST)) && (stored_key_index == 0 || stored_key_index >= args._command_args_count)) { - return out.write(msg_syntax_err); - } - std::transform(unit.begin(), unit.end(), unit.begin(), ::tolower); - if (unit == "m") { - flags |= GEO_UNIT_M; - } - else if (unit == "km") { - flags |= GEO_UNIT_KM; - } - else if (unit == "mi") { - flags |= GEO_UNIT_MI; - } - else if (unit == "ft") { - flags |= GEO_UNIT_FT; - } - else { - return out.write(msg_syntax_err); - } - geo::to_meters(radius, flags); - - redis_key rk {std::ref(key)}; - auto cpu = get_cpu(rk); - auto points_ready = !member ? 
get_database().invoke_on(cpu, &database::georadius_coord_direct, std::move(rk), log, lat, radius, count, flags) - : get_database().invoke_on(cpu, &database::georadius_member_direct, std::move(rk), std::ref(member_key), radius, count, flags); - return points_ready.then([this, flags, &args, stored_key_index, &out] (auto&& data) { - using data_type = std::vector>; - using return_type = std::pair>, int>; - return_type& return_data = *data; - data_type& data_ = return_data.first; - if (return_data.second == REDIS_WRONG_TYPE) { - return out.write(msg_type_err); - } - else if (return_data.second == REDIS_ERR) { - return out.write(msg_nil); - } - bool store_with_score = flags & GEORADIUS_STORE_SCORE, store_with_dist = flags & GEORADIUS_STORE_DIST; - if (store_with_score || store_with_dist) { - std::unordered_map members; - for (size_t i = 0; i < data_.size(); ++i) { - auto& data_tuple = data_[i]; - auto& key = std::get<0>(data_tuple); - auto score = store_with_score ? std::get<1>(data_tuple) : std::get<2>(data_tuple); - members[key] = score; - } - struct store_state - { - std::unordered_map members; - sstring& stored_key; - data_type& data; - }; - sstring& stored_key = args._command_args[stored_key_index]; - return do_with(store_state{std::move(members), std::ref(stored_key), std::ref(data_)}, [this, &out, flags, &data_] (auto& state) { - redis_key rk{std::ref(state.stored_key)}; - auto cpu = rk.get_cpu(); - return get_database().invoke_on(cpu, &database::zadds_direct, std::move(rk), std::ref(state.members), ZADD_CH).then([&out, flags, &data_] (auto&& m) { - if (m) - return reply_builder::build_local(out, data_, flags); - else - return out.write(msg_err); - }); - }); - } - return reply_builder::build_local(out, data_, flags); - }); -} - -future<> redis_service::setbit(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - size_t offset = 0; - int value = 0; - try { - offset = std::stol(args._command_args[1]); - value = std::stoi(args._command_args[2]); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - redis_key rk {std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::setbit, std::move(rk), offset, value == 1).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::getbit(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 2 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - size_t offset = 0; - try { - offset = std::stol(args._command_args[1]); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - redis_key rk {std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, &database::getbit, std::move(rk), offset).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); -} - -future<> redis_service::bitcount(args_collection& args, output_stream& out) -{ - if (args._command_args_count < 3 || args._command_args.empty()) { - return out.write(msg_syntax_err); - } - sstring& key = args._command_args[0]; - long start = 0, end = 0; - try { - start = std::stol(args._command_args[1]); - end = std::stol(args._command_args[2]); - } catch (const std::invalid_argument&) { - return out.write(msg_syntax_err); - } - redis_key rk {std::ref(key)}; - auto cpu = get_cpu(rk); - return get_database().invoke_on(cpu, 
&database::bitcount, std::move(rk), start, end).then([&out] (auto&& m) {
+    return make_ready_future<>();
+/*
+    auto cpu = get_cpu(dk);
+    return _db.invoke_on(cpu, &database::del, std::ref(dk.key())).then([&out] (auto&& m) {
         return out.write(std::move(*m));
-    });
-}
-
-future<> redis_service::bitop(args_collection& args, output_stream& out)
-{
-    return out.write(msg_nil);
-}
-
-future<> redis_service::bitpos(args_collection& args, output_stream& out)
-{
-    return out.write(msg_nil);
-}
-
-future<> redis_service::bitfield(args_collection& args, output_stream& out)
-{
-    return out.write(msg_nil);
+    });
+*/
 }
 
-future<> redis_service::pfadd(args_collection& args, output_stream& out)
+future<> service::get(const dht::decorated_key& dk, output_stream& out)
 {
-    if (args._command_args_count < 2 || args._command_args.empty()) {
-        return out.write(msg_syntax_err);
-    }
-    sstring& key = args._command_args[0];
-    for (size_t i = 1; i < args._command_args_count; ++i) {
-        args._tmp_keys.emplace_back(args._command_args[i]);
-    }
-    redis_key rk {std::ref(key)};
-    auto& elements = args._tmp_keys;
-    auto cpu = get_cpu(rk);
-    return get_database().invoke_on(cpu, &database::pfadd, rk, std::ref(elements)).then([&out] (auto&& m) {
+    return make_ready_future<>();
+/*
+    auto cpu = get_cpu(dk);
+    return _db.invoke_on(cpu, &database::get, std::ref(dk.key())).then([&out] (auto&& m) {
         return out.write(std::move(*m));
-    });
-}
-
-future<> redis_service::pfcount(args_collection& args, output_stream& out)
-{
-    if (args._command_args_count < 1 || args._command_args.empty()) {
-        return out.write(msg_syntax_err);
-    }
-    if (args._command_args_count == 1) {
-        sstring& key = args._command_args[0];
-        redis_key rk {std::ref(key)};
-        auto cpu = get_cpu(rk);
-        return get_database().invoke_on(cpu, &database::pfcount, std::move(rk)).then([&out] (auto&& m) {
-            return out.write(std::move(*m));
-        });
-    }
-    else {
-        struct merge_state {
-            std::vector<sstring>& keys;
-            uint8_t merged_sources[HLL_BYTES_SIZE];
-        };
-        for (size_t i = 0; i < args._command_args_count; ++i) {
-            args._tmp_keys.emplace_back(args._command_args[i]);
-        }
-        return do_with(merge_state{std::ref(args._tmp_keys), { 0 }}, [this, &out] (auto& state) {
-            return parallel_for_each(std::begin(state.keys), std::end(state.keys), [this, &state] (auto& key) {
-                redis_key rk { std::ref(key) };
-                auto cpu = this->get_cpu(rk);
-                return get_database().invoke_on(cpu, &database::get_hll_direct, std::move(rk)).then([&state] (auto&& u) {
-                    if (u) {
-                        hll::merge(state.merged_sources, HLL_BYTES_SIZE, *u);
-                    }
-                    return make_ready_future<>();
-                });
-            }).then([this, &state, &out] {
-                auto card = hll::count(state.merged_sources, HLL_BYTES_SIZE);
-                return reply_builder::build_local(out, card);
-            });
-        });
-    }
+    });
+*/
 }
 
-future<> redis_service::pfmerge(args_collection& args, output_stream& out)
-{
-    if (args._command_args_count < 2 || args._command_args.empty()) {
-        return out.write(msg_syntax_err);
-    }
-    struct merge_state {
-        sstring dest;
-        std::vector<sstring>& keys;
-        uint8_t merged_sources[HLL_BYTES_SIZE];
-    };
-    for (size_t i = 1; i < args._command_args_count; ++i) {
-        args._tmp_keys.emplace_back(args._command_args[i]);
-    }
-    return do_with(merge_state{std::move(args._command_args[0]), std::ref(args._tmp_keys), { 0 }}, [this, &out] (auto& state) {
-        return parallel_for_each(std::begin(state.keys), std::end(state.keys), [this, &state] (auto& key) {
-            redis_key rk { std::ref(key) };
-            auto cpu = this->get_cpu(rk);
-            return get_database().invoke_on(cpu, &database::get_hll_direct, std::move(rk)).then([&state]
(auto&& u) { - if (u) { - hll::merge(state.merged_sources, HLL_BYTES_SIZE, *u); - } - return make_ready_future<>(); - }); - }).then([this, &state, &out] { - redis_key rk { std::ref(state.dest) }; - auto cpu = this->get_cpu(rk); - return get_database().invoke_on(cpu, &database::pfmerge, std::move(rk), state.merged_sources, HLL_BYTES_SIZE).then([&out] (auto&& m) { - return out.write(std::move(*m)); - }); - }); - }); } -} /* namespace redis */ diff --git a/redis.hh b/redis.hh index 9d77031..f0297e5 100644 --- a/redis.hh +++ b/redis.hh @@ -41,196 +41,35 @@ #include "net/packet-data-source.hh" #include #include -#include "common.hh" -#include "geo.hh" +#include "db.hh" +#include "dht/i_partitioner.hh" namespace redis { - -namespace stdx = std::experimental; - -class redis_service; -extern distributed _the_redis; -inline distributed& get_redis_service() { - return _the_redis; -} -inline redis_service& local_redis_service() { - return _the_redis.local(); -} - -struct args_collection; -class database; -using message = scattered_message; -class redis_service { +using namespace seastar; +class service { private: - inline unsigned get_cpu(const sstring& key) { - return std::hash()(key) % smp::count; - } - inline unsigned get_cpu(const redis_key& key) { - return key.hash() % smp::count; + inline unsigned get_cpu(const dht::decorated_key& dk) { + return std::hash()(dk.key().representation()) % smp::count; } public: - redis_service() + service() { } - future<> start(); - future<> stop(); - // [TEST APIs] - future echo(args_collection& args); - // [COUNTER APIs] - future<> incr(args_collection& args, output_stream& out); - future<> decr(args_collection& args, output_stream& out); - future<> incrby(args_collection& args, output_stream& out); - future<> decrby(args_collection& args, output_stream& out); - - // [STRING APIs] - future<> mset(args_collection& args, output_stream& out); - future<> set(args_collection& args, output_stream& out); - future<> del(args_collection& args, output_stream& out); - future<> exists(args_collection& args, output_stream& out); - future<> append(args_collection& args, output_stream& out); - future<> strlen(args_collection& args, output_stream& out); - future<> get(args_collection& args, output_stream& out); - future<> mget(args_collection& args, output_stream& out); - - // [LIST APIs] - future<> lpush(args_collection& arg, output_stream& out); - future<> lpushx(args_collection& args, output_stream& out); - future<> rpush(args_collection& arg, output_stream& out); - future<> rpushx(args_collection& args, output_stream& out); - future<> lpop(args_collection& args, output_stream& out); - future<> rpop(args_collection& args, output_stream& out); - future<> llen(args_collection& args, output_stream& out); - future<> lindex(args_collection& args, output_stream& out); - future<> linsert(args_collection& args, output_stream& out); - future<> lset(args_collection& args, output_stream& out); - future<> lrange(args_collection& args, output_stream& out); - future<> ltrim(args_collection& args, output_stream& out); - future<> lrem(args_collection& args, output_stream& out); - - // [HASH APIs] - future<> hdel(args_collection& args, output_stream& out); - future<> hexists(args_collection& args, output_stream& out); - future<> hset(args_collection& args, output_stream& out); - future<> hmset(args_collection& args, output_stream& out); - future<> hincrby(args_collection& args, output_stream& out); - future<> hincrbyfloat(args_collection& args, output_stream& out); - future<> 
hlen(args_collection& args, output_stream& out); - future<> hstrlen(args_collection& args, output_stream& out); - future<> hget(args_collection& args, output_stream& out); - future<> hgetall(args_collection& args, output_stream& out); - future<> hgetall_keys(args_collection& args, output_stream& out); - future<> hgetall_values(args_collection& args, output_stream& out); - future<> hmget(args_collection& args, output_stream& out); - - // [SET] - future<> sadd(args_collection& args, output_stream& out); - future<> scard(args_collection& args, output_stream& out); - future<> srem(args_collection& args, output_stream& out); - future<> sismember(args_collection& args, output_stream& out); - future<> smembers(args_collection& args, output_stream& out); - future<> sdiff(args_collection& args, output_stream& out); - future<> sdiff_store(args_collection& args, output_stream& out); - future<> sinter(args_collection& args, output_stream& out); - future<> sinter_store(args_collection& args, output_stream& out); - future<> sunion(args_collection& args, output_stream& out); - future<> sunion_store(args_collection& args, output_stream& out); - future<> smove(args_collection& args, output_stream& out); - future<> srandmember(args_collection& args, output_stream& out); - future<> spop(args_collection& args, output_stream& out); + future<> set(const dht::decorated_key& dk, const sstring& value, output_stream& out); + future<> del(const dht::decorated_key& dk, output_stream& out); + future<> get(const dht::decorated_key& dk, output_stream& out); - future<> type(args_collection& args, output_stream& out); - future<> expire(args_collection& args, output_stream& out); - future<> persist(args_collection& args, output_stream& out); - future<> pexpire(args_collection& args, output_stream& out); - future<> ttl(args_collection& args, output_stream& out); - future<> pttl(args_collection& args, output_stream& out); - - // [ZSET] - future<> zadd(args_collection& args, output_stream& out); - future<> zcard(args_collection& args, output_stream& out); - future<> zrange(args_collection&, bool, output_stream& out); - future<> zrangebyscore(args_collection&, bool, output_stream& out); - future<> zcount(args_collection& args, output_stream& out); - future<> zincrby(args_collection& args, output_stream& out); - future<> zrank(args_collection&, bool, output_stream& out); - future<> zrem(args_collection&, output_stream& out); - future<> zscore(args_collection&, output_stream& out); - future<> zunionstore(args_collection&, output_stream& out); - future<> zinterstore(args_collection&, output_stream& out); - future<> zdiffstore(args_collection&, output_stream& out); - future<> zunion(args_collection&, output_stream& out); - future<> zinter(args_collection&, output_stream& out); - future<> zdiff(args_collection&, output_stream& out); - future<> zrangebylex(args_collection&, output_stream& out); - future<> zlexcount(args_collection&, output_stream& out); - future<> zrevrangebylex(args_collection&, output_stream& out); - future<> zremrangebyscore(args_collection&, output_stream& out); - future<> zremrangebyrank(args_collection&, output_stream& out); - future<> select(args_collection&, output_stream& out); - - // [GEO] - future<> geoadd(args_collection&, output_stream& out); - future<> geopos(args_collection&, output_stream& out); - future<> geodist(args_collection&, output_stream& out); - future<> geohash(args_collection&, output_stream& out); - future<> georadius(args_collection&, bool, output_stream& out); - - // [BITMAP] - future<> 
setbit(args_collection&, output_stream& out);
-    future<> getbit(args_collection&, output_stream& out);
-    future<> bitcount(args_collection&, output_stream& out);
-    future<> bitop(args_collection&, output_stream& out);
-    future<> bitpos(args_collection&, output_stream& out);
-    future<> bitfield(args_collection&, output_stream& out);
-
-    // [HLL]
-    future<> pfadd(args_collection&, output_stream& out);
-    future<> pfcount(args_collection&, output_stream& out);
-    future<> pfmerge(args_collection&, output_stream& out);
+    future<> stop();
 private:
-    future> zadds_impl(sstring& key, std::unordered_map<sstring, double>&& members, int flags);
-    future exists_impl(sstring& key);
-    future<> srem_impl(sstring& key, sstring& member, output_stream& out);
-    future<> sadd_impl(sstring& key, sstring& member, output_stream& out);
-    future<> sadds_impl(sstring& key, std::vector<sstring>& members, output_stream& out);
-    future<> sadds_impl_return_keys(sstring& key, std::vector<sstring>& members, output_stream& out);
-    future<> sdiff_impl(std::vector<sstring>& keys, sstring* dest, output_stream& out);
-    future<> sinter_impl(std::vector<sstring>& keys, sstring* dest, output_stream& out);
-    future<> sunion_impl(std::vector<sstring>& keys, sstring* dest, output_stream& out);
-    future<> smembers_impl(sstring& key, output_stream& out);
-    future<> pop_impl(args_collection& args, bool left, output_stream& out);
-    future<> push_impl(args_collection& arg, bool force, bool left, output_stream& out);
-    future<> push_impl(sstring& key, sstring& value, bool force, bool left, output_stream& out);
-    future<> push_impl(sstring& key, std::vector<sstring>& vals, bool force, bool left, output_stream& out);
-    future srem_direct(sstring& key, sstring& member);
-    future sadd_direct(sstring& key, sstring& member);
-    future set_impl(sstring& key, sstring& value, long expir, uint8_t flag);
-    //future get_impl(sstring& key);
-    future remove_impl(sstring& key);
-    future hdel_impl(sstring& key, sstring& field);
-    future<> counter_by(args_collection& args, bool incr, bool with_step, output_stream& out);
-    using georadius_result_type = std::pair>, int>;
-    struct zset_args
-    {
-        sstring dest;
-        size_t numkeys;
-        std::vector<sstring> keys;
-        std::vector<double> weights;
-        int aggregate_flag;
-    };
-    bool parse_zset_args(args_collection& args, zset_args& uargs);
-    static inline double score_aggregation(const double& old, const double& newscore, int flag)
-    {
-        if (flag == ZAGGREGATE_MIN) {
-            return std::min(old, newscore);
-        }
-        else if (flag == ZAGGREGATE_SUM) {
-            return old + newscore;
-        }
-        else {
-            return std::max(old, newscore);
-        }
-    }
+    distributed<database> _db;
+    seastar::metrics::metric_groups _metrics;
 };
+extern distributed<service> _the_redis_srvice;
+inline distributed<service>& get_service() {
+    return _the_redis_srvice;
+}
+inline service& get_local_service() {
+    return _the_redis_srvice.local();
+}
 } /* namespace redis */
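The new service class above routes every operation to the core that owns the key: get_cpu() hashes the decorated key and takes the result modulo smp::count, and the (currently commented-out) bodies then hop to that core with distributed<>::invoke_on(). Below is a minimal, self-contained sketch of that owner-shard rule in plain C++, with no seastar dependency; smp_count, owner_shard() and the sample keys are hypothetical stand-ins for smp::count and service::get_cpu(), and std::hash over a std::string stands in for hashing the key's representation.

    // Sketch of the owner-shard rule (assumptions noted above; not the actual Pedis code).
    #include <functional>
    #include <iostream>
    #include <string>

    constexpr unsigned smp_count = 8;   // assumed core count, for illustration only

    unsigned owner_shard(const std::string& key) {
        // same shape as service::get_cpu(): hash of the key's bytes, modulo core count
        return std::hash<std::string>{}(key) % smp_count;
    }

    int main() {
        // every core computes the same owner for a given key, so a request that
        // arrives on any shard can be forwarded to the owner (in seastar terms,
        // via distributed<>::invoke_on(owner, ...))
        for (const std::string key : {"user:1000", "page:42", "counter"}) {
            std::cout << key << " -> shard " << owner_shard(key) << "\n";
        }
    }

Pinning each key to one core is the shared-nothing design the patch inherits from seastar: all access to a key is serialized on its owning shard, so the data structures themselves need no locks.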
diff --git a/redis_service.cc b/redis_service.cc
deleted file mode 100644
index c258a49..0000000
--- a/redis_service.cc
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
-* Pedis is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License as published by
-* the Free Software Foundation, either version 3 of the License, or
-* (at your option) any later version.
-*
-* You may obtain a copy of the License at
-*
-*     http://www.gnu.org/licenses
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-* Copyright (c) 2016-2026, Peng Jian, pstack@163.com. All rights reserved.
-*
-*/
-#include "redis_service.hh"
-#include
-#include
-#include
-#include "core/app-template.hh"
-#include "core/future-util.hh"
-#include "core/timer-set.hh"
-#include "core/shared_ptr.hh"
-#include "core/stream.hh"
-#include "core/memory.hh"
-#include "core/units.hh"
-#include "core/distributed.hh"
-#include "core/vector-data-sink.hh"
-#include "core/bitops.hh"
-#include "core/slab.hh"
-#include "core/align.hh"
-#include "net/api.hh"
-#include "net/packet-data-source.hh"
-#include "log.hh"
-#include
-#include
-#include "redis_protocol.hh"
-#include "db.hh"
-#include "reply_builder.hh"
-#include
-#include "core/metrics.hh"
-using namespace net;
-namespace redis {
-
-
-using logger = seastar::logger;
-static logger redis_log ("redis");
-
-distributed _the_redis_srvice;
-
-future<> redis_service::set(const dht::decorated_key& dk, const sstring& value, output_stream& out)
-{
-    return make_ready_future<>();
-    /*
-    auto cpu = get_cpu(dk);
-    return _db.invoke_on(cpu, &database::set, std::ref(dk.key()), std::ref(value), 0, 0).then([&out] (auto&& m) {
-        return out.write(std::move(*m));
-    });;
-    */
-}
-
-future<> redis_service::del(const dht::decorated_key& dk, output_stream& out)
-{
-    return make_ready_future<>();
-/*
-    auto cpu = get_cpu(dk);
-    return _db.invoke_on(cpu, &database::set, std::ref(dk.key())).then([&out] (auto&& m) {
-        return out.write(std::move(*m));
-    });;
-*/
-}
-
-future<> redis_service::get(const dht::decorated_key& dk, output_stream& out)
-{
-    return make_ready_future<>();
-/*
-    auto cpu = get_cpu(dk);
-    return _db.invoke_on(cpu, &database::get, std::ref(dk.key())).then([&out] (auto&& m) {
-        return out.write(std::move(*m));
-    });;
-*/
-}
-
-}
diff --git a/ring.cc b/ring.cc
new file mode 100644
index 0000000..314904a
--- /dev/null
+++ b/ring.cc
@@ -0,0 +1,12 @@
+#include "ring.hh"
+namespace redis {
+future<> ring::start()
+{
+    return make_ready_future<>();
+}
+
+future<> ring::stop()
+{
+    return make_ready_future<>();
+}
+}
diff --git a/ring.hh b/ring.hh
new file mode 100644
index 0000000..badc787
--- /dev/null
+++ b/ring.hh
@@ -0,0 +1,14 @@
+#pragma once
+#include "core/future.hh" // for future<>/make_ready_future; assumes the seastar include style used elsewhere in this tree
+namespace redis {
+using namespace seastar;
+class ring {
+private:
+public:
+    ring()
+    {
+    }
+    future<> start();
+    future<> stop();
+};
+}
diff --git a/scylla/Cql.tokens b/scylla/Cql.tokens
deleted file mode 100644
index 1a2ab54..0000000
--- a/scylla/Cql.tokens
+++ /dev/null
@@ -1,197 +0,0 @@
-T__163=163
-T__164=164
-T__165=165
-T__166=166
-T__167=167
-T__168=168
-T__169=169
-T__170=170
-T__171=171
-T__172=172
-T__173=173
-T__174=174
-T__175=175
-T__176=176
-T__177=177
-T__178=178
-T__179=179
-T__180=180
-T__181=181
-A=4
-B=5
-BOOLEAN=6
-C=7
-COMMENT=8
-D=9
-DIGIT=10
-E=11
-EXPONENT=12
-F=13
-FLOAT=14
-G=15
-H=16
-HEX=17
-HEXNUMBER=18
-I=19
-IDENT=20
-INTEGER=21
-J=22
-K=23
-K_ADD=24
-K_AGGREGATE=25
-K_ALL=26
-K_ALLOW=27
-K_ALTER=28
-K_AND=29
-K_APPLY=30
-K_AS=31
-K_ASC=32
-K_ASCII=33
-K_AUTHORIZE=34
-K_BATCH=35
-K_BEGIN=36
-K_BIGINT=37
-K_BLOB=38
-K_BOOLEAN=39
-K_BY=40
-K_CLUSTERING=41
-K_COLUMNFAMILY=42
-K_COMPACT=43
-K_CONTAINS=44
-K_COUNT=45
-K_COUNTER=46
-K_CREATE=47
-K_CUSTOM=48
-K_DATE=49
-K_DECIMAL=50
-K_DELETE=51
-K_DESC=52
-K_DETERMINISTIC=53
-K_DISTINCT=54
-K_DOUBLE=55
-K_DROP=56
-K_ENTRIES=57
-K_EXISTS=58
-K_FILTERING=59
-K_FINALFUNC=60
-K_FLOAT=61
-K_FROM=62
-K_FROZEN=63
-K_FULL=64
-K_FUNCTION=65
-K_GRANT=66
-K_IF=67
-K_IN=68
-K_INDEX=69
-K_INET=70
-K_INFINITY=71 -K_INITCOND=72 -K_INSERT=73 -K_INT=74 -K_INTO=75 -K_IS=76 -K_KEY=77 -K_KEYS=78 -K_KEYSPACE=79 -K_KEYSPACES=80 -K_LANGUAGE=81 -K_LIMIT=82 -K_LIST=83 -K_MAP=84 -K_MATERIALIZED=85 -K_MODIFY=86 -K_NAN=87 -K_NON=88 -K_NORECURSIVE=89 -K_NOSUPERUSER=90 -K_NOT=91 -K_NULL=92 -K_OF=93 -K_ON=94 -K_OR=95 -K_ORDER=96 -K_PASSWORD=97 -K_PERMISSION=98 -K_PERMISSIONS=99 -K_PRIMARY=100 -K_RENAME=101 -K_REPLACE=102 -K_RETURNS=103 -K_REVOKE=104 -K_SCYLLA_COUNTER_SHARD_LIST=105 -K_SCYLLA_TIMEUUID_LIST_INDEX=106 -K_SELECT=107 -K_SET=108 -K_SFUNC=109 -K_SMALLINT=110 -K_STATIC=111 -K_STORAGE=112 -K_STYPE=113 -K_SUPERUSER=114 -K_TEXT=115 -K_TIME=116 -K_TIMESTAMP=117 -K_TIMEUUID=118 -K_TINYINT=119 -K_TO=120 -K_TOKEN=121 -K_TRIGGER=122 -K_TRUNCATE=123 -K_TTL=124 -K_TUPLE=125 -K_TYPE=126 -K_UNLOGGED=127 -K_UPDATE=128 -K_USE=129 -K_USER=130 -K_USERS=131 -K_USING=132 -K_UUID=133 -K_VALUES=134 -K_VARCHAR=135 -K_VARINT=136 -K_VIEW=137 -K_WHERE=138 -K_WITH=139 -K_WRITETIME=140 -L=141 -LETTER=142 -M=143 -MULTILINE_COMMENT=144 -N=145 -O=146 -P=147 -Q=148 -QMARK=149 -QUOTED_NAME=150 -R=151 -S=152 -STRING_LITERAL=153 -T=154 -U=155 -UUID=156 -V=157 -W=158 -WS=159 -X=160 -Y=161 -Z=162 -'!='=163 -'('=164 -')'=165 -'*'=166 -'+'=167 -','=168 -'-'=169 -'.'=170 -':'=171 -';'=172 -'<'=173 -'<='=174 -'='=175 -'>'=176 -'>='=177 -'['=178 -']'=179 -'{'=180 -'}'=181 diff --git a/scylla/SCYLLA-VERSION-GEN b/scylla/SCYLLA-VERSION-GEN deleted file mode 100755 index 2028169..0000000 --- a/scylla/SCYLLA-VERSION-GEN +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/sh - -VERSION=2.0.rc2 - -if test -f version -then - SCYLLA_VERSION=$(cat version | awk -F'-' '{print $1}') - SCYLLA_RELEASE=$(cat version | awk -F'-' '{print $2}') -else - DATE=$(date +%Y%m%d) - GIT_COMMIT=$(git log --pretty=format:'%h' -n 1) - SCYLLA_VERSION=$VERSION - # For custom package builds, replace "0" with "counter.your_name", - # where counter starts at 1 and increments for successive versions. - # This ensures that the package manager will select your custom - # package over the standard release. 
- SCYLLA_BUILD=0 - SCYLLA_RELEASE=$SCYLLA_BUILD.$DATE.$GIT_COMMIT -fi - -echo "$SCYLLA_VERSION-$SCYLLA_RELEASE" -mkdir -p build -echo "$SCYLLA_VERSION" > build/SCYLLA-VERSION-FILE -echo "$SCYLLA_RELEASE" > build/SCYLLA-RELEASE-FILE diff --git a/scylla/api/api-doc/cache_service.json b/scylla/api/api-doc/cache_service.json deleted file mode 100644 index 2e5802f..0000000 --- a/scylla/api/api-doc/cache_service.json +++ /dev/null @@ -1,701 +0,0 @@ -{ - "apiVersion":"0.0.1", - "swaggerVersion":"1.2", - "basePath":"{{Protocol}}://{{Host}}", - "resourcePath":"/cache_service", - "produces":[ - "application/json" - ], - "apis":[ - { - "path":"/cache_service/row_cache_save_period", - "operations":[ - { - "method":"GET", - "summary":"get row cache save period in seconds", - "type":"int", - "nickname":"get_row_cache_save_period_in_seconds", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"set row cache save period in seconds", - "type":"void", - "nickname":"set_row_cache_save_period_in_seconds", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"period", - "description":"row cache save period in seconds", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/cache_service/key_cache_save_period", - "operations":[ - { - "method":"GET", - "summary":"get key cache save period in seconds", - "type":"int", - "nickname":"get_key_cache_save_period_in_seconds", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"set key cache save period in seconds", - "type":"void", - "nickname":"set_key_cache_save_period_in_seconds", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"period", - "description":"key cache save period in seconds", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/cache_service/counter_cache_save_period", - "operations":[ - { - "method":"GET", - "summary":"get counter cache save period in seconds", - "type":"int", - "nickname":"get_counter_cache_save_period_in_seconds", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"set counter cache save period in seconds", - "type":"void", - "nickname":"set_counter_cache_save_period_in_seconds", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"ccspis", - "description":"counter cache save period in seconds", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/cache_service/row_cache_keys_to_save", - "operations":[ - { - "method":"GET", - "summary":"get row cache keys to save", - "type":"int", - "nickname":"get_row_cache_keys_to_save", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"set row cache keys to save", - "type":"void", - "nickname":"set_row_cache_keys_to_save", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"rckts", - "description":"row cache keys to save", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/cache_service/key_cache_keys_to_save", - "operations":[ - { - "method":"GET", - "summary":"get key cache keys to save", - "type":"int", - "nickname":"get_key_cache_keys_to_save", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"set key 
cache keys to save", - "type":"void", - "nickname":"set_key_cache_keys_to_save", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"kckts", - "description":"key cache keys to save", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/cache_service/counter_cache_keys_to_save", - "operations":[ - { - "method":"GET", - "summary":"get counter cache keys to save", - "type":"int", - "nickname":"get_counter_cache_keys_to_save", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"set counter cache keys to save", - "type":"void", - "nickname":"set_counter_cache_keys_to_save", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"cckts", - "description":"counter cache keys to save", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/cache_service/invalidate_key_cache", - "operations":[ - { - "method":"POST", - "summary":"invalidate the key cache; for use after invalidating row cache", - "type":"void", - "nickname":"invalidate_key_cache", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/cache_service/invalidate_counter_cache", - "operations":[ - { - "method":"POST", - "summary":"invalidate counter cache", - "type":"void", - "nickname":"invalidate_counter_cache", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/cache_service/row_cache_capacity", - "operations":[ - { - "method":"POST", - "summary":"set row cache capacity in mb", - "type":"void", - "nickname":"set_row_cache_capacity_in_mb", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"capacity", - "description":"row cache capacity in mb", - "required":true, - "allowMultiple":false, - "type":"long", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/cache_service/key_cache_capacity", - "operations":[ - { - "method":"POST", - "summary":"set key cache capacity in mb", - "type":"void", - "nickname":"set_key_cache_capacity_in_mb", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"capacity", - "description":"key cache capacity in mb", - "required":true, - "allowMultiple":false, - "type":"long", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/cache_service/counter_cache_capacity", - "operations":[ - { - "method":"POST", - "summary":"set counter cache capacity in mb", - "type":"void", - "nickname":"set_counter_cache_capacity_in_mb", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"capacity", - "description":"counter cache capacity in mb", - "required":true, - "allowMultiple":false, - "type":"long", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/cache_service/save_caches", - "operations":[ - { - "method":"POST", - "summary":"save row and key caches", - "type":"void", - "nickname":"save_caches", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path": "/cache_service/metrics/key/capacity", - "operations": [ - { - "method": "GET", - "summary": "Get key capacity", - "type": "long", - "nickname": "get_key_capacity", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/key/hits", - "operations": [ - { - "method": "GET", - "summary": "Get key hits", - "type": "long", - "nickname": "get_key_hits", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - 
"path": "/cache_service/metrics/key/requests", - "operations": [ - { - "method": "GET", - "summary": "Get key requests", - "type": "long", - "nickname": "get_key_requests", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/key/hit_rate", - "operations": [ - { - "method": "GET", - "summary": "Get key hit rate", - "type": "double", - "nickname": "get_key_hit_rate", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/key/hits_moving_avrage", - "operations": [ - { - "method": "GET", - "summary": "Get key hits moving avrage", - "type": "#/utils/rate_moving_average", - "nickname": "get_key_hits_moving_avrage", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/key/requests_moving_avrage", - "operations": [ - { - "method": "GET", - "summary": "Get key requests moving avrage", - "type": "#/utils/rate_moving_average", - "nickname": "get_key_requests_moving_avrage", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/key/size", - "operations": [ - { - "method": "GET", - "summary": "Get key cache waited size", - "type": "long", - "nickname": "get_key_size", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/key/entries", - "operations": [ - { - "method": "GET", - "summary": "Get key entries", - "type": "int", - "nickname": "get_key_entries", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/row/capacity", - "operations": [ - { - "method": "GET", - "summary": "Get row capacity", - "type": "long", - "nickname": "get_row_capacity", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/row/hits", - "operations": [ - { - "method": "GET", - "summary": "Get row hits", - "type": "long", - "nickname": "get_row_hits", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/row/requests", - "operations": [ - { - "method": "GET", - "summary": "Get row requests", - "type": "long", - "nickname": "get_row_requests", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/row/hit_rate", - "operations": [ - { - "method": "GET", - "summary": "Get row hit rate", - "type": "double", - "nickname": "get_row_hit_rate", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/row/hits_moving_avrage", - "operations": [ - { - "method": "GET", - "summary": "Get row hits moving avrage", - "type": "#/utils/rate_moving_average", - "nickname": "get_row_hits_moving_avrage", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/row/requests_moving_avrage", - "operations": [ - { - "method": "GET", - "summary": "Get row requests moving avrage", - "type": "#/utils/rate_moving_average", - "nickname": "get_row_requests_moving_avrage", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/row/size", - "operations": [ - { - "method": "GET", - "summary": "Get row cache waited size", - "type": "long", - "nickname": "get_row_size", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": 
"/cache_service/metrics/row/entries", - "operations": [ - { - "method": "GET", - "summary": "Get row entries", - "type": "int", - "nickname": "get_row_entries", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/counter/capacity", - "operations": [ - { - "method": "GET", - "summary": "Get counter capacity", - "type": "long", - "nickname": "get_counter_capacity", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/counter/hits", - "operations": [ - { - "method": "GET", - "summary": "Get counter hits", - "type": "long", - "nickname": "get_counter_hits", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/counter/requests", - "operations": [ - { - "method": "GET", - "summary": "Get counter requests", - "type": "long", - "nickname": "get_counter_requests", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/counter/hit_rate", - "operations": [ - { - "method": "GET", - "summary": "Get counter hit rate", - "type": "double", - "nickname": "get_counter_hit_rate", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/counter/hits_moving_avrage", - "operations": [ - { - "method": "GET", - "summary": "Get counter hits moving avrage", - "type": "#/utils/rate_moving_average", - "nickname": "get_counter_hits_moving_avrage", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/counter/requests_moving_avrage", - "operations": [ - { - "method": "GET", - "summary": "Get counter requests moving avrage", - "type": "#/utils/rate_moving_average", - "nickname": "get_counter_requests_moving_avrage", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/counter/size", - "operations": [ - { - "method": "GET", - "summary": "Get counter cache waited size", - "type": "long", - "nickname": "get_counter_size", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/cache_service/metrics/counter/entries", - "operations": [ - { - "method": "GET", - "summary": "Get counter entries", - "type": "int", - "nickname": "get_counter_entries", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - } - ] -} diff --git a/scylla/api/api-doc/collectd.json b/scylla/api/api-doc/collectd.json deleted file mode 100644 index 876cf93..0000000 --- a/scylla/api/api-doc/collectd.json +++ /dev/null @@ -1,202 +0,0 @@ -{ - "apiVersion":"0.0.1", - "swaggerVersion":"1.2", - "basePath":"{{Protocol}}://{{Host}}", - "resourcePath":"/collectd", - "produces":[ - "application/json" - ], - "apis":[ - { - "path":"/collectd/{pluginid}", - "operations":[ - { - "method":"GET", - "summary":"Get a collectd value", - "type":"array", - "items":{ - "type":"collectd_value" - }, - "nickname":"get_collectd", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"pluginid", - "description":"The plugin ID", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"instance", - "description":"The plugin instance", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"type", - "description":"The plugin type", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - 
"name":"type_instance", - "description":"The plugin type instance", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - }, - { - "method":"POST", - "summary":"Start reporting on one or more collectd metric", - "type":"void", - "nickname":"enable_collectd", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"pluginid", - "description":"The plugin ID, describe the component the metric belongs to. Examples are cache, thrift, etc'. Regex are supported.The plugin ID, describe the component the metric belong to. Examples are: cache, thrift etc'. regex are supported", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"instance", - "description":"The plugin instance typically #CPU indicating per CPU metric. Regex are supported. Omit for all", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"type", - "description":"The plugin type, the type of the information. Examples are total_operations, bytes, total_operations, etc'. Regex are supported. Omit for all", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"type_instance", - "description":"The plugin type instance, the specific metric. Exampls are total_writes, total_size, zones, etc'. Regex are supported, Omit for all", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"enable", - "description":"set to true to enable all, anything else or omit to disable", - "required":false, - "allowMultiple":false, - "type":"boolean", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/collectd/", - "operations":[ - { - "method":"GET", - "summary":"Get a list of all collectd metrics and their status", - "type":"array", - "items":{ - "type":"collectd_metric_status" - }, - "nickname":"get_collectd_items", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"Enable or disable all collectd metrics", - "type":"void", - "nickname":"enable_all_collectd", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"enable", - "description":"set to true to enable all, anything else or omit to disable", - "required":false, - "allowMultiple":false, - "type":"boolean", - "paramType":"query" - } - ] - } - ] - } - ], - "models":{ - "type_instance_id":{ - "id":"type_instance_id", - "description":"A type instance ID", - "properties":{ - "plugin":{ - "type":"string", - "description":"The plugin ID" - }, - "plugin_instance":{ - "type":"string", - "description":"The plugin instance" - }, - "type":{ - "type":"string", - "description":"The plugin type" - }, - "type_instance":{ - "type":"string", - "description":"The plugin type instance" - } - } - }, - "collectd_value":{ - "id":"collectd_value", - "description":"Holds a collectd value", - "properties":{ - "values":{ - "description":"An array of values", - "type":"array", - "items":{ - "type":"double" - } - } - } - }, - "collectd_metric_status":{ - "id":"collectd_metric_status", - "description":"Holds a collectd id and an enable flag", - "properties":{ - "id":{ - "description":"The metric ID", - "type":"type_instance_id" - }, - "enable":{ - "description":"Is the metric enabled", - "type":"boolean" - } - } - } - } -} \ No newline at end of file diff --git a/scylla/api/api-doc/column_family.json b/scylla/api/api-doc/column_family.json deleted file mode 100644 index e99bdd2..0000000 --- 
a/scylla/api/api-doc/column_family.json +++ /dev/null @@ -1,2821 +0,0 @@ -{ - "apiVersion":"0.0.1", - "swaggerVersion":"1.2", - "basePath":"{{Protocol}}://{{Host}}", - "resourcePath":"/column_family", - "produces":[ - "application/json" - ], - "apis":[ - { - "path":"/column_family/", - "operations":[ - { - "method":"GET", - "summary":"Get a list of all column family info", - "type":"array", - "items":{ - "type":"column_family_info" - }, - "nickname":"get_column_family", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/name", - "operations":[ - { - "method":"GET", - "summary":"Get a list of all column family names", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_column_family_name", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/name/keyspace", - "operations":[ - { - "method":"GET", - "summary":"Get a list of the key space names", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_column_family_name_keyspace", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/major_compaction/{name}", - "operations":[ - { - "method":"POST", - "summary":"Force a major compaction of this column family", - "type":"string", - "nickname":"force_major_compaction", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"split_output", - "description":"true if the output of the major compaction should be split in several sstables", - "required":false, - "allowMultiple":false, - "type":"bool", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/column_family/minimum_compaction/{name}", - "operations":[ - { - "method":"POST", - "summary":"Sets the minimum number of sstables in queue before compaction kicks off", - "type":"string", - "nickname":"set_minimum_compaction_threshold", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"value", - "description":"The minimum number of sstables in queue before compaction kicks off", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - }, - { - "method":"GET", - "summary":"get the minimum number of sstables in queue before compaction kicks off", - "type":"string", - "nickname":"get_minimum_compaction_threshold", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/maximum_compaction/{name}", - "operations":[ - { - "method":"POST", - "summary":"Sets the maximum number of sstables in queue before compaction kicks off", - "type":"string", - "nickname":"set_maximum_compaction_threshold", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"value", - "description":"The maximum number of sstables in queue before compaction kicks off", - 
"required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - }, - { - "method":"GET", - "summary":"get the maximum number of sstables in queue before compaction kicks off", - "type":"string", - "nickname":"get_maximum_compaction_threshold", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/compaction/{name}", - "operations":[ - { - "method":"POST", - "summary":"Sets the minumum and maximum number of sstables in queue before compaction kicks off", - "type":"string", - "nickname":"set_compaction_threshold", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"maximum", - "description":"The maximum number of sstables in queue before compaction kicks off", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - }, - { - "name":"minimum", - "description":"The minimum number of sstables in queue before compaction kicks off", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/column_family/compaction_strategy/{name}", - "operations":[ - { - "method":"POST", - "summary":"Sets the compaction strategy by class name", - "type":"void", - "nickname":"set_compaction_strategy_class", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"class_name", - "description":"The class name", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - }, - { - "method":"GET", - "summary":"Gets the compaction strategy class name", - "type":"string", - "nickname":"get_compaction_strategy_class", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/compression_parameters/{name}", - "operations":[ - { - "method":"GET", - "summary":"get the compression parameters", - "type":"array", - "items":{ - "type":"mapper" - }, - "nickname":"get_compression_parameters", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - }, - { - "method":"POST", - "summary":"Sets the compression parameters", - "type":"void", - "nickname":"set_compression_parameters", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"opts", - "description":"The options to set", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/column_family/crc_check_chance/{name}", - "operations":[ - { - "method":"POST", - "summary":"Set new crc 
check chance", - "type":"void", - "nickname":"set_crc_check_chance", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"check_chance", - "description":"CRC check chance", - "required":true, - "allowMultiple":false, - "type":"double", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/column_family/autocompaction/{name}", - "operations":[ - { - "method":"GET", - "summary":"check if the auto compaction disabled", - "type":"boolean", - "nickname":"is_auto_compaction_disabled", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/estimate_keys/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get the estimate keys", - "type":"long", - "nickname":"estimate_keys", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/built_indexes/{name}", - "operations":[ - { - "method":"GET", - "summary":"Returns a list of the names of the built column indexes for current store", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_built_indexes", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/sstables/by_key/{name}", - "operations":[ - { - "method":"GET", - "summary":"Returns a list of filenames that contain the given key on this node", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_sstables_for_key", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"key", - "description":"The key", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/column_family/load/sstable/{name}", - "operations":[ - { - "method":"POST", - "summary":"Scan through Keyspace/ColumnFamily's data directory determine which SSTables should be loaded and load them", - "type":"string", - "nickname":"load_new_sstables", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/sstables/unleveled/{name}", - "operations":[ - { - "method":"GET", - "summary":"the number of SSTables in L0. 
Always return 0 if Leveled compaction is not enabled.", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_unleveled_sstables", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/sstables/per_level/{name}", - "operations":[ - { - "method":"GET", - "summary":"sstable count for each level. empty unless leveled compaction is used", - "type":"array", - "items":{ - "type":"int" - }, - "nickname":"get_sstable_count_per_level", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/droppable_ratio/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get the ratio of droppable tombstones to real columns (and non-droppable tombstones)", - "type":"double", - "nickname":"get_droppable_tombstone_ratio", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/memtable_columns_count/{name}", - "operations":[ - { - "method":"GET", - "summary":"get memtable columns count", - "type":"long", - "nickname":"get_memtable_columns_count", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/memtable_columns_count/", - "operations":[ - { - "method":"GET", - "summary":"get all memtable columns count", - "type":"long", - "nickname":"get_all_memtable_columns_count", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/memtable_on_heap_size/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get the column family active memtable on heap size", - "type":"long", - "nickname":"get_memtable_on_heap_size", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/memtable_on_heap_size", - "operations":[ - { - "method":"GET", - "summary":"Get all active memtable of all column family on heap size", - "type":"long", - "nickname":"get_all_memtable_on_heap_size", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/memtable_off_heap_size/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get the column family active memtable off heap size", - "type":"long", - "nickname":"get_memtable_off_heap_size", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/memtable_off_heap_size", - "operations":[ - { - 
"method":"GET", - "summary":"Get all active memtable of all column family off heap size", - "type":"long", - "nickname":"get_all_memtable_off_heap_size", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/memtable_live_data_size/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get the column family active memtable live data size", - "type":"long", - "nickname":"get_memtable_live_data_size", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/memtable_live_data_size", - "operations":[ - { - "method":"GET", - "summary":"Get all active memtable of all column family live data size", - "type":"long", - "nickname":"get_all_memtable_live_data_size", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/all_memtables_on_heap_size/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get all of the column family active and not memtables on heap size", - "type":"long", - "nickname":"get_cf_all_memtables_on_heap_size", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/all_memtables_on_heap_size", - "operations":[ - { - "method":"GET", - "summary":"Get all memtables active and not of all column family on heap size", - "type":"long", - "nickname":"get_all_cf_all_memtables_on_heap_size", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/all_memtables_off_heap_size/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get all of the column family active and not memtables off heap size", - "type":"long", - "nickname":"get_cf_all_memtables_off_heap_size", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/all_memtables_off_heap_size", - "operations":[ - { - "method":"GET", - "summary":"Get all memtables active and not of all column family off heap size", - "type":"long", - "nickname":"get_all_cf_all_memtables_off_heap_size", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/all_memtables_live_data_size/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get all of the column family active and not memtables live data size", - "type":"long", - "nickname":"get_cf_all_memtables_live_data_size", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/all_memtables_live_data_size", - "operations":[ - { - "method":"GET", - "summary":"Get all memtables active and not of all column family live data size", - "type":"long", - "nickname":"get_all_cf_all_memtables_live_data_size", - "produces":[ - "application/json" - ], - 
"parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/memtable_switch_count/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get memtable switch count", - "type":"int", - "nickname":"get_memtable_switch_count", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/memtable_switch_count", - "operations":[ - { - "method":"GET", - "summary":"Get all memtable switch count", - "type":"int", - "nickname":"get_all_memtable_switch_count", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/estimated_row_size_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get estimated row size histogram", - "type":"array", - "items":{ - "type":"long" - }, - "nickname":"get_estimated_row_size_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/estimated_row_count/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get estimated row count", - "type":"array", - "items":{ - "type":"long" - }, - "nickname":"get_estimated_row_count", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/estimated_column_count_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get estimated column count histogram", - "type":"array", - "items":{ - "type":"long" - }, - "nickname":"get_estimated_column_count_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/compression_ratio/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get compression ratio", - "type":"double", - "nickname":"get_compression_ratio", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/compression_ratio", - "operations":[ - { - "method":"GET", - "summary":"Get all compression ratio", - "type":"double", - "nickname":"get_all_compression_ratio", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/read_latency/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get read latency", - "type":"int", - "nickname":"get_read_latency", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/read/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get number 
of reads", - "type":"long", - "nickname":"get_read", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/read/", - "operations":[ - { - "method":"GET", - "summary":"Get number of reads from all column family, per shard", - "type":"array", - "items":{ - "type":"long" - }, - "nickname":"get_all_read", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/read_latency/histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get read latency histogram", - "$ref": "#/utils/histogram", - "nickname":"get_read_latency_histogram_depricated", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/read_latency/histogram/", - "operations":[ - { - "method":"GET", - "summary":"Get read latency histogram from all column family", - "type":"array", - "items":{ - "$ref": "#/utils/histogram" - }, - "nickname":"get_all_read_latency_histogram_depricated", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/read_latency/moving_average_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get read latency moving avrage histogram", - "$ref": "#/utils/rate_moving_average_and_histogram", - "nickname":"get_read_latency_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/read_latency/moving_average_histogram/", - "operations":[ - { - "method":"GET", - "summary":"Get read latency moving avrage histogram from all column family", - "type":"array", - "items":{ - "$ref": "#/utils/rate_moving_average_and_histogram" - }, - "nickname":"get_all_read_latency_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/read_latency", - "operations":[ - { - "method":"GET", - "summary":"Get all read latency", - "type":"int", - "nickname":"get_all_read_latency", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/range_latency/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get range latency", - "type":"int", - "nickname":"get_range_latency", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/range_latency", - "operations":[ - { - "method":"GET", - "summary":"Get all range latency", - "type":"int", - "nickname":"get_all_range_latency", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/write_latency/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get write latency", - "type":"int", - "nickname":"get_write_latency", - "produces":[ - 
"application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/write/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get number of writes", - "type":"long", - "nickname":"get_write", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/write/", - "operations":[ - { - "method":"GET", - "summary":"Get number of writes from all column family, per shard", - "type":"array", - "items":{ - "type":"long" - }, - "nickname":"get_all_write", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/write_latency/histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get write latency histogram", - "$ref": "#/utils/histogram", - "nickname":"get_write_latency_histogram_depricated", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/write_latency/histogram/", - "operations":[ - { - "method":"GET", - "summary":"Get write latency histogram of all column family", - "type":"array", - "items":{ - "$ref": "#/utils/histogram" - }, - "nickname":"get_all_write_latency_histogram_depricated", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/write_latency/moving_average_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get write latency moving average histogram", - "$ref": "#/utils/rate_moving_average_and_histogram", - "nickname":"get_write_latency_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/write_latency/moving_average_histogram/", - "operations":[ - { - "method":"GET", - "summary":"Get write latency moving average histogram of all column family", - "type":"array", - "items":{ - "$ref": "#/utils/rate_moving_average_and_histogram" - }, - "nickname":"get_all_write_latency_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/write_latency", - "operations":[ - { - "method":"GET", - "summary":"Get all write latency", - "type":"int", - "nickname":"get_all_write_latency", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/pending_flushes/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get pending flushes", - "type":"int", - "nickname":"get_pending_flushes", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/pending_flushes", - "operations":[ - { - "method":"GET", - 
"summary":"Get all pending flushes", - "type":"int", - "nickname":"get_all_pending_flushes", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/pending_compactions/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get pending compactions", - "type":"int", - "nickname":"get_pending_compactions", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/pending_compactions", - "operations":[ - { - "method":"GET", - "summary":"Get all pending compactions", - "type":"int", - "nickname":"get_all_pending_compactions", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/live_ss_table_count/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get live ss table count", - "type":"int", - "nickname":"get_live_ss_table_count", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/live_ss_table_count", - "operations":[ - { - "method":"GET", - "summary":"Get all live ss table count", - "type":"int", - "nickname":"get_all_live_ss_table_count", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/live_disk_space_used/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get live disk space used", - "type":"int", - "nickname":"get_live_disk_space_used", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/live_disk_space_used", - "operations":[ - { - "method":"GET", - "summary":"Get all live disk space used", - "type":"int", - "nickname":"get_all_live_disk_space_used", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/total_disk_space_used/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get total disk space used", - "type":"int", - "nickname":"get_total_disk_space_used", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/total_disk_space_used", - "operations":[ - { - "method":"GET", - "summary":"Get all total disk space used", - "type":"int", - "nickname":"get_all_total_disk_space_used", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/min_row_size/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get min row size", - "type":"long", - "nickname":"get_min_row_size", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - 
"path":"/column_family/metrics/min_row_size", - "operations":[ - { - "method":"GET", - "summary":"Get all min row size", - "type":"long", - "nickname":"get_all_min_row_size", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/max_row_size/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get max row size", - "type":"long", - "nickname":"get_max_row_size", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/max_row_size", - "operations":[ - { - "method":"GET", - "summary":"Get all max row size", - "type":"long", - "nickname":"get_all_max_row_size", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/mean_row_size/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get mean row size", - "type":"long", - "nickname":"get_mean_row_size", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/mean_row_size", - "operations":[ - { - "method":"GET", - "summary":"Get all mean row size", - "type":"long", - "nickname":"get_all_mean_row_size", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/bloom_filter_false_positives/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get bloom filter false positives", - "type":"long", - "nickname":"get_bloom_filter_false_positives", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/bloom_filter_false_positives", - "operations":[ - { - "method":"GET", - "summary":"Get all bloom filter false positives", - "type":"long", - "nickname":"get_all_bloom_filter_false_positives", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/recent_bloom_filter_false_positives/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get recent bloom filter false positives", - "type":"long", - "nickname":"get_recent_bloom_filter_false_positives", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/recent_bloom_filter_false_positives", - "operations":[ - { - "method":"GET", - "summary":"Get all recent bloom filter false positives", - "type":"long", - "nickname":"get_all_recent_bloom_filter_false_positives", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/bloom_filter_false_ratio/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get bloom filter false ratio", - "type":"double", - "nickname":"get_bloom_filter_false_ratio", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The 
column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/bloom_filter_false_ratio", - "operations":[ - { - "method":"GET", - "summary":"Get all bloom filter false ratio", - "type":"double", - "nickname":"get_all_bloom_filter_false_ratio", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/recent_bloom_filter_false_ratio/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get recent bloom filter false ratio", - "type":"double", - "nickname":"get_recent_bloom_filter_false_ratio", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/recent_bloom_filter_false_ratio", - "operations":[ - { - "method":"GET", - "summary":"Get all recent bloom filter false ratio", - "type":"double", - "nickname":"get_all_recent_bloom_filter_false_ratio", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/bloom_filter_disk_space_used/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get bloom filter disk space used", - "type":"long", - "nickname":"get_bloom_filter_disk_space_used", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/bloom_filter_disk_space_used", - "operations":[ - { - "method":"GET", - "summary":"Get all bloom filter disk space used", - "type":"long", - "nickname":"get_all_bloom_filter_disk_space_used", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/bloom_filter_off_heap_memory_used/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get bloom filter off heap memory used", - "type":"long", - "nickname":"get_bloom_filter_off_heap_memory_used", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/bloom_filter_off_heap_memory_used", - "operations":[ - { - "method":"GET", - "summary":"Get all bloom filter off heap memory used", - "type":"long", - "nickname":"get_all_bloom_filter_off_heap_memory_used", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/index_summary_off_heap_memory_used/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get index summary off heap memory used", - "type":"long", - "nickname":"get_index_summary_off_heap_memory_used", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/index_summary_off_heap_memory_used", - "operations":[ - { - "method":"GET", - "summary":"Get all index summary off heap memory used", - "type":"long", - 
"nickname":"get_all_index_summary_off_heap_memory_used", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/compression_metadata_off_heap_memory_used/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get compression metadata off heap memory used", - "type":"long", - "nickname":"get_compression_metadata_off_heap_memory_used", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/compression_metadata_off_heap_memory_used", - "operations":[ - { - "method":"GET", - "summary":"Get all compression metadata off heap memory used", - "type":"long", - "nickname":"get_all_compression_metadata_off_heap_memory_used", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/speculative_retries/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get speculative retries", - "type":"int", - "nickname":"get_speculative_retries", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/speculative_retries", - "operations":[ - { - "method":"GET", - "summary":"Get all speculative retries", - "type":"int", - "nickname":"get_all_speculative_retries", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/key_cache_hit_rate/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get key cache hit rate", - "type":"double", - "nickname":"get_key_cache_hit_rate", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/snapshots_size/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get true snapshots size", - "type":"long", - "nickname":"get_true_snapshots_size", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/true_snapshots_size", - "operations":[ - { - "method":"GET", - "summary":"Get all true snapshots size", - "type":"long", - "nickname":"get_all_true_snapshots_size", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/row_cache_hit_out_of_range/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get row cache hit out of range", - "type":"int", - "nickname":"get_row_cache_hit_out_of_range", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/row_cache_hit_out_of_range", - "operations":[ - { - "method":"GET", - "summary":"Get all row cache hit out of range", - "type":"int", - 
"nickname":"get_all_row_cache_hit_out_of_range", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/row_cache_hit/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get row cache hit", - "type":"int", - "nickname":"get_row_cache_hit", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/row_cache_hit", - "operations":[ - { - "method":"GET", - "summary":"Get all row cache hit", - "type":"int", - "nickname":"get_all_row_cache_hit", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/row_cache_miss/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get row cache miss", - "type":"int", - "nickname":"get_row_cache_miss", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/row_cache_miss", - "operations":[ - { - "method":"GET", - "summary":"Get all row cache miss", - "type":"int", - "nickname":"get_all_row_cache_miss", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/column_family/metrics/cas_prepare/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get cas prepare", - "type":"int", - "nickname":"get_cas_prepare", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/cas_propose/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get cas propose", - "type":"int", - "nickname":"get_cas_propose", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/cas_commit/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get cas commit", - "type":"int", - "nickname":"get_cas_commit", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/sstables_per_read_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get sstables per read histogram", - "type":"array", - "items":{ - "type":"double" - }, - "nickname":"get_sstables_per_read_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/tombstone_scanned_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get tombstone scanned histogram", - "type":"array", - "items":{ - "type":"double" - }, - 
"nickname":"get_tombstone_scanned_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/live_scanned_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get live scanned histogram", - "type":"array", - "items":{ - "type":"double" - }, - "nickname":"get_live_scanned_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/col_update_time_delta_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get col update time delta histogram", - "type":"array", - "items":{ - "type":"double" - }, - "nickname":"get_col_update_time_delta_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path": "/column_family/metrics/coordinator/read", - "operations": [ - { - "method": "GET", - "summary": "Get coordinator read latency", - "$ref": "#/utils/histogram", - "nickname": "get_coordinator_read_latency", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/column_family/metrics/coordinator/scan", - "operations": [ - { - "method": "GET", - "summary": "Get coordinator scan latency", - "$ref": "#/utils/histogram", - "nickname": "get_coordinator_scan_latency", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/column_family/metrics/waiting_on_free_memtable", - "operations": [ - { - "method": "GET", - "summary": "Get waiting on free memtable space", - "$ref": "#/utils/histogram", - "nickname": "get_waiting_on_free_memtable_space", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path":"/column_family/metrics/read_latency/estimated_recent_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get read latency", - "$ref":"#/utils/estimated_histogram", - "nickname":"get_read_latency_estimated_recent_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/read_latency/estimated_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get read latency", - "$ref":"#/utils/estimated_histogram", - "nickname":"get_read_latency_estimated_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/range_latency/estimated_recent_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get range latency", - "$ref":"#/utils/estimated_histogram", - "nickname":"get_range_latency_estimated_recent_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - 
"name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/range_latency/estimated_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get range latency", - "$ref":"#/utils/estimated_histogram", - "nickname":"get_range_latency_estimated_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/write_latency/estimated_recent_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get write latency", - "$ref":"#/utils/estimated_histogram", - "nickname":"get_write_latency_estimated_recent_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/write_latency/estimated_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get write latency", - "$ref":"#/utils/estimated_histogram", - "nickname":"get_write_latency_estimated_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/cas_prepare/estimated_recent_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get cas prepare", - "$ref":"#/utils/estimated_histogram", - "nickname":"get_cas_prepare_estimated_recent_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/cas_prepare/estimated_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get cas prepare", - "$ref":"#/utils/estimated_histogram", - "nickname":"get_cas_prepare_estimated_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/cas_propose/estimated_recent_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get cas propose", - "$ref":"#/utils/estimated_histogram", - "nickname":"get_cas_propose_estimated_recent_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/cas_propose/estimated_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get cas propose", - "$ref":"#/utils/estimated_histogram", - "nickname":"get_cas_propose_estimated_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in 
keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/cas_commit/estimated_recent_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get cas commit", - "$ref":"#/utils/estimated_histogram", - "nickname":"get_cas_commit_estimated_recent_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/column_family/metrics/cas_commit/estimated_histogram/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get cas commit", - "$ref":"#/utils/estimated_histogram", - "nickname":"get_cas_commit_estimated_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The column family name in keyspace:name format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - } - ], - "models":{ - "mapper":{ - "id":"mapper", - "description":"A key value mapping", - "properties":{ - "key":{ - "type":"string", - "description":"The key" - }, - "value":{ - "type":"string", - "description":"The value" - } - } - }, - "column_family_info":{ - "id":"column_family_info", - "description":"Information about column family", - "properties":{ - "ks":{ - "type":"string", - "description":"The Keyspace" - }, - "cf":{ - "type":"string", - "description":"The column family" - }, - "type":{ - "type":"string", - "description":"The column family type" - } - } - } - } -} diff --git a/scylla/api/api-doc/commitlog.json b/scylla/api/api-doc/commitlog.json deleted file mode 100644 index bc156e7..0000000 --- a/scylla/api/api-doc/commitlog.json +++ /dev/null @@ -1,149 +0,0 @@ -{ - "apiVersion":"0.0.1", - "swaggerVersion":"1.2", - "basePath":"{{Protocol}}://{{Host}}", - "resourcePath":"/commitlog", - "produces":[ - "application/json" - ], - "apis":[ - { - "path":"/commitlog/recover/{path}", - "operations":[ - { - "method":"POST", - "summary":"Recover a single file", - "type":"void", - "nickname":"commitlog_recover", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"path", - "description":"Full path of file or directory", - "required":true, - "allowMultiple":true, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/commitlog/segments/active", - "operations":[ - { - "method":"GET", - "summary":"file names (not full paths) of active commit log segments (segments containing unflushed data)", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_active_segment_names", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/commitlog/segments/archiving", - "operations":[ - { - "method":"GET", - "summary":"Returns files which are pending for archival attempt. 
Does NOT include failed archive attempts", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_archiving_segment_names", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path": "/commitlog/metrics/completed_tasks", - "operations": [ - { - "method": "GET", - "summary": "Get completed tasks", - "type": "long", - "nickname": "get_completed_tasks", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/commitlog/metrics/pending_tasks", - "operations": [ - { - "method": "GET", - "summary": "Get pending tasks", - "type": "long", - "nickname": "get_pending_tasks", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/commitlog/metrics/total_commit_log_size", - "operations": [ - { - "method": "GET", - "summary": "Get total commit log size", - "type": "long", - "nickname": "get_total_commit_log_size", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/commit_log/metrics/waiting_on_segment_allocation", - "operations": [ - { - "method": "GET", - "summary": "Get waiting on segment allocation", - "$ref": "#/utils/histogram", - "nickname": "get_waiting_on_segment_allocation", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/commit_log/metrics/waiting_on_commit", - "operations": [ - { - "method": "GET", - "summary": "Get waiting on commit", - "$ref": "#/utils/histogram", - "nickname": "get_waiting_on_commit", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - } - ] -} diff --git a/scylla/api/api-doc/compaction_manager.json b/scylla/api/api-doc/compaction_manager.json deleted file mode 100644 index edbd652..0000000 --- a/scylla/api/api-doc/compaction_manager.json +++ /dev/null @@ -1,285 +0,0 @@ -{ - "apiVersion":"0.0.1", - "swaggerVersion":"1.2", - "basePath":"{{Protocol}}://{{Host}}", - "resourcePath":"/compaction_manager", - "produces":[ - "application/json" - ], - "apis":[ - { - "path":"/compaction_manager/compactions", - "operations":[ - { - "method":"GET", - "summary":"get List of running compactions", - "type":"array", - "items":{ - "type":"summary" - }, - "nickname":"get_compactions", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/compaction_manager/compaction_history", - "operations":[ - { - "method":"GET", - "summary":"get List of the compaction history", - "type":"array", - "items":{ - "type":"history" - }, - "nickname":"get_compaction_history", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/compaction_manager/compaction_info", - "operations":[ - { - "method":"GET", - "summary":"get a list of all active compaction info", - "type":"array", - "items":{ - "type":"compaction_info" - }, - "nickname":"get_compaction_info", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/compaction_manager/force_user_defined_compaction", - "operations":[ - { - "method":"POST", - "summary":"Triggers the compaction of user specified sstables. You can specify files from various keyspaces and columnfamilies. If you do so, user defined compaction is performed several times to the groups of files in the same keyspace/columnfamily. 
must contain keyspace and columnfamily name in path(for 2.1+) or file name itself.", - "type":"void", - "nickname":"force_user_defined_compaction", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"data_files", - "description":"a comma separated list of sstable file to compact. must contain keyspace and columnfamily name in path(for 2.1+) or file name itself", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/compaction_manager/stop_compaction", - "operations":[ - { - "method":"POST", - "summary":"Stop all running compaction-like tasks having the provided type", - "type":"void", - "nickname":"stop_compaction", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"type", - "description":"the type of compaction to stop. Can be one of: - COMPACTION - VALIDATION - CLEANUP - SCRUB - INDEX_BUILD", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path": "/compaction_manager/metrics/pending_tasks", - "operations": [ - { - "method": "GET", - "summary": "Get pending tasks", - "type": "int", - "nickname": "get_pending_tasks", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/compaction_manager/metrics/completed_tasks", - "operations": [ - { - "method": "GET", - "summary": "Get completed tasks", - "type": "long", - "nickname": "get_completed_tasks", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/compaction_manager/metrics/total_compactions_completed", - "operations": [ - { - "method": "GET", - "summary": "Get total compactions completed", - "type": "long", - "nickname": "get_total_compactions_completed", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/compaction_manager/metrics/bytes_compacted", - "operations": [ - { - "method": "GET", - "summary": "Get bytes compacted", - "type": "int", - "nickname": "get_bytes_compacted", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - } - ], - "models":{ - "row_merged":{ - "id":"row_merged", - "description":"A row merged information", - "properties":{ - "key":{ - "type":"int", - "description":"The number of sstable" - }, - "value":{ - "type":"long", - "description":"The number of rows compacted" - } - } - }, - "compaction_info" :{ - "id": "compaction_info", - "description":"A key value mapping", - "properties":{ - "operation_type":{ - "type":"string", - "description":"The operation type" - }, - "completed":{ - "type":"long", - "description":"The current completed" - }, - "total":{ - "type":"long", - "description":"The total to compact" - }, - "unit":{ - "type":"string", - "description":"The compacted unit" - } - } - }, - "summary":{ - "id":"summary", - "description":"A compaction summary object", - "properties":{ - "id":{ - "type":"string", - "description":"The UUID" - }, - "ks":{ - "type":"string", - "description":"The keyspace name" - }, - "cf":{ - "type":"string", - "description":"The column family name" - }, - "completed":{ - "type":"long", - "description":"The number of units completed" - }, - "total":{ - "type":"long", - "description":"The total number of units" - }, - "task_type":{ - "type":"string", - "description":"The task compaction type" - }, - "unit":{ - "type":"string", - "description":"The units being used" - } - } - }, - "history": { - "id":"history", - "description":"Compaction history information", - "properties":{ - "id":{ - 
"type":"string", - "description":"The UUID" - }, - "cf":{ - "type":"string", - "description":"The column family name" - }, - "ks":{ - "type":"string", - "description":"The keyspace name" - }, - "compacted_at":{ - "type":"long", - "description":"The time of compaction" - }, - "bytes_in":{ - "type":"long", - "description":"Bytes in" - }, - "bytes_out":{ - "type":"long", - "description":"Bytes out" - }, - "rows_merged":{ - "type":"array", - "items":{ - "type":"row_merged" - }, - "description":"The merged rows" - } - } - } - } -} diff --git a/scylla/api/api-doc/endpoint_snitch_info.json b/scylla/api/api-doc/endpoint_snitch_info.json deleted file mode 100644 index d5c51c5..0000000 --- a/scylla/api/api-doc/endpoint_snitch_info.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "apiVersion":"0.0.1", - "swaggerVersion":"1.2", - "basePath":"{{Protocol}}://{{Host}}", - "resourcePath":"/snitch", - "produces":[ - "application/json" - ], - "apis":[ - { - "path":"/snitch/datacenter", - "operations":[ - { - "method":"GET", - "summary":"Provides the Datacenter name depending on the respective snitch used, given the hostname/ip", - "type":"string", - "nickname":"get_datacenter", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"host", - "description":"The host name. If absent, the local server broadcast/listen address is used", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/snitch/rack", - "operations":[ - { - "method":"GET", - "summary":"Provides the Rack name depending on the respective snitch used, given the host name/ip", - "type":"string", - "nickname":"get_rack", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"host", - "description":"The host name. If absent, the local server broadcast/listen address is used", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/snitch/name", - "operations":[ - { - "method":"GET", - "summary":"Provides the snitch name of the cluster", - "type":"string", - "nickname":"get_snitch_name", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - } - ] -} \ No newline at end of file diff --git a/scylla/api/api-doc/failure_detector.json b/scylla/api/api-doc/failure_detector.json deleted file mode 100644 index be88746..0000000 --- a/scylla/api/api-doc/failure_detector.json +++ /dev/null @@ -1,240 +0,0 @@ -{ - "apiVersion":"0.0.1", - "swaggerVersion":"1.2", - "basePath":"{{Protocol}}://{{Host}}", - "resourcePath":"/failure_detector", - "produces":[ - "application/json" - ], - "apis":[ - { - "path":"/failure_detector/phi", - "operations":[ - { - "method":"GET", - "summary":"Get the phi convict threshold", - "type":"string", - "nickname":"get_phi_convict_threshold", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"Set the phi convict threshold", - "type":"double", - "nickname":"set_phi_convict_threshold", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"phi", - "description":"The new phi value", - "required":true, - "allowMultiple":false, - "type":"double", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/failure_detector/endpoint_phi_values", - "operations":[ - { - "method":"GET", - "summary":"Get end point phi values", - "type":"array", - "items":{ - "type":"endpoint_phi_values" - }, - "nickname":"get_endpoint_phi_values", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { 
- "path":"/failure_detector/endpoints/", - "operations":[ - { - "method":"GET", - "summary":"Get all endpoint states", - "type":"array", - "items":{ - "type":"endpoint_state" - }, - "nickname":"get_all_endpoint_states", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/failure_detector/endpoints/states/{addr}", - "operations":[ - { - "method":"GET", - "summary":"Get endpoint states", - "type":"string", - "nickname":"get_endpoint_state", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"addr", - "description":"The endpoint address", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/failure_detector/count/endpoint/down", - "operations":[ - { - "method":"GET", - "summary":"Get count down endpoint", - "type":"int", - "nickname":"get_down_endpoint_count", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/failure_detector/count/endpoint/up", - "operations":[ - { - "method":"GET", - "summary":"Get count up endpoint", - "type":"int", - "nickname":"get_up_endpoint_count", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/failure_detector/simple_states", - "operations":[ - { - "method":"GET", - "summary":"Get simple_states", - "type":"array", - "items":{ - "type":"mapper" - }, - "nickname":"get_simple_states", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - } - ], - "models" : { - "mapper": { - "id": "mapper", - "description": "Holds a key value", - "properties": { - "key": { - "type": "string", - "description": "The key" - }, - "value": { - "type": "string", - "description": "The value" - } - } - }, - "endpoint_state": { - "id": "states", - "description": "Holds an endpoint state", - "properties": { - "addrs": { - "type": "string", - "description": "The endpoint address" - }, - "generation": { - "type": "int", - "description": "The heart beat generation" - }, - "version": { - "type": "int", - "description": "The heart beat version" - }, - "update_time": { - "type": "long", - "description": "The update timestamp" - }, - "is_alive": { - "type": "boolean", - "description": "Is the endpoint alive" - }, - "application_state" : { - "type":"array", - "items":{ - "type":"version_value" - }, - "description": "Is the endpoint alive" - } - } - }, - "version_value": { - "id": "version_value", - "description": "Holds a version value for an application state", - "properties": { - "application_state": { - "type": "int", - "description": "The application state enum index" - }, - "value": { - "type": "string", - "description": "The version value" - }, - "version": { - "type": "int", - "description": "The application state version" - } - } - }, - "endpoint_phi_value": { - "id" : "endpoint_phi_value", - "description": "Holds phi value for a single end point", - "properties": { - "phi": { - "type": "double", - "description": "Phi value" - }, - "endpoint": { - "type": "string", - "description": "end point address" - } - } - } - } -} diff --git a/scylla/api/api-doc/gossiper.json b/scylla/api/api-doc/gossiper.json deleted file mode 100644 index f4c162b..0000000 --- a/scylla/api/api-doc/gossiper.json +++ /dev/null @@ -1,153 +0,0 @@ -{ - "apiVersion":"0.0.1", - "swaggerVersion":"1.2", - "basePath":"{{Protocol}}://{{Host}}", - "resourcePath":"/gossiper", - "produces":[ - "application/json" - ], - "apis":[ - { - "path":"/gossiper/endpoint/down/", - "operations":[ - { - "method":"GET", - 
"summary":"Get the addreses of the down endpoints", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_down_endpoint", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/gossiper/endpoint/live/", - "operations":[ - { - "method":"GET", - "summary":"Get the addreses of live endpoints", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_live_endpoint", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/gossiper/downtime/{addr}", - "operations":[ - { - "method":"GET", - "summary":"Get the downtime of an end point", - "type":"long", - "nickname":"get_endpoint_downtime", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"addr", - "description":"The endpoint address", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/gossiper/generation_number/{addr}", - "operations":[ - { - "method":"GET", - "summary":"Returns files which are pending for archival attempt. Does NOT include failed archive attempts", - "type":"int", - "nickname":"get_current_generation_number", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"addr", - "description":"The endpoint address", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/gossiper/heart_beat_version/{addr}", - "operations":[ - { - "method":"GET", - "summary":"Get heart beat version for a node", - "type":"int", - "nickname":"get_current_heart_beat_version", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"addr", - "description":"The endpoint address", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/gossiper/assassinate/{addr}", - "operations":[ - { - "method":"POST", - "summary":"Assassinate an end point", - "type":"void", - "nickname":"assassinate_endpoint", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"addr", - "description":"The endpoint address", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"unsafe", - "description":"Set to True to perform an unsafe assassination", - "required":false, - "allowMultiple":false, - "type":"boolean", - "paramType":"query" - } - ] - } - ] - } - ] -} diff --git a/scylla/api/api-doc/hinted_handoff.json b/scylla/api/api-doc/hinted_handoff.json deleted file mode 100644 index c5d49b1..0000000 --- a/scylla/api/api-doc/hinted_handoff.json +++ /dev/null @@ -1,145 +0,0 @@ -{ - "apiVersion":"0.0.1", - "swaggerVersion":"1.2", - "basePath":"{{Protocol}}://{{Host}}", - "resourcePath":"/hinted_hand_off_manager", - "produces":[ - "application/json" - ], - "apis":[ - { - "path":"/hinted_handoff/hints", - "operations":[ - { - "method":"GET", - "summary":"List all the endpoints that this node has hints for.", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"list_endpoints_pending_hints", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"DELETE", - "summary":"Truncate all the hints", - "type":"void", - "nickname":"truncate_all_hints", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"host", - "description":"Optional String rep. 
of endpoint address to delete hints for", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/hinted_handoff/schedule", - "operations":[ - { - "method":"POST", - "summary":"force hint delivery to an endpoint", - "type":"void", - "nickname":"schedule_hint_delivery", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"host", - "description":" String rep. of endpoint address", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/hinted_handoff/pause", - "operations":[ - { - "method":"POST", - "summary":"pause hints delivery", - "type":"void", - "nickname":"pause_hints_delivery", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"pause", - "description":"pause status", - "required":true, - "allowMultiple":false, - "type":"boolean", - "paramType":"query" - } - ] - } - ] - }, - { - "path": "/hinted_handoff/metrics/create_hint/{addr}", - "operations": [ - { - "method": "GET", - "summary": "Get create hint count", - "type": "int", - "nickname": "get_create_hint_count", - "produces": [ - "application/json" - ], - "parameters": [ - { - "name": "addr", - "description": "The peer address", - "required": true, - "allowMultiple": false, - "type": "string", - "paramType": "path" - } - ] - } - ] - }, - { - "path": "/hinted_handoff/metrics/not_stored_hints/{addr}", - "operations": [ - { - "method": "GET", - "summary": "Get not stored hints count", - "type": "int", - "nickname": "get_not_stored_hints_count", - "produces": [ - "application/json" - ], - "parameters": [ - { - "name": "addr", - "description": "The peer address", - "required": true, - "allowMultiple": false, - "type": "string", - "paramType": "path" - } - ] - } - ] - } - ] -} diff --git a/scylla/api/api-doc/lsa.json b/scylla/api/api-doc/lsa.json deleted file mode 100644 index a33cfdd..0000000 --- a/scylla/api/api-doc/lsa.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "apiVersion":"0.0.1", - "swaggerVersion":"1.2", - "basePath":"{{Protocol}}://{{Host}}", - "resourcePath":"/lsa", - "produces":[ - "application/json" - ], - "apis":[ - { - "path":"/lsa/compact", - "operations":[ - { - "method":"POST", - "summary":"Force compaction of all regions", - "type":"void", - "nickname":"lsa_compact", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - } - ], - "models":{ - } -} diff --git a/scylla/api/api-doc/messaging_service.json b/scylla/api/api-doc/messaging_service.json deleted file mode 100644 index 248ce6e..0000000 --- a/scylla/api/api-doc/messaging_service.json +++ /dev/null @@ -1,262 +0,0 @@ -{ - "apiVersion":"0.0.1", - "swaggerVersion":"1.2", - "basePath":"{{Protocol}}://{{Host}}", - "resourcePath":"/messaging_service", - "produces":[ - "application/json" - ], - "apis":[ - { - "path":"/messaging_service/messages/timeout", - "operations":[ - { - "method":"GET", - "summary":"Get the number of timeout messages", - "type":"array", - "items":{ - "type":"message_counter" - }, - "nickname":"get_timeout_messages", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/messaging_service/messages/dropped_by_ver", - "operations":[ - { - "method":"GET", - "summary":"Get the number of dropped messages per verb", - "type":"array", - "items":{ - "type":"verb_counter" - }, - "nickname":"get_dropped_messages_by_ver", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - 
"path":"/messaging_service/messages/dropped", - "operations":[ - { - "method":"GET", - "summary":"Get the number of messages that were dropped before sending", - "type":"array", - "items":{ - "type":"message_counter" - }, - "nickname":"get_dropped_messages", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/messaging_service/messages/replied", - "operations":[ - { - "method":"GET", - "summary":"Get the number of replied messages", - "type":"array", - "items":{ - "type":"message_counter" - }, - "nickname":"get_completed_messages", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/messaging_service/messages/sent", - "operations":[ - { - "method":"GET", - "summary":"Get the number of sent messages", - "type":"array", - "items":{ - "type":"message_counter" - }, - "nickname":"get_sent_messages", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/messaging_service/messages/pending", - "operations":[ - { - "method":"GET", - "summary":"Get the number of pending messages", - "type":"array", - "items":{ - "type":"message_counter" - }, - "nickname":"get_pending_messages", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/messaging_service/messages/exception", - "operations":[ - { - "method":"GET", - "summary":"Get the number of messages return with an exception", - "type":"array", - "items":{ - "type":"message_counter" - }, - "nickname":"get_exception_messages", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/messaging_service/messages/respond_pending", - "operations":[ - { - "method":"GET", - "summary":"Get the number of messages waiting for respond", - "type":"array", - "items":{ - "type":"message_counter" - }, - "nickname":"get_respond_pending_messages", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/messaging_service/messages/respond_completed", - "operations":[ - { - "method":"GET", - "summary":"Get the number of completed respond messages", - "type":"array", - "items":{ - "type":"message_counter" - }, - "nickname":"get_respond_completed_messages", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/messaging_service/version", - "operations":[ - { - "method":"GET", - "summary":"Get the version number", - "type":"int", - "nickname":"get_version", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"addr", - "description":"Address", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - } - ], - "models":{ - "message_counter":{ - "id":"message_counter", - "description":"Holds command counters", - "properties":{ - "value":{ - "type":"long" - }, - "key":{ - "type":"string" - } - } - }, - "verb_counter":{ - "id":"verb_counters", - "description":"Holds verb counters", - "properties":{ - "count":{ - "type":"long" - }, - "verb":{ - "type":"string", - "enum":[ - "CLIENT_ID", - "MUTATION", - "MUTATION_DONE", - "READ_DATA", - "READ_MUTATION_DATA", - "READ_DIGEST", - "GOSSIP_ECHO", - "GOSSIP_DIGEST_SYN", - "GOSSIP_DIGEST_ACK2", - "GOSSIP_SHUTDOWN", - "DEFINITIONS_UPDATE", - "TRUNCATE", - "REPLICATION_FINISHED", - "MIGRATION_REQUEST", - "PREPARE_MESSAGE", - "PREPARE_DONE_MESSAGE", - "STREAM_MUTATION", - "STREAM_MUTATION_DONE", - "COMPLETE_MESSAGE", - "REPAIR_CHECKSUM_RANGE", - "GET_SCHEMA_VERSION" - ] - } - } - } - } -} diff --git 
a/scylla/api/api-doc/storage_proxy.json b/scylla/api/api-doc/storage_proxy.json deleted file mode 100644 index e0c107e..0000000 --- a/scylla/api/api-doc/storage_proxy.json +++ /dev/null @@ -1,1075 +0,0 @@ -{ - "apiVersion":"0.0.1", - "swaggerVersion":"1.2", - "basePath":"{{Protocol}}://{{Host}}", - "resourcePath":"/storage_proxy", - "produces":[ - "application/json" - ], - "apis":[ - { - "path":"/storage_proxy/total_hints", - "operations":[ - { - "method":"GET", - "summary":"Get total hints", - "type":"long", - "nickname":"get_total_hints", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/storage_proxy/hinted_handoff_enabled", - "operations":[ - { - "method":"GET", - "summary":"Return true if hinted handoff enabled", - "type":"boolean", - "nickname":"get_hinted_handoff_enabled", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - }, - { - "method":"POST", - "summary":"Set hinted handoff status", - "type":"void", - "nickname":"set_hinted_handoff_enabled", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"enable", - "description":"Set to true to enable hinted handoff", - "required":true, - "allowMultiple":false, - "type":"boolean", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_proxy/hinted_handoff_enabled_by_dc", - "operations":[ - { - "method":"GET", - "summary":"Get the hinted handoff enabled by dc", - "type":"array", - "items":{ - "type":"mapper_list" - }, - "nickname":"get_hinted_handoff_enabled_by_dc", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - }, - { - "method":"POST", - "summary":"Set the hinted handoff enabled by dc", - "type":"void", - "nickname":"set_hinted_handoff_enabled_by_dc_list", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"dcs", - "description":"The dcs to enable in the CSV format", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_proxy/max_hint_window", - "operations":[ - { - "method":"GET", - "summary":"Get the max hint window", - "type":"int", - "nickname":"get_max_hint_window", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - }, - { - "method":"POST", - "summary":"Set the max hint window", - "type":"void", - "nickname":"set_max_hint_window", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"ms", - "description":"max hint window in ms", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_proxy/max_hints_in_progress", - "operations":[ - { - "method":"GET", - "summary":"Get max hints in progress", - "type":"int", - "nickname":"get_max_hints_in_progress", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - }, - { - "method":"POST", - "summary":"Set max hints in progress", - "type":"void", - "nickname":"set_max_hints_in_progress", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"qs", - "description":"max hints in progress", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_proxy/hints_in_progress", - "operations":[ - { - "method":"GET", - "summary":"get hints in progress", - "type":"int", - "nickname":"get_hints_in_progress", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/storage_proxy/rpc_timeout", - "operations":[ - { - "method":"GET", - "summary":"Get the RPC timeout in seconds", - 
"type":"double", - "nickname":"get_rpc_timeout", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - }, - { - "method":"POST", - "summary":"Set the RPC timeout", - "type":"void", - "nickname":"set_rpc_timeout", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"timeout", - "description":"Timeout in seconds", - "required":true, - "allowMultiple":false, - "type":"double", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_proxy/read_rpc_timeout", - "operations":[ - { - "method":"GET", - "summary":"Get the read RPC timeout in seconds", - "type":"double", - "nickname":"get_read_rpc_timeout", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - }, - { - "method":"POST", - "summary":"Set the read RPC timeout", - "type":"void", - "nickname":"set_read_rpc_timeout", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"timeout", - "description":"The timeout in second", - "required":true, - "allowMultiple":false, - "type":"double", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_proxy/write_rpc_timeout", - "operations":[ - { - "method":"GET", - "summary":"Get the write RPC timeout in seconds", - "type":"double", - "nickname":"get_write_rpc_timeout", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - }, - { - "method":"POST", - "summary":"Set the write RPC timeout", - "type":"void", - "nickname":"set_write_rpc_timeout", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"timeout", - "description":"timeout in seconds", - "required":true, - "allowMultiple":false, - "type":"double", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_proxy/counter_write_rpc_timeout", - "operations":[ - { - "method":"GET", - "summary":"Get counter write rpc timeout in seconds", - "type":"double", - "nickname":"get_counter_write_rpc_timeout", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - }, - { - "method":"POST", - "summary":"Set counter write rpc timeout", - "type":"void", - "nickname":"set_counter_write_rpc_timeout", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"timeout", - "description":"timeout in seconds", - "required":true, - "allowMultiple":false, - "type":"double", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_proxy/cas_contention_timeout", - "operations":[ - { - "method":"GET", - "summary":"Get CAS contention timeout in seconds", - "type":"double", - "nickname":"get_cas_contention_timeout", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - }, - { - "method":"POST", - "summary":"Set CAS contention timeout", - "type":"void", - "nickname":"set_cas_contention_timeout", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"timeout", - "description":"timeout in second", - "required":true, - "allowMultiple":false, - "type":"double", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_proxy/range_rpc_timeout", - "operations":[ - { - "method":"GET", - "summary":"Get range rpc timeout in seconds", - "type":"double", - "nickname":"get_range_rpc_timeout", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - }, - { - "method":"POST", - "summary":"Set range rpc timeout", - "type":"void", - "nickname":"set_range_rpc_timeout", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"timeout", - "description":"timeout in second", - "required":true, - "allowMultiple":false, - "type":"double", - "paramType":"query" - } - ] - } - ] - }, - { - 
"path":"/storage_proxy/truncate_rpc_timeout", - "operations":[ - { - "method":"GET", - "summary":"Get truncate rpc timeout in seconds", - "type":"double", - "nickname":"get_truncate_rpc_timeout", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - }, - { - "method":"POST", - "summary":"Set truncate rpc timeout", - "type":"void", - "nickname":"set_truncate_rpc_timeout", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"timeout", - "description":"timeout in second", - "required":true, - "allowMultiple":false, - "type":"double", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_proxy/reload_trigger_classes", - "operations":[ - { - "method":"POST", - "summary":"Reload trigger classes", - "type":"void", - "nickname":"reload_trigger_classes", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/storage_proxy/read_repair_attempted", - "operations":[ - { - "method":"GET", - "summary":"Get read repair attempted", - "type":"long", - "nickname":"get_read_repair_attempted", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/storage_proxy/read_repair_repaired_blocking", - "operations":[ - { - "method":"GET", - "summary":"Get read repair repaired blocking", - "type":"long", - "nickname":"get_read_repair_repaired_blocking", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/storage_proxy/read_repair_repaired_background", - "operations":[ - { - "method":"GET", - "summary":"Get read repair repaired background", - "type":"long", - "nickname":"get_read_repair_repaired_background", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/storage_proxy/schema_versions", - "operations":[ - { - "method":"GET", - "summary":"Get a map of the schema versions", - "type":"array", - "items":{ - "type":"mapper_list" - }, - "nickname":"get_schema_versions", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/storage_proxy/metrics/cas_read/timeouts", - "operations":[ - { - "method":"GET", - "summary":"Get CAS read timeout", - "type":"long", - "nickname":"get_cas_read_timeouts", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/storage_proxy/metrics/cas_read/unavailables", - "operations":[ - { - "method":"GET", - "summary":"Get CAS read unavailables", - "type":"long", - "nickname":"get_cas_read_unavailables", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/storage_proxy/metrics/cas_write/timeouts", - "operations":[ - { - "method":"GET", - "summary":"Get CAS write timeout", - "type":"long", - "nickname":"get_cas_write_timeouts", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path":"/storage_proxy/metrics/cas_write/unavailables", - "operations":[ - { - "method":"GET", - "summary":"Get CAS write unavailables", - "type":"long", - "nickname":"get_cas_write_unavailables", - "produces":[ - "application/json" - ], - "parameters":[ - - ] - } - ] - }, - { - "path": "/storage_proxy/metrics/cas_write/unfinished_commit", - "operations": [ - { - "method": "GET", - "summary": "Get cas write metrics", - "type": "int", - "nickname": "get_cas_write_metrics_unfinished_commit", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/cas_write/contention", - "operations": [ - { - "method": "GET", - "summary": "Get cas write 
metrics", - "type": "array", - "nickname": "get_cas_write_metrics_contention", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/cas_write/condition_not_met", - "operations": [ - { - "method": "GET", - "summary": "Get cas write metrics", - "type": "int", - "nickname": "get_cas_write_metrics_condition_not_met", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/cas_read/unfinished_commit", - "operations": [ - { - "method": "GET", - "summary": "Get cas read metrics", - "type": "int", - "nickname": "get_cas_read_metrics_unfinished_commit", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/cas_read/contention", - "operations": [ - { - "method": "GET", - "summary": "Get cas read metrics", - "type": "array", - "nickname": "get_cas_read_metrics_contention", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/cas_read/condition_not_met", - "operations": [ - { - "method": "GET", - "summary": "Get cas read metrics", - "type": "int", - "nickname": "get_cas_read_metrics_condition_not_met", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/read/timeouts", - "operations": [ - { - "method": "GET", - "summary": "Get read metrics", - "type": "int", - "nickname": "get_read_metrics_timeouts", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/read/unavailables", - "operations": [ - { - "method": "GET", - "summary": "Get read metrics", - "type": "int", - "nickname": "get_read_metrics_unavailables", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/read/timeouts_rates", - "operations": [ - { - "method": "GET", - "summary": "Get read metrics rates", - "type": "#/utils/rate_moving_average", - "nickname": "get_read_metrics_timeouts_rates", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/read/unavailables_rates", - "operations": [ - { - "method": "GET", - "summary": "Get read metrics rates", - "type": "#/utils/rate_moving_average", - "nickname": "get_read_metrics_unavailables_rates", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/read/histogram", - "operations": [ - { - "method": "GET", - "summary": "Get read metrics", - "$ref": "#/utils/histogram", - "nickname": "get_read_metrics_latency_histogram_depricated", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/range/histogram", - "operations": [ - { - "method": "GET", - "summary": "Get range metrics", - "$ref": "#/utils/histogram", - "nickname": "get_range_metrics_latency_histogram_depricated", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/read/moving_average_histogram", - "operations": [ - { - "method": "GET", - "summary": "Get read metrics", - "$ref": "#/utils/rate_moving_average_and_histogram", - "nickname": "get_read_metrics_latency_histogram", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/range/moving_average_histogram", - "operations": [ - { - "method": "GET", - "summary": "Get range metrics rate and histogram", - 
"$ref": "#/utils/rate_moving_average_and_histogram", - "nickname": "get_range_metrics_latency_histogram", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/range/timeouts", - "operations": [ - { - "method": "GET", - "summary": "Get range metrics", - "type": "int", - "nickname": "get_range_metrics_timeouts", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/range/unavailables", - "operations": [ - { - "method": "GET", - "summary": "Get range metrics", - "type": "int", - "nickname": "get_range_metrics_unavailables", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/range/timeouts_rates", - "operations": [ - { - "method": "GET", - "summary": "Get range metrics rates", - "type": "#/utils/rate_moving_average", - "nickname": "get_range_metrics_timeouts_rates", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/range/unavailables_rates", - "operations": [ - { - "method": "GET", - "summary": "Get range metrics rates", - "type": "#/utils/rate_moving_average", - "nickname": "get_range_metrics_unavailables_rates", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/write/timeouts", - "operations": [ - { - "method": "GET", - "summary": "Get write metrics", - "type": "int", - "nickname": "get_write_metrics_timeouts", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/write/unavailables", - "operations": [ - { - "method": "GET", - "summary": "Get write metrics", - "type": "int", - "nickname": "get_write_metrics_unavailables", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/write/timeouts_rates", - "operations": [ - { - "method": "GET", - "summary": "Get write metrics rates", - "type": "#/utils/rate_moving_average", - "nickname": "get_write_metrics_timeouts_rates", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/write/unavailables_rates", - "operations": [ - { - "method": "GET", - "summary": "Get write metrics rates", - "type": "#/utils/rate_moving_average", - "nickname": "get_write_metrics_unavailables_rates", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/write/histogram", - "operations": [ - { - "method": "GET", - "summary": "Get write metrics", - "$ref": "#/utils/histogram", - "nickname": "get_write_metrics_latency_histogram_depricated", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path": "/storage_proxy/metrics/write/moving_average_histogram", - "operations": [ - { - "method": "GET", - "summary": "Get write metrics", - "$ref": "#/utils/rate_moving_average_and_histogram", - "nickname": "get_write_metrics_latency_histogram", - "produces": [ - "application/json" - ], - "parameters": [] - } - ] - }, - { - "path":"/storage_proxy/metrics/read/estimated_histogram/", - "operations":[ - { - "method":"GET", - "summary":"Get read estimated latency", - "$ref":"#/utils/estimated_histogram", - "nickname":"get_read_estimated_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_proxy/metrics/read", - "operations":[ - { - "method":"GET", - "summary":"Get read latency", - 
"type":"int", - "nickname":"get_read_latency", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_proxy/metrics/write/estimated_histogram/", - "operations":[ - { - "method":"GET", - "summary":"Get write estimated latency", - "$ref":"#/utils/estimated_histogram", - "nickname":"get_write_estimated_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_proxy/metrics/write", - "operations":[ - { - "method":"GET", - "summary":"Get write latency", - "type":"int", - "nickname":"get_write_latency", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_proxy/metrics/range/estimated_histogram/", - "operations":[ - { - "method":"GET", - "summary":"Get range estimated latency", - "$ref":"#/utils/estimated_histogram", - "nickname":"get_range_estimated_histogram", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_proxy/metrics/range", - "operations":[ - { - "method":"GET", - "summary":"Get range latency", - "type":"int", - "nickname":"get_range_latency", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - } - ], - "models":{ - "mapper_list":{ - "id":"mapper_list", - "description":"Holds a key value which is a list", - "properties":{ - "key":{ - "type":"string", - "description":"The key" - }, - "value":{ - "type":"array", - "items":{ - "type":"string" - }, - "description":"The value" - } - } - } - } -} diff --git a/scylla/api/api-doc/storage_service.json b/scylla/api/api-doc/storage_service.json deleted file mode 100644 index 613872b..0000000 --- a/scylla/api/api-doc/storage_service.json +++ /dev/null @@ -1,2260 +0,0 @@ -{ - "apiVersion":"0.0.1", - "swaggerVersion":"1.2", - "basePath":"{{Protocol}}://{{Host}}", - "resourcePath":"/storage_service", - "produces":[ - "application/json" - ], - "apis":[ - { - "path":"/storage_service/hostid/local", - "operations":[ - { - "method":"GET", - "summary":"Returns the local host id", - "type":"string", - "nickname":"local_hostid", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/tokens", - "operations":[ - { - "method":"GET", - "summary":"Returns a list of the tokens for this node", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_tokens", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/tokens/{endpoint}", - "operations":[ - { - "method":"GET", - "summary":"Returns a list of the tokens for or a specified node", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_node_tokens", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"endpoint", - "description":"The endpoint", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/storage_service/commitlog", - "operations":[ - { - "method":"GET", - "summary":"Returns the location of the commit log files", - "type":"string", - "nickname":"get_commitlog", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/tokens_endpoint", - "operations":[ - { - "method":"GET", - "summary":"Returns a list of the tokens endpoint mapping", - "type":"array", - "items":{ - "type":"mapper" - }, - "nickname":"get_token_endpoint", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - 
"path":"/storage_service/nodes/leaving", - "operations":[ - { - "method":"GET", - "summary":"Retrieve the list of nodes currently leaving the ring", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_leaving_nodes", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/nodes/moving", - "operations":[ - { - "method":"GET", - "summary":"Retrieve the list of nodes currently moving in the ring", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_moving_nodes", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/nodes/joining", - "operations":[ - { - "method":"GET", - "summary":"Retrieve the list of nodes currently bootstrapping into the ring", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_joining_nodes", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/release_version", - "operations":[ - { - "method":"GET", - "summary":"Fetch a string representation of the Cassandra version.", - "type":"string", - "nickname":"get_release_version", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/scylla_release_version", - "operations":[ - { - "method":"GET", - "summary":"Fetch a string representation of the Scylla version.", - "type":"string", - "nickname":"get_scylla_release_version", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/schema_version", - "operations":[ - { - "method":"GET", - "summary":"Fetch a string representation of the current Schema version.", - "type":"string", - "nickname":"get_schema_version", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/data_file/locations", - "operations":[ - { - "method":"GET", - "summary":"Get the list of all data file locations from conf", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_all_data_file_locations", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/saved_caches/location", - "operations":[ - { - "method":"GET", - "summary":"Get location of the saved caches dir", - "type":"string", - "nickname":"get_saved_caches_location", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/range_to_endpoint_map/{keyspace}", - "operations":[ - { - "method":"GET", - "summary":"Retrieve a map of range to end points that describe the ring topology of a Cassandra cluster.", - "type":"array", - "items":{ - "type":"maplist_mapper" - }, - "nickname":"get_range_to_endpoint_map", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"keyspace", - "description":"The keyspace to query about", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"rpc", - "description":"When set to true, return the rpc address", - "required":false, - "allowMultiple":false, - "type":"boolean", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/pending_range/{keyspace}", - "operations":[ - { - "method":"GET", - "summary":"Retrieve a map of pending ranges to endpoints that describe the ring topology", - "type":"array", - "items":{ - "type":"maplist_mapper" - }, - "nickname":"get_pending_range_to_endpoint_map", - "produces":[ - "application/json" - ], - "parameters":[ - { - 
"name":"keyspace", - "description":"The keyspace to query about", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/storage_service/describe_ring/", - "operations":[ - { - "method":"GET", - "summary":"The TokenRange for a any keyspace", - "type":"array", - "items":{ - "type":"token_range" - }, - "nickname":"describe_any_ring", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/describe_ring/{keyspace}", - "operations":[ - { - "method":"GET", - "summary":"The TokenRange for a given keyspace", - "type":"array", - "items":{ - "type":"token_range" - }, - "nickname":"describe_ring", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"keyspace", - "description":"The keyspace to fetch information about", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/storage_service/ownership/{keyspace}", - "operations":[ - { - "method":"GET", - "summary":"Effective ownership is % of the data each node owns given the keyspace", - "type":"array", - "items":{ - "type":"mapper" - }, - "nickname":"get_effective_ownership", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"keyspace", - "description":"The keyspace to fetch information about", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/storage_service/ownership/", - "operations":[ - { - "method":"GET", - "summary":"The mapping from token -> % of cluster owned by that token", - "type":"array", - "items":{ - "type":"mapper" - }, - "nickname":"get_ownership", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/host_id", - "operations":[ - { - "method":"GET", - "summary":"Retrieve the mapping of endpoint to host ID", - "type":"array", - "items":{ - "type":"mapper" - }, - "nickname":"get_host_id_map", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/load", - "operations":[ - { - "method":"GET", - "summary":"get load value", - "type":"double", - "nickname":"get_load", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/load_map", - "operations":[ - { - "method":"GET", - "summary":"load value. Keys are IP addresses", - "type":"array", - "items":{ - "type":"map_string_double" - }, - "nickname":"get_load_map", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/generation_number", - "operations":[ - { - "method":"GET", - "summary":"Return the generation value for this node.", - "type":"int", - "nickname":"get_current_generation_number", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/natural_endpoints/{keyspace}", - "operations":[ - { - "method":"GET", - "summary":"This method returns the N endpoints that are responsible for storing the specified key i.e for replication. 
the endpoint responsible for this key", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_natural_endpoints", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"keyspace", - "description":"The keyspace to query about", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"cf", - "description":"Column family name", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"key", - "description":"key for which we need to find the endpoint return value - the endpoint responsible for this key", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/snapshots", - "operations":[ - { - "method":"GET", - "summary":"Get the details of all the snapshot", - "type":"array", - "items":{ - "type":"snapshots" - }, - "nickname":"get_snapshot_details", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"Takes the snapshot for the given keyspaces. A snapshot name must be specified.", - "type":"void", - "nickname":"take_snapshot", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"tag", - "description":"the tag given to the snapshot", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"kn", - "description":"Comma seperated keyspaces name to snapshot", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"cf", - "description":"the column family to snapshot", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - }, - { - "method":"DELETE", - "summary":"Remove the snapshot with the given name from the given keyspaces. 
If no tag is specified, all snapshots will be removed", - "type":"void", - "nickname":"del_snapshot", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"tag", - "description":"the tag given to the snapshot", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"kn", - "description":"Comma separated keyspace names to snapshot", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/snapshots/size/true", - "operations":[ - { - "method":"GET", - "summary":"Get the true size taken by all snapshots across all keyspaces.", - "type":"long", - "nickname":"true_snapshots_size", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/keyspace_compaction/{keyspace}", - "operations":[ - { - "method":"POST", - "summary":"Forces major compaction of a single keyspace", - "type":"void", - "nickname":"force_keyspace_compaction", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"keyspace", - "description":"The keyspace to query about", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"cf", - "description":"Comma separated column family names", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/keyspace_cleanup/{keyspace}", - "operations":[ - { - "method":"POST", - "summary":"Trigger a cleanup of keys on a single keyspace", - "type":"int", - "nickname":"force_keyspace_cleanup", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"keyspace", - "description":"The keyspace to query about", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"cf", - "description":"Comma separated column family names", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/keyspace_scrub/{keyspace}", - "operations":[ - { - "method":"GET", - "summary":"Scrub (deserialize + reserialize at the latest version, skipping bad rows if any) the given keyspace. If columnFamilies array is empty, all CFs are scrubbed. Scrubbed CFs will be snapshotted first, if disableSnapshot is false", - "type":"int", - "nickname":"scrub", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"disable_snapshot", - "description":"When set to true, disable snapshot", - "required":false, - "allowMultiple":false, - "type":"boolean", - "paramType":"query" - }, - { - "name":"skip_corrupted", - "description":"When set to true, skip corrupted", - "required":false, - "allowMultiple":false, - "type":"boolean", - "paramType":"query" - }, - { - "name":"keyspace", - "description":"The keyspace to query about", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"cf", - "description":"Comma separated column family names", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/keyspace_upgrade_sstables/{keyspace}", - "operations":[ - { - "method":"GET", - "summary":"Rewrite all sstables to the latest version. 
Unlike scrub, it doesn't skip bad rows and does not snapshot sstables first.", - "type":"int", - "nickname":"upgrade_sstables", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"keyspace", - "description":"The keyspace", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"exclude_current_version", - "description":"When set to true exclude current version", - "required":false, - "allowMultiple":false, - "type":"boolean", - "paramType":"query" - }, - { - "name":"cf", - "description":"Comma separated column family names", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/keyspace_flush/{keyspace}", - "operations":[ - { - "method":"POST", - "summary":"Flush all memtables for the given column families, or all columnfamilies for the given keyspace if none are explicitly listed.", - "type":"void", - "nickname":"force_keyspace_flush", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"keyspace", - "description":"The keyspace to flush", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"cf", - "description":"Comma separated column family names", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/repair_async/{keyspace}", - "operations":[ - { - "method":"POST", - "summary":"Invoke repair asynchronously. You can track repair progress by using the GET request, supplying the id", - "type":"int", - "nickname":"repair_async", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"keyspace", - "description":"The keyspace to repair", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"primaryRange", - "description":"If the value is the string 'true' with any capitalization, repair only the first range returned by the partitioner.", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"parallelism", - "description":"Repair parallelism, can be 0 (sequential), 1 (parallel) or 2 (datacenter-aware).", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"incremental", - "description":"If the value is the string 'true' with any capitalization, perform incremental repair.", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"jobThreads", - "description":"An integer specifying the parallelism on each node.", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"ranges", - "description":"An explicit list of ranges to repair, overriding the default choice. Each range is expressed as token1:token2, and multiple ranges can be given as a comma separated list.", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"startToken", - "description":"Token on which to begin repair", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"endToken", - "description":"Token on which to end repair", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"columnFamilies", - "description":"Which column families to repair in the given keyspace. Multiple column families can be named, separated by commas. 
If this option is missing, all column families in the keyspace are repaired.", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"dataCenters", - "description":"Which data centers are to participate in this repair. Multiple data centers can be listed separated by commas.", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"hosts", - "description":"Which hosts are to participate in this repair. Multiple hosts can be listed separated by commas.", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"trace", - "description":"If the value is the string 'true' with any capitalization, enable tracing of the repair.", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - }, - { - "method":"GET", - "summary":"Track already running repair progress", - "type":"string", - "enum":[ - "RUNNING", - "SUCCESSFUL", - "FAILED" - ], - "nickname":"repair_async_status", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"keyspace", - "description":"The keyspace repair is running on", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"id", - "description":"The repair ID to check for status", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/force_terminate", - "operations":[ - { - "method":"POST", - "summary":"Force terminate all repair sessions", - "type":"void", - "nickname":"force_terminate_all_repair_sessions", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/decommission", - "operations":[ - { - "method":"POST", - "summary":"transfer this node's data to other machines and remove it from service.", - "type":"void", - "nickname":"decommission", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/move", - "operations":[ - { - "method":"POST", - "summary":"This node will unload its data onto its neighbors, and bootstrap to the new token.", - "type":"void", - "nickname":"move", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"new_token", - "description":"token to move this node to", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/remove_node", - "operations":[ - { - "method":"POST", - "summary":"Removes token (and all data associated with enpoint that had it) from the ring", - "type":"void", - "nickname":"remove_node", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"host_id", - "description":"Remove the node with host_id from the cluster", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/removal_status", - "operations":[ - { - "method":"GET", - "summary":"Get the status of a token removal.", - "type":"string", - "nickname":"get_removal_status", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/force_remove_completion", - "operations":[ - { - "method":"POST", - "summary":"Force a remove operation to finish.", - "type":"void", - "nickname":"force_remove_completion", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - 
"path":"/storage_service/logging_level", - "operations":[ - { - "method":"POST", - "summary":"set the logging level at runtime

If both classQualifier and level are empty/null, it will reload the configuration to reset.
If classQualifier is not empty but level is empty/null, it will set the level to null for the defined classQualifier.
If level cannot be parsed, then the level will be defaulted to DEBUG

The logback configuration should have <jmxConfigurator /> set", - "type":"void", - "nickname":"set_logging_level", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"class_qualifier", - "description":"The logger's classQualifier", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"level", - "description":"The log level", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - }, - { - "method":"GET", - "summary":"get the runtime logging levels", - "type":"array", - "items":{ - "type":"mapper" - }, - "nickname":"get_logging_levels", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/operation_mode", - "operations":[ - { - "method":"GET", - "summary":"Get the operational mode (leaving, joining, normal, decommissioned, client)", - "type":"string", - "nickname":"get_operation_mode", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/is_starting", - "operations":[ - { - "method":"GET", - "summary":"Returns whether the storage service is starting or not", - "type":"boolean", - "nickname":"is_starting", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/drain", - "operations":[ - { - "method":"GET", - "summary":"Get the progress of a drain operation", - "type":"string", - "nickname":"get_drain_progress", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"makes node unavailable for writes, flushes memtables and replays commitlog", - "type":"void", - "nickname":"drain", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/truncate/{keyspace}", - "operations":[ - { - "method":"POST", - "summary":"Truncates (deletes) the given columnFamily from the provided keyspace. Calling truncate results in actual deletion of all data in the cluster under the given columnFamily and it will fail unless all hosts are up. 
All data in the given column family will be deleted, but its definition will not be affected.", - "type":"void", - "nickname":"truncate", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"keyspace", - "description":"The keyspace", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"cf", - "description":"Column family name", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/keyspaces", - "operations":[ - { - "method":"GET", - "summary":"Get the keyspaces", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_keyspaces", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"type", - "description":"Which keyspaces to return", - "required":false, - "allowMultiple":false, - "type":"string", - "enum": [ "all", "user", "non_local_strategy" ], - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/update_snitch", - "operations":[ - { - "method":"POST", - "summary":"Change endpointsnitch class and dynamic-ness (and dynamic attributes) at runtime", - "type":"void", - "nickname":"update_snitch", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"ep_snitch_class_name", - "description":"The canonical path name for a class implementing IEndpointSnitch", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - }, - { - "name":"dynamic", - "description":"When true dynamicsnitch is used", - "required":true, - "allowMultiple":false, - "type":"boolean", - "paramType":"query" - }, - { - "name":"dynamic_update_interval", - "description":"integer, in ms (default 100)", - "required":false, - "allowMultiple":false, - "type":"integer", - "paramType":"query" - }, - { - "name":"dynamic_reset_interval", - "description":"integer, in ms (default 600,000)", - "required":false, - "allowMultiple":false, - "type":"integer", - "paramType":"query" - }, - { - "name":"dynamic_badness_threshold", - "description":"Dynamic badness threshold, (default 0.0)", - "required":false, - "allowMultiple":false, - "type":"double", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/gossiping", - "operations":[ - { - "method":"DELETE", - "summary":"allows a user to forcibly 'kill' a sick node", - "type":"void", - "nickname":"stop_gossiping", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"allows a user to recover a forcibly 'killed' node", - "type":"void", - "nickname":"start_gossiping", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"GET", - "summary":"allows a user to see whether gossip is running or not", - "type":"boolean", - "nickname":"is_gossip_running", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/stop_daemon", - "operations":[ - { - "method":"POST", - "summary":"allows a user to forcibly completely stop cassandra", - "type":"void", - "nickname":"stop_daemon", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/is_initialized", - "operations":[ - { - "method":"GET", - "summary":"Determine if gossip is enable", - "type":"boolean", - "nickname":"is_initialized", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/rpc_server", - "operations":[ - { - "method":"DELETE", - "summary":"Allows a user to 
disable thrift", - "type":"void", - "nickname":"stop_rpc_server", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"allows a user to reenable thrift", - "type":"void", - "nickname":"start_rpc_server", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"GET", - "summary":"Determine if thrift is running", - "type":"boolean", - "nickname":"is_rpc_server_running", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/native_transport", - "operations":[ - { - "method":"POST", - "summary":"Start native transport", - "type":"void", - "nickname":"start_native_transport", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"DELETE", - "summary":"Stop native transport", - "type":"void", - "nickname":"stop_native_transport", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"GET", - "summary":"Is native transport running", - "type":"boolean", - "nickname":"is_native_transport_running", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/join_ring", - "operations":[ - { - "method":"POST", - "summary":"Allows a node that have been started without joining the ring to join it", - "type":"void", - "nickname":"join_ring", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"GET", - "summary":"", - "type":"boolean", - "nickname":"is_joined", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/stream_throughput", - "operations":[ - { - "method":"POST", - "summary":"set stream throughput mb per sec", - "type":"void", - "nickname":"set_stream_throughput_mb_per_sec", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"value", - "description":"Stream throughput", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - }, - { - "method":"GET", - "summary":"Get stream throughput mb per sec", - "type":"int", - "nickname":"get_stream_throughput_mb_per_sec", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/compaction_throughput", - "operations":[ - { - "method":"GET", - "summary":"get compaction throughput mb per sec", - "type":"int", - "nickname":"get_compaction_throughput_mb_per_sec", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"Set compaction throughput mb per sec", - "type":"void", - "nickname":"set_compaction_throughput_mb_per_sec", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"value", - "description":"compaction throughput", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/incremental_backups", - "operations":[ - { - "method":"GET", - "summary":"Check if incremental backup is enabled", - "type":"boolean", - "nickname":"is_incremental_backups_enabled", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"", - "type":"void", - "nickname":"set_incremental_backups_enabled", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"value", - "description":"Set to true for incremental backup enabled", - "required":true, - "allowMultiple":false, - "type":"boolean", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/rebuild", - 
"operations":[ - { - "method":"POST", - "summary":"Initiate a process of streaming data for which we are responsible from other nodes. It is similar to bootstrap except meant to be used on a node which is already in the cluster (typically containing no data) as an alternative to running repair.", - "type":"void", - "nickname":"rebuild", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"source_dc", - "description":"Name of DC from which to select sources for streaming or none to pick any node", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/bulk_load/{path}", - "operations":[ - { - "method":"POST", - "summary":"Starts a bulk load and blocks until it completes", - "type":"void", - "nickname":"bulk_load", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"path", - "description":"Path to directory to load from", - "required":true, - "allowMultiple":true, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/storage_service/bulk_load_async/{path}", - "operations":[ - { - "method":"GET", - "summary":"Starts a bulk load asynchronously and returns the String representation of the planID for the new streaming session.", - "type":"string", - "nickname":"bulk_load_async", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"path", - "description":"Path to directory to load from", - "required":true, - "allowMultiple":true, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/storage_service/reschedule_failed_deletions", - "operations":[ - { - "method":"POST", - "summary":"Reschedule failed deletions", - "type":"void", - "nickname":"reschedule_failed_deletions", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/sstables/{keyspace}", - "operations":[ - { - "method":"POST", - "summary":"Load new SSTables to the given keyspace/columnFamily", - "type":"void", - "nickname":"load_new_ss_tables", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"keyspace", - "description":"The keyspace", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"cf", - "description":"Column family name", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/sample_key_range", - "operations":[ - { - "method":"GET", - "summary":"Return a List of Tokens representing a sample of keys across all ColumnFamilyStores.", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"sample_key_range", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/relocal_schema", - "operations":[ - { - "method":"POST", - "summary":"Reset local schema", - "type":"void", - "nickname":"reset_local_schema", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/trace_probability", - "operations":[ - { - "method":"POST", - "summary":"Enables/Disables tracing for the whole system. Only thrift requests can start tracing currently", - "type":"void", - "nickname":"set_trace_probability", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"probability", - "description":"[0,1] will enable tracing on a partial number of requests with the provided probability. 
0 will disable tracing and 1 will enable tracing for all requests (which might severely cripple the system)", - "required":true, - "allowMultiple":false, - "type":"double", - "paramType":"query" - } - ] - }, - { - "method":"GET", - "summary":"Returns the configured tracing probability.", - "type":"double", - "nickname":"get_trace_probability", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/slow_query", - "operations":[ - { - "method":"POST", - "summary":"Set slow query parameter", - "type":"void", - "nickname":"set_slow_query", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"enable", - "description":"set it to true to enable, anything else to disable", - "required":false, - "allowMultiple":false, - "type":"boolean", - "paramType":"query" - }, - { - "name":"ttl", - "description":"TTL in seconds", - "required":false, - "allowMultiple":false, - "type":"long", - "paramType":"query" - }, - { - "name":"threshold", - "description":"Slow query record threshold in microseconds", - "required":false, - "allowMultiple":false, - "type":"long", - "paramType":"query" - } - ] - }, - { - "method":"GET", - "summary":"Returns the slow query record configuration.", - "type":"slow_query_info", - "nickname":"get_slow_query_info", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/auto_compaction/{keyspace}", - "operations":[ - { - "method":"POST", - "summary":"Enable auto compaction", - "type":"void", - "nickname":"enable_auto_compaction", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"keyspace", - "description":"The keyspace", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"cf", - "description":"Comma separated column family names", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - }, - { - "method":"DELETE", - "summary":"Disable auto compaction", - "type":"void", - "nickname":"disable_auto_compaction", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"keyspace", - "description":"The keyspace", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"cf", - "description":"Comma separated column family names", - "required":false, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/deliver_hints", - "operations":[ - { - "method":"POST", - "summary":"", - "type":"void", - "nickname":"deliver_hints", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"host", - "description":"The host name", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/cluster_name", - "operations":[ - { - "method":"GET", - "summary":"Returns the name of the cluster", - "type":"string", - "nickname":"get_cluster_name", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/partitioner_name", - "operations":[ - { - "method":"GET", - "summary":"Returns the cluster partitioner", - "type":"string", - "nickname":"get_partitioner_name", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/tombstone_warn_threshold", - "operations":[ - { - "method":"GET", - "summary":"Returns the threshold for warning of queries with many tombstones", - "type":"int", - 
"nickname":"get_tombstone_warn_threshold", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"Sets the threshold for warning queries with many tombstones", - "type":"void", - "nickname":"set_tombstone_warn_threshold", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"debug_threshold", - "description":"tombstone debug threshold", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/tombstone_failure_threshold", - "operations":[ - { - "method":"GET", - "summary":"", - "type":"int", - "nickname":"get_tombstone_failure_threshold", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"", - "type":"void", - "nickname":"set_tombstone_failure_threshold", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"tombstone_debug_threshold", - "description":"tombstone debug threshold", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/batch_size_failure_threshold", - "operations":[ - { - "method":"GET", - "summary":"Returns the threshold for rejecting queries due to a large batch size", - "type":"int", - "nickname":"get_batch_size_failure_threshold", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"Sets the threshold for rejecting queries due to a large batch size", - "type":"void", - "nickname":"set_batch_size_failure_threshold", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"threshold", - "description":"batch size debug threshold", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/hinted_handoff", - "operations":[ - { - "method":"POST", - "summary":"Sets the hinted handoff throttle in kb per second, per delivery thread", - "type":"void", - "nickname":"set_hinted_handoff_throttle_in_kb", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"throttle", - "description":"throttle in kb", - "required":true, - "allowMultiple":false, - "type":"int", - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/storage_service/metrics/load", - "operations":[ - { - "method":"GET", - "summary":"Get load", - "type":"int", - "nickname":"get_metrics_load", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/metrics/exceptions", - "operations":[ - { - "method":"GET", - "summary":"Get exceptions", - "type":"int", - "nickname":"get_exceptions", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/metrics/hints_in_progress", - "operations":[ - { - "method":"GET", - "summary":"Get total hints in progress", - "type":"int", - "nickname":"get_total_hints_in_progress", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/storage_service/metrics/total_hints", - "operations":[ - { - "method":"GET", - "summary":"Get total hints", - "type":"int", - "nickname":"get_total_hints", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - } - ], - "models":{ - "mapper":{ - "id":"mapper", - "description":"A key value mapping", - "properties":{ - "key":{ - "type":"string", - "description":"The key" - }, - "value":{ - "type":"string", - "description":"The value" - } - } - }, - "map_string_double":{ - 
"id":"map_string_double", - "description":"A key value mapping between a string and a double", - "properties":{ - "key":{ - "type":"string", - "description":"The key" - }, - "value":{ - "type":"double", - "description":"The value" - } - } - }, - "maplist_mapper":{ - "id":"maplist_mapper", - "description":"A key value mapping, where key and value are list", - "properties":{ - "key":{ - "type":"array", - "items":{ - "type":"string" - }, - "description":"The key" - }, - "value":{ - "type":"array", - "items":{ - "type":"string" - }, - "description":"The value" - } - } - }, - "snapshot":{ - "id":"snapshot", - "description":"Snapshot detail", - "properties":{ - "ks":{ - "type":"string", - "description":"The key space snapshot key" - }, - "cf":{ - "type":"string", - "description":"The column family" - }, - "total":{ - "type":"int", - "description":"The total snapshot size" - }, - "live":{ - "type":"int", - "description":"The live snapshot size" - } - } - }, - "snapshots":{ - "id":"snapshots", - "description":"List of Snapshot detail", - "properties":{ - "key":{ - "type":"string", - "description":"The snapshot key" - }, - "value":{ - "type":"array", - "items":{ - "type":"snapshot" - }, - "description":"The column family" - } - } - }, - "slow_query_info": { - "id":"slow_query_info", - "description":"Slow query triggering information", - "properties":{ - "enable":{ - "type":"boolean", - "description":"Is slow query logging enable or disable" - }, - "ttl":{ - "type":"long", - "description":"The slow query TTL in seconds" - }, - "threshold":{ - "type":"long", - "description":"The slow query logging threshold in microseconds. Queries that takes longer, will be logged" - } - } - }, - "endpoint_detail":{ - "id":"endpoint_detail", - "description":"Endpoint detail", - "properties":{ - "host":{ - "type":"string", - "description":"The endpoint host" - }, - "datacenter":{ - "type":"string", - "description":"The endpoint datacenter" - }, - "rack":{ - "type":"string", - "description":"The endpoint rack" - } - } - }, - "token_range":{ - "id":"token_range", - "description":"Endpoint range information", - "properties":{ - "start_token":{ - "type":"string", - "description":"The range start token" - }, - "end_token":{ - "type":"string", - "description":"The range start token" - }, - "endpoints":{ - "type":"array", - "items":{ - "type":"string" - }, - "description":"The endpoints" - }, - "rpc_endpoints":{ - "type":"array", - "items":{ - "type":"string" - }, - "description":"The rpc endpoints" - }, - "endpoint_details":{ - "type":"array", - "items":{ - "type":"endpoint_detail" - }, - "description":"The endpoint details" - } - } - } - } -} diff --git a/scylla/api/api-doc/stream_manager.json b/scylla/api/api-doc/stream_manager.json deleted file mode 100644 index 1463fe9..0000000 --- a/scylla/api/api-doc/stream_manager.json +++ /dev/null @@ -1,271 +0,0 @@ -{ - "apiVersion":"0.0.1", - "swaggerVersion":"1.2", - "basePath":"{{Protocol}}://{{Host}}", - "resourcePath":"/stream_manager", - "produces":[ - "application/json" - ], - "apis":[ - { - "path":"/stream_manager/", - "operations":[ - { - "method":"GET", - "summary":"Returns the current state of all ongoing streams.", - "type":"array", - "items":{ - "type":"stream_state" - }, - "nickname":"get_current_streams", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/stream_manager/metrics/outbound", - "operations":[ - { - "method":"GET", - "summary":"Get number of active outbound streams", - "type":"int", - 
"nickname":"get_all_active_streams_outbound", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/stream_manager/metrics/incoming/{peer}", - "operations":[ - { - "method":"GET", - "summary":"Get total incoming bytes", - "type":"int", - "nickname":"get_total_incoming_bytes", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"peer", - "description":"The stream peer", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/stream_manager/metrics/incoming", - "operations":[ - { - "method":"GET", - "summary":"Get all total incoming bytes", - "type":"int", - "nickname":"get_all_total_incoming_bytes", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - }, - { - "path":"/stream_manager/metrics/outgoing/{peer}", - "operations":[ - { - "method":"GET", - "summary":"Get total outgoing bytes", - "type":"int", - "nickname":"get_total_outgoing_bytes", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"peer", - "description":"The stream peer", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - } - ] - }, - { - "path":"/stream_manager/metrics/outgoing", - "operations":[ - { - "method":"GET", - "summary":"Get all total outgoing bytes", - "type":"int", - "nickname":"get_all_total_outgoing_bytes", - "produces":[ - "application/json" - ], - "parameters":[ - ] - } - ] - } - ], - "models":{ - "stream_state":{ - "id":"stream_state", - "description":"Current snapshot of streaming progress", - "properties":{ - "plan_id":{ - "type":"string", - "description":"Plan UUID" - }, - "description":{ - "type":"string", - "description":"The stream description" - }, - "sessions":{ - "type":"array", - "description":"The sessions info", - "items":{ - "type":"stream_info" - } - } - } - }, - "stream_info":{ - "id":"stream_info", - "description":"Stream session info", - "properties":{ - "peer":{ - "type":"string", - "description":"The peer" - }, - "session_index":{ - "type":"int", - "description":"The session index" - }, - "connecting":{ - "type":"string" - }, - "receiving_summaries":{ - "type":"array", - "items":{ - "type":"stream_summary" - }, - "description":"Receiving summaries" - }, - "sending_summaries":{ - "type":"array", - "items":{ - "type":"stream_summary" - }, - "description":"Sending summaries" - }, - "state":{ - "type":"string", - "description":"Current session state", - "enum":[ - "INITIALIZED", - "PREPARING", - "STREAMING", - "WAIT_COMPLETE", - "COMPLETE", - "FAILED" - ] - }, - "receiving_files":{ - "type":"array", - "items":{ - "type":"progress_info_mapper" - }, - "description":"Receiving files" - }, - "sending_files":{ - "type":"array", - "items":{ - "type":"progress_info_mapper" - }, - "description":"Sending files" - } - } - }, - "stream_summary":{ - "id":"stream_summary", - "description":"Stream summary info", - "properties":{ - "cf_id":{ - "type":"string", - "description":"The ID" - }, - "files":{ - "type":"int", - "description":"Number of files to transfer. Can be 0 if nothing to transfer for some streaming request." 
- }, - "total_size":{ - "type":"long" - } - } - }, - "progress_info_mapper":{ - "id":"progress_info_mapper", - "description":"A mapping between file and its progress info", - "properties":{ - "key":{ - "type":"string", - "description":"The key" - }, - "value":{ - "type":"progress_info", - "description":"The progress info" - } - } - }, - "progress_info":{ - "id":"progress_info", - "description":"File transfer progress", - "properties":{ - "peer":{ - "type":"string", - "description":"The peer address" - }, - "session_index":{ - "type":"int", - "description":"The session index" - }, - "file_name":{ - "type":"string", - "description":"The file name" - }, - "direction":{ - "type":"string", - "description":"The transfer direction", - "enum":[ - "OUT", - "IN" - ] - }, - "current_bytes":{ - "type":"long", - "description":"The current bytes" - }, - "total_bytes":{ - "type":"long", - "description":"The total bytes" - } - } - } - } -} \ No newline at end of file diff --git a/scylla/api/api-doc/system.json b/scylla/api/api-doc/system.json deleted file mode 100644 index bb56a4d..0000000 --- a/scylla/api/api-doc/system.json +++ /dev/null @@ -1,114 +0,0 @@ -{ - "apiVersion":"0.0.1", - "swaggerVersion":"1.2", - "basePath":"{{Protocol}}://{{Host}}", - "resourcePath":"/system", - "produces":[ - "application/json" - ], - "apis":[ - { - "path":"/system/logger", - "operations":[ - { - "method":"GET", - "summary":"Get all logger names", - "type":"array", - "items":{ - "type":"string" - }, - "nickname":"get_all_logger_names", - "produces":[ - "application/json" - ], - "parameters":[ - ] - }, - { - "method":"POST", - "summary":"Set the log level of all loggers", - "type":"void", - "nickname":"set_all_logger_level", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"level", - "description":"The new log level", - "required":true, - "allowMultiple":false, - "type":"string", - "enum":[ - "error", - "warn", - "info", - "debug", - "trace" - ], - "paramType":"query" - } - ] - } - ] - }, - { - "path":"/system/logger/{name}", - "operations":[ - { - "method":"GET", - "summary":"Get logger level", - "type":"string", - "nickname":"get_logger_level", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The logger to query about", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - } - ] - }, - { - "method":"POST", - "summary":"Set logger level", - "type":"void", - "nickname":"set_logger_level", - "produces":[ - "application/json" - ], - "parameters":[ - { - "name":"name", - "description":"The logger to set", - "required":true, - "allowMultiple":false, - "type":"string", - "paramType":"path" - }, - { - "name":"level", - "description":"The new log level", - "required":true, - "allowMultiple":false, - "type":"string", - "enum":[ - "error", - "warn", - "info", - "debug", - "trace" - ], - "paramType":"query" - } - ] - } - ] - } - ] -} diff --git a/scylla/api/api-doc/utils.json b/scylla/api/api-doc/utils.json deleted file mode 100644 index b580c0f..0000000 --- a/scylla/api/api-doc/utils.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "apiVersion":"0.0.1", - "swaggerVersion":"1.2", - "basePath":"{{Protocol}}://{{Host}}", - "resourcePath":"/utils", - "produces":[ - "application/json" - ], - "apis":[ - ], - "models":{ - "histogram":{ - "id":"histogram", - "description":"Histogram values", - "properties":{ - "count":{ - "type":"long", - "description":"Total count so far" - }, - "sum":{ - "type":"long", - "description":"Total sum so far" - }, - "min":{ - 
"type":"long", - "description":"The min so far" - }, - "max":{ - "type":"long", - "description":"The max so far" - }, - "variance":{ - "type":"double", - "description":"The variance" - }, - "mean":{ - "type":"double", - "description":"The mean" - }, - "sample":{ - "type":"array", - "items":{ - "type":"long" - }, - "description":"A sample containing the last n elements" - } - } - }, - "estimated_histogram":{ - "id":"estimated_histogram", - "description":"An estimated histogram values", - "properties":{ - "buckets":{ - "type":"array", - "items":{ - "type":"long" - }, - "description":"The histogram buckets" - }, - "bucket_offsets":{ - "type":"array", - "items":{ - "type":"long" - }, - "description":"The series of values to which the counts in `buckets` correspond" - } - } - }, - "rate_moving_average": { - "id":"rate_moving_average", - "description":"A meter metric which measures mean throughput and one, five, and fifteen-minute exponentially-weighted moving average throughputs", - "properties":{ - "rates": { - "type":"array", - "items":{ - "type":"double" - }, - "description":"One, five and fifteen mintues rates" - }, - "mean_rate": { - "type":"double", - "description":"The mean rate from startup" - }, - "count": { - "type":"long", - "description":"Total number of events from startup" - } - } - }, - "rate_moving_average_and_histogram": { - "id":"rate_moving_average_and_histogram", - "description":"A timer metric which aggregates timing durations and provides duration statistics, plus throughput statistics", - "properties":{ - "meter": { - "type":"rate_moving_average", - "description":"The metric rate moving average" - }, - "hist": { - "type":"histogram", - "description":"The metric histogram" - } - } - } - } -} diff --git a/scylla/api/api.cc b/scylla/api/api.cc deleted file mode 100644 index 976dd3f..0000000 --- a/scylla/api/api.cc +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "api.hh" -#include "http/file_handler.hh" -#include "http/transformers.hh" -#include "http/api_docs.hh" -#include "storage_service.hh" -#include "commitlog.hh" -#include "gossiper.hh" -#include "failure_detector.hh" -#include "column_family.hh" -#include "lsa.hh" -#include "messaging_service.hh" -#include "storage_proxy.hh" -#include "cache_service.hh" -#include "collectd.hh" -#include "endpoint_snitch.hh" -#include "compaction_manager.hh" -#include "hinted_handoff.hh" -#include "http/exception.hh" -#include "stream_manager.hh" -#include "system.hh" - -namespace api { - -static std::unique_ptr exception_reply(std::exception_ptr eptr) { - try { - std::rethrow_exception(eptr); - } catch (const no_such_keyspace& ex) { - throw bad_param_exception(ex.what()); - } - // We are never going to get here - return std::make_unique(); -} - -future<> set_server_init(http_context& ctx) { - auto rb = std::make_shared < api_registry_builder > (ctx.api_doc); - - return ctx.http_server.set_routes([rb, &ctx](routes& r) { - r.register_exeption_handler(exception_reply); - r.put(GET, "/ui", new httpd::file_handler(ctx.api_dir + "/index.html", - new content_replace("html"))); - r.add(GET, url("/ui").remainder("path"), new httpd::directory_handler(ctx.api_dir, - new content_replace("html"))); - rb->set_api_doc(r); - rb->register_function(r, "system", - "The system related API"); - set_system(ctx, r); - }); -} - -static future<> register_api(http_context& ctx, const sstring& api_name, - const sstring api_desc, - std::function f) { - auto rb = std::make_shared < api_registry_builder > (ctx.api_doc); - - return ctx.http_server.set_routes([rb, &ctx, api_name, api_desc, f](routes& r) { - rb->register_function(r, api_name, api_desc); - f(ctx,r); - }); -} - -future<> set_server_storage_service(http_context& ctx) { - return register_api(ctx, "storage_service", "The storage service API", set_storage_service); -} - -future<> set_server_snitch(http_context& ctx) { - return register_api(ctx, "endpoint_snitch_info", "The endpoint snitch info API", set_endpoint_snitch); -} - -future<> set_server_gossip(http_context& ctx) { - return register_api(ctx, "gossiper", - "The gossiper API", set_gossiper); -} - -future<> set_server_load_sstable(http_context& ctx) { - return register_api(ctx, "column_family", - "The column family API", set_column_family); -} - -future<> set_server_messaging_service(http_context& ctx) { - return register_api(ctx, "messaging_service", - "The messaging service API", set_messaging_service); -} - -future<> set_server_storage_proxy(http_context& ctx) { - return register_api(ctx, "storage_proxy", - "The storage proxy API", set_storage_proxy); -} - -future<> set_server_stream_manager(http_context& ctx) { - return register_api(ctx, "stream_manager", - "The stream manager API", set_stream_manager); -} - -future<> set_server_gossip_settle(http_context& ctx) { - auto rb = std::make_shared < api_registry_builder > (ctx.api_doc); - - return ctx.http_server.set_routes([rb, &ctx](routes& r) { - rb->register_function(r, "failure_detector", - "The failure detector API"); - set_failure_detector(ctx,r); - rb->register_function(r, "cache_service", - "The cache service API"); - set_cache_service(ctx,r); - }); -} - -future<> set_server_done(http_context& ctx) { - auto rb = std::make_shared < api_registry_builder > (ctx.api_doc); - - return ctx.http_server.set_routes([rb, &ctx](routes& r) { - rb->register_function(r, "compaction_manager", - "The Compaction manager API"); - set_compaction_manager(ctx, r); - 
rb->register_function(r, "lsa", "Log-structured allocator API"); - set_lsa(ctx, r); - - rb->register_function(r, "commitlog", - "The commit log API"); - set_commitlog(ctx,r); - rb->register_function(r, "hinted_handoff", - "The hinted handoff API"); - set_hinted_handoff(ctx, r); - rb->register_function(r, "collectd", - "The collectd API"); - set_collectd(ctx, r); - }); -} - -} - diff --git a/scylla/api/api.hh b/scylla/api/api.hh deleted file mode 100644 index 2743359..0000000 --- a/scylla/api/api.hh +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "json/json_elements.hh" -#include -#include -#include -#include "api/api-doc/utils.json.hh" -#include "utils/histogram.hh" -#include "http/exception.hh" -#include "api_init.hh" -#include "seastarx.hh" - -namespace api { - -template -std::vector container_to_vec(const T& container) { - std::vector res; - for (auto i : container) { - res.push_back(boost::lexical_cast(i)); - } - return res; -} - -template -std::vector map_to_key_value(const std::map& map) { - std::vector res; - for (auto i : map) { - res.push_back(T()); - res.back().key = i.first; - res.back().value = i.second; - } - return res; -} - -template -std::vector& map_to_key_value(const MAP& map, std::vector& res) { - for (auto i : map) { - T val; - val.key = boost::lexical_cast(i.first); - val.value = boost::lexical_cast(i.second); - res.push_back(val); - } - return res; -} -template -T map_sum(T&& dest, const S& src) { - for (auto i : src) { - dest[i.first] += i.second; - } - return dest; -} - -template -std::vector map_keys(const MAP& map) { - std::vector res; - for (const auto& i : map) { - res.push_back(boost::lexical_cast(i.first)); - } - return res; -} - -/** - * General sstring splitting function - */ -inline std::vector split(const sstring& text, const char* separator) { - if (text == "") { - return std::vector(); - } - std::vector tokens; - return boost::split(tokens, text, boost::is_any_of(separator)); -} - -/** - * Split a column family parameter - */ -inline std::vector split_cf(const sstring& cf) { - return split(cf, ","); -} - -/** - * A helper function to sum values on an a distributed object that - * has a get_stats method. 
- * - */ -template -future sum_stats(distributed& d, V F::*f) { - return d.map_reduce0([f](const T& p) {return p.get_stats().*f;}, 0, - std::plus()).then([](V val) { - return make_ready_future(val); - }); -} - - - -inline -httpd::utils_json::histogram to_json(const utils::ihistogram& val) { - httpd::utils_json::histogram h; - h = val; - h.sum = val.estimated_sum(); - return h; -} - -inline -httpd::utils_json::rate_moving_average meter_to_json(const utils::rate_moving_average& val) { - httpd::utils_json::rate_moving_average m; - m = val; - return m; -} - -inline -httpd::utils_json::rate_moving_average_and_histogram timer_to_json(const utils::rate_moving_average_and_histogram& val) { - httpd::utils_json::rate_moving_average_and_histogram h; - h.hist = to_json(val.hist); - h.meter = meter_to_json(val.rate); - return h; -} - -template -future sum_histogram_stats(distributed& d, utils::timed_rate_moving_average_and_histogram F::*f) { - - return d.map_reduce0([f](const T& p) {return (p.get_stats().*f).hist;}, utils::ihistogram(), - std::plus()).then([](const utils::ihistogram& val) { - return make_ready_future(to_json(val)); - }); -} - -template -future sum_timer_stats(distributed& d, utils::timed_rate_moving_average_and_histogram F::*f) { - - return d.map_reduce0([f](const T& p) {return (p.get_stats().*f).rate();}, utils::rate_moving_average_and_histogram(), - std::plus()).then([](const utils::rate_moving_average_and_histogram& val) { - return make_ready_future(timer_to_json(val)); - }); -} - -inline int64_t min_int64(int64_t a, int64_t b) { - return std::min(a,b); -} - -inline int64_t max_int64(int64_t a, int64_t b) { - return std::max(a,b); -} - -/** - * A helper struct for ratio calculation - * It combines the total and the subset for the ratio, and its - * to_json method returns the ratio sub/total - */ -template -struct basic_ratio_holder : public json::jsonable { - T total = 0; - T sub = 0; - virtual std::string to_json() const { - if (total == 0) { - return "0"; - } - return std::to_string(sub/total); - } - basic_ratio_holder() = default; - basic_ratio_holder& add(T _total, T _sub) { - total += _total; - sub += _sub; - return *this; - } - basic_ratio_holder(T _total, T _sub) { - total = _total; - sub = _sub; - } - basic_ratio_holder& operator+=(const basic_ratio_holder& a) { - return add(a.total, a.sub); - } - friend basic_ratio_holder operator+(basic_ratio_holder a, const basic_ratio_holder& b) { - return a += b; - } -}; - -typedef basic_ratio_holder ratio_holder; -typedef basic_ratio_holder integral_ratio_holder; - -class unimplemented_exception : public base_exception { -public: - unimplemented_exception() - : base_exception("API call is not supported yet", reply::status_type::internal_server_error) { - } -}; - -inline void unimplemented() { - throw unimplemented_exception(); -} - -template -std::vector concat(std::vector a, std::vector&& b) { - a.reserve( a.size() + b.size()); - a.insert(a.end(), b.begin(), b.end()); - return a; -} - -} diff --git a/scylla/api/api_init.hh b/scylla/api/api_init.hh deleted file mode 100644 index b026ae2..0000000 --- a/scylla/api/api_init.hh +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ -#pragma once -#include "database.hh" -#include "service/storage_proxy.hh" -#include "http/httpd.hh" - -namespace api { - -struct http_context { - sstring api_dir; - sstring api_doc; - httpd::http_server_control http_server; - distributed& db; - distributed& sp; - http_context(distributed& _db, - distributed& _sp) - : db(_db), sp(_sp) { - } -}; - -future<> set_server_init(http_context& ctx); -future<> set_server_snitch(http_context& ctx); -future<> set_server_storage_service(http_context& ctx); -future<> set_server_gossip(http_context& ctx); -future<> set_server_load_sstable(http_context& ctx); -future<> set_server_messaging_service(http_context& ctx); -future<> set_server_storage_proxy(http_context& ctx); -future<> set_server_stream_manager(http_context& ctx); -future<> set_server_gossip_settle(http_context& ctx); -future<> set_server_done(http_context& ctx); - - -} diff --git a/scylla/api/cache_service.cc b/scylla/api/cache_service.cc deleted file mode 100644 index 65bc9da..0000000 --- a/scylla/api/cache_service.cc +++ /dev/null @@ -1,329 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "cache_service.hh" -#include "api/api-doc/cache_service.json.hh" -#include "column_family.hh" - -namespace api { -using namespace json; -namespace cs = httpd::cache_service_json; - -void set_cache_service(http_context& ctx, routes& r) { - cs::get_row_cache_save_period_in_seconds.set(r, [](std::unique_ptr req) { - // We never save the cache - // Origin uses 0 for never - return make_ready_future(0); - }); - - cs::set_row_cache_save_period_in_seconds.set(r, [](std::unique_ptr req) { - // TBD - unimplemented(); - auto period = req->get_query_param("period"); - return make_ready_future(json_void()); - }); - - cs::get_key_cache_save_period_in_seconds.set(r, [](std::unique_ptr req) { - // We never save the cache - // Origin uses 0 for never - return make_ready_future(0); - }); - - cs::set_key_cache_save_period_in_seconds.set(r, [](std::unique_ptr req) { - // TBD - unimplemented(); - auto period = req->get_query_param("period"); - return make_ready_future(json_void()); - }); - - cs::get_counter_cache_save_period_in_seconds.set(r, [](std::unique_ptr req) { - // We never save the cache - // Origin uses 0 for never - return make_ready_future(0); - }); - - cs::set_counter_cache_save_period_in_seconds.set(r, [](std::unique_ptr req) { - // TBD - unimplemented(); - auto ccspis = req->get_query_param("ccspis"); - return make_ready_future(json_void()); - }); - - cs::get_row_cache_keys_to_save.set(r, [](std::unique_ptr req) { - // TBD - unimplemented(); - return make_ready_future(0); - }); - - cs::set_row_cache_keys_to_save.set(r, [](std::unique_ptr req) { - // TBD - unimplemented(); - auto rckts = req->get_query_param("rckts"); - return make_ready_future(json_void()); - }); - - cs::get_key_cache_keys_to_save.set(r, [](std::unique_ptr req) { - // TBD - unimplemented(); - return make_ready_future(0); - }); - - cs::set_key_cache_keys_to_save.set(r, [](std::unique_ptr req) { - // TBD - unimplemented(); - auto kckts = req->get_query_param("kckts"); - return make_ready_future(json_void()); - }); - - cs::get_counter_cache_keys_to_save.set(r, [](std::unique_ptr req) { - // TBD - unimplemented(); - return make_ready_future(0); - }); - - cs::set_counter_cache_keys_to_save.set(r, [](std::unique_ptr req) { - // TBD - unimplemented(); - auto cckts = req->get_query_param("cckts"); - return make_ready_future(json_void()); - }); - - cs::invalidate_key_cache.set(r, [](std::unique_ptr req) { - // TBD - unimplemented(); - return make_ready_future(json_void()); - }); - - cs::invalidate_counter_cache.set(r, [](std::unique_ptr req) { - // TBD - unimplemented(); - return make_ready_future(json_void()); - }); - - cs::set_row_cache_capacity_in_mb.set(r, [](std::unique_ptr req) { - // TBD - unimplemented(); - auto capacity = req->get_query_param("capacity"); - return make_ready_future(json_void()); - }); - - cs::set_key_cache_capacity_in_mb.set(r, [](std::unique_ptr req) { - // TBD - unimplemented(); - auto period = req->get_query_param("period"); - return make_ready_future(json_void()); - }); - - cs::set_counter_cache_capacity_in_mb.set(r, [](std::unique_ptr req) { - // TBD - unimplemented(); - auto capacity = req->get_query_param("capacity"); - return make_ready_future(json_void()); - }); - - cs::save_caches.set(r, [](std::unique_ptr req) { - // TBD - unimplemented(); - return make_ready_future(json_void()); - }); - - cs::get_key_capacity.set(r, [] (std::unique_ptr req) { - // TBD - // FIXME - // we don't support keys cache, - // so currently returning a 0 for capacity is ok - return make_ready_future(0); - }); 
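Context for the cache handlers around this point (not from the original patch): the row-cache hit-rate endpoint folds per-shard statistics into the basic_ratio_holder defined in api.hh above, summing (total, sub) pairs so that each shard is weighted by its request volume instead of averaging per-shard ratios. A minimal self-contained sketch of that aggregation follows; the simplified struct here is an illustrative stand-in, not the api.hh type.

// Standalone illustration of the (total, sub) ratio aggregation used by
// api.hh's basic_ratio_holder; simplified stand-in, not the actual Scylla type.
#include <iostream>
#include <vector>

struct ratio_holder {
    double total = 0; // e.g. hits + misses on one shard
    double sub = 0;   // e.g. hits on one shard
    ratio_holder& operator+=(const ratio_holder& o) {
        total += o.total;
        sub += o.sub;
        return *this;
    }
    double value() const { return total == 0 ? 0 : sub / total; }
};

int main() {
    // Two shards: a busy one with a low hit rate, an idle one with a high hit rate.
    std::vector<ratio_holder> shards = {{1000, 100}, {10, 9}};
    ratio_holder sum;
    for (const auto& s : shards) {
        sum += s;
    }
    std::cout << sum.value() << "\n"; // ~0.108, dominated by the busy shard
    // Naively averaging the two ratios would instead give (0.1 + 0.9) / 2 = 0.5.
}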
- - cs::get_key_hits.set(r, [] (std::unique_ptr req) { - // TBD - // FIXME - // we don't support keys cache, - // so currently returning a 0 for hits is ok - return make_ready_future(0); - }); - - cs::get_key_requests.set(r, [] (std::unique_ptr req) { - // TBD - // FIXME - // we don't support keys cache, - // so currently returning a 0 for requests is ok - return make_ready_future(0); - }); - - cs::get_key_hit_rate.set(r, [] (std::unique_ptr req) { - // TBD - // FIXME - // we don't support keys cache, - // so currently returning a 0 for rate is ok - return make_ready_future(0); - }); - - cs::get_key_hits_moving_avrage.set(r, [&ctx] (std::unique_ptr req) { - // TBD - // FIXME - // See above - return make_ready_future(meter_to_json(utils::rate_moving_average())); - }); - - cs::get_key_requests_moving_avrage.set(r, [&ctx] (std::unique_ptr req) { - // TBD - // FIXME - // See above - return make_ready_future(meter_to_json(utils::rate_moving_average())); - }); - - cs::get_key_size.set(r, [] (std::unique_ptr req) { - // TBD - // FIXME - // we don't support keys cache, - // so currently returning a 0 for size is ok - return make_ready_future(0); - }); - - cs::get_key_entries.set(r, [] (std::unique_ptr req) { - // TBD - // FIXME - // we don't support keys cache, - // so currently returning a 0 for key entries is ok - return make_ready_future(0); - }); - - cs::get_row_capacity.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, uint64_t(0), [](const column_family& cf) { - return cf.get_row_cache().get_cache_tracker().region().occupancy().used_space(); - }, std::plus()); - }); - - cs::get_row_hits.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, uint64_t(0), [](const column_family& cf) { - return cf.get_row_cache().stats().hits.count(); - }, std::plus()); - }); - - cs::get_row_requests.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, uint64_t(0), [](const column_family& cf) { - return cf.get_row_cache().stats().hits.count() + cf.get_row_cache().stats().misses.count(); - }, std::plus()); - }); - - cs::get_row_hit_rate.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, ratio_holder(), [](const column_family& cf) { - return ratio_holder(cf.get_row_cache().stats().hits.count() + cf.get_row_cache().stats().misses.count(), - cf.get_row_cache().stats().hits.count()); - }, std::plus()); - }); - - cs::get_row_hits_moving_avrage.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf_raw(ctx, utils::rate_moving_average(), [](const column_family& cf) { - return cf.get_row_cache().stats().hits.rate(); - }, std::plus()).then([](const utils::rate_moving_average& m) { - return make_ready_future(meter_to_json(m)); - }); - }); - - cs::get_row_requests_moving_avrage.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf_raw(ctx, utils::rate_moving_average(), [](const column_family& cf) { - return cf.get_row_cache().stats().hits.rate() + cf.get_row_cache().stats().misses.rate(); - }, std::plus()).then([](const utils::rate_moving_average& m) { - return make_ready_future(meter_to_json(m)); - }); - }); - - cs::get_row_size.set(r, [&ctx] (std::unique_ptr req) { - // In Origin, row size is the weighted size. 
- // We currently do not support weights, so we use num entries instead - return map_reduce_cf(ctx, 0, [](const column_family& cf) { - return cf.get_row_cache().partitions(); - }, std::plus()); - }); - - cs::get_row_entries.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, 0, [](const column_family& cf) { - return cf.get_row_cache().partitions(); - }, std::plus()); - }); - - cs::get_counter_capacity.set(r, [] (std::unique_ptr req) { - // TBD - // FIXME - // we don't support counter cache, - // so currently returning a 0 for capacity is ok - return make_ready_future(0); - }); - - cs::get_counter_hits.set(r, [] (std::unique_ptr req) { - // TBD - // FIXME - // we don't support counter cache, - // so currently returning a 0 for hits is ok - return make_ready_future(0); - }); - - cs::get_counter_requests.set(r, [] (std::unique_ptr req) { - // TBD - // FIXME - // we don't support counter cache, - // so currently returning a 0 for requests is ok - return make_ready_future(0); - }); - - cs::get_counter_hit_rate.set(r, [] (std::unique_ptr req) { - // TBD - // FIXME - // we don't support counter cache, - // so currently returning a 0 for rate is ok - return make_ready_future(0); - }); - - cs::get_counter_hits_moving_avrage.set(r, [&ctx] (std::unique_ptr req) { - // TBD - // FIXME - // See above - return make_ready_future(meter_to_json(utils::rate_moving_average())); - }); - - cs::get_counter_requests_moving_avrage.set(r, [&ctx] (std::unique_ptr req) { - // TBD - // FIXME - // See above - return make_ready_future(meter_to_json(utils::rate_moving_average())); - }); - - cs::get_counter_size.set(r, [] (std::unique_ptr req) { - // TBD - // FIXME - // we don't support counter cache, - // so currently returning a 0 for size is ok - return make_ready_future(0); - }); - - cs::get_counter_entries.set(r, [] (std::unique_ptr req) { - // TBD - // FIXME - // we don't support counter cache, - // so currently returning a 0 for entries is ok - return make_ready_future(0); - }); -} - -} - diff --git a/scylla/api/cache_service.hh b/scylla/api/cache_service.hh deleted file mode 100644 index 6c15907..0000000 --- a/scylla/api/cache_service.hh +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "api.hh" - -namespace api { - -void set_cache_service(http_context& ctx, routes& r); - -} diff --git a/scylla/api/collectd.cc b/scylla/api/collectd.cc deleted file mode 100644 index 8dce92c..0000000 --- a/scylla/api/collectd.cc +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "collectd.hh" -#include "api/api-doc/collectd.json.hh" -#include "core/scollectd.hh" -#include "core/scollectd_api.hh" -#include "endian.h" -#include -#include - -namespace api { - -using namespace scollectd; -using namespace httpd; - -using namespace json; -namespace cd = httpd::collectd_json; - -static auto transformer(const std::vector& values) { - cd::collectd_value collected_value; - for (auto v: values) { - switch (v._type) { - case scollectd::data_type::GAUGE: - collected_value.values.push(v.d()); - break; - case scollectd::data_type::DERIVE: - collected_value.values.push(v.i()); - break; - default: - collected_value.values.push(v.ui()); - break; - } - } - return collected_value; -} - - -static const char* str_to_regex(const sstring& v) { - if (v != "") { - return v.c_str(); - } - return ".*"; -} - -void set_collectd(http_context& ctx, routes& r) { - cd::get_collectd.set(r, [&ctx](std::unique_ptr req) { - - auto id = make_shared(req->param["pluginid"], - req->get_query_param("instance"), req->get_query_param("type"), - req->get_query_param("type_instance")); - - - return do_with(std::vector(), [id] (auto& vec) { - vec.resize(smp::count); - return parallel_for_each(boost::irange(0u, smp::count), [&vec, id] (auto cpu) { - return smp::submit_to(cpu, [id = *id] { - return scollectd::get_collectd_value(id); - }).then([&vec, cpu] (auto res) { - vec[cpu] = transformer(res); - }); - }).then([&vec] { - return make_ready_future(vec); - }); - }); - }); - - cd::get_collectd_items.set(r, [](const_req req) { - std::vector res; - auto ids = scollectd::get_collectd_ids(); - for (auto i: ids) { - cd::type_instance_id id; - id.plugin = i.plugin(); - id.plugin_instance = i.plugin_instance(); - id.type = i.type(); - id.type_instance = i.type_instance(); - cd::collectd_metric_status it; - it.id = id; - it.enable = scollectd::is_enabled(i); - res.push_back(it); - } - return res; - }); - - cd::enable_collectd.set(r, [](std::unique_ptr req) -> future { - std::regex plugin(req->param["pluginid"].c_str()); - std::regex instance(str_to_regex(req->get_query_param("instance"))); - std::regex type(str_to_regex(req->get_query_param("type"))); - std::regex type_instance(str_to_regex(req->get_query_param("type_instance"))); - bool enable = strcasecmp(req->get_query_param("enable").c_str(), "true") == 0; - return smp::invoke_on_all([enable, plugin, instance, type, type_instance]() { - for (auto id: scollectd::get_collectd_ids()) { - if (std::regex_match(std::string(id.plugin()), plugin) && - std::regex_match(std::string(id.plugin_instance()), instance) && - std::regex_match(std::string(id.type()), type) && - std::regex_match(std::string(id.type_instance()), type_instance)) { - scollectd::enable(id, enable); - } - } - }).then([] { - return json::json_return_type(json_void()); - }); - }); - - cd::enable_all_collectd.set(r, [](std::unique_ptr req) -> future { - bool enable = strcasecmp(req->get_query_param("enable").c_str(), "true") == 0; - return smp::invoke_on_all([enable] { - for (auto id: scollectd::get_collectd_ids()) { - scollectd::enable(id, enable); - } - }).then([] { - return json::json_return_type(json_void()); - }); - }); -} - -} - diff 
--git a/scylla/api/collectd.hh b/scylla/api/collectd.hh deleted file mode 100644 index bba870c..0000000 --- a/scylla/api/collectd.hh +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "api.hh" - -namespace api { - -void set_collectd(http_context& ctx, routes& r); - -} diff --git a/scylla/api/column_family.cc b/scylla/api/column_family.cc deleted file mode 100644 index 04b9bdb..0000000 --- a/scylla/api/column_family.cc +++ /dev/null @@ -1,909 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "column_family.hh" -#include "api/api-doc/column_family.json.hh" -#include -#include "http/exception.hh" -#include "sstables/sstables.hh" -#include "utils/estimated_histogram.hh" -#include - -namespace api { -using namespace httpd; - -using namespace std; -using namespace json; -namespace cf = httpd::column_family_json; - -const utils::UUID& get_uuid(const sstring& name, const database& db) { - auto pos = name.find("%3A"); - size_t end; - if (pos == sstring::npos) { - pos = name.find(":"); - if (pos == sstring::npos) { - throw bad_param_exception("Column family name should be in keyspace:column_family format"); - } - end = pos + 1; - } else { - end = pos + 3; - } - try { - return db.find_uuid(name.substr(0, pos), name.substr(end)); - } catch (std::out_of_range& e) { - throw bad_param_exception("Column family '" + name.substr(0, pos) + ":" - + name.substr(end) + "' not found"); - } -} - -future<> foreach_column_family(http_context& ctx, const sstring& name, function f) { - auto uuid = get_uuid(name, ctx.db.local()); - - return ctx.db.invoke_on_all([f, uuid](database& db) { - f(db.find_column_family(uuid)); - }); -} - -future get_cf_stats(http_context& ctx, const sstring& name, - int64_t column_family::stats::*f) { - return map_reduce_cf(ctx, name, int64_t(0), [f](const column_family& cf) { - return cf.get_stats().*f; - }, std::plus()); -} - -future get_cf_stats(http_context& ctx, - int64_t column_family::stats::*f) { - return map_reduce_cf(ctx, int64_t(0), [f](const column_family& cf) { - return cf.get_stats().*f; - }, std::plus()); -} - -static future get_cf_stats_count(http_context& ctx, const sstring& name, - utils::timed_rate_moving_average_and_histogram column_family::stats::*f) { - return map_reduce_cf(ctx, name, int64_t(0), [f](const column_family& cf) { - return (cf.get_stats().*f).hist.count; - }, std::plus()); -} - -static future get_cf_stats_sum(http_context& ctx, const sstring& name, - utils::timed_rate_moving_average_and_histogram column_family::stats::*f) { - auto uuid = get_uuid(name, ctx.db.local()); - return ctx.db.map_reduce0([uuid, f](database& db) { - // Histogram information is a sample of the actual load, - // so to get an estimate of the sum, we multiply the mean - // by the count. 
The information is gathered in nanoseconds, - // but reported in microseconds - column_family& cf = db.find_column_family(uuid); - return ((cf.get_stats().*f).hist.count/1000.0) * (cf.get_stats().*f).hist.mean; - }, 0.0, std::plus()).then([](double res) { - return make_ready_future((int64_t)res); - }); -} - - -static future get_cf_stats_count(http_context& ctx, - utils::timed_rate_moving_average_and_histogram column_family::stats::*f) { - return map_reduce_cf(ctx, int64_t(0), [f](const column_family& cf) { - return (cf.get_stats().*f).hist.count; - }, std::plus()); -} - -static future get_cf_histogram(http_context& ctx, const sstring& name, - utils::timed_rate_moving_average_and_histogram column_family::stats::*f) { - utils::UUID uuid = get_uuid(name, ctx.db.local()); - return ctx.db.map_reduce0([f, uuid](const database& p) { - return (p.find_column_family(uuid).get_stats().*f).hist;}, - utils::ihistogram(), - std::plus()) - .then([](const utils::ihistogram& val) { - return make_ready_future(to_json(val)); - }); -} - -static future get_cf_histogram(http_context& ctx, utils::timed_rate_moving_average_and_histogram column_family::stats::*f) { - std::function fun = [f] (const database& db) { - utils::ihistogram res; - for (auto i : db.get_column_families()) { - res += (i.second->get_stats().*f).hist; - } - return res; - }; - return ctx.db.map(fun).then([](const std::vector &res) { - std::vector r; - boost::copy(res | boost::adaptors::transformed(to_json), std::back_inserter(r)); - return make_ready_future(r); - }); -} - -static future get_cf_rate_and_histogram(http_context& ctx, const sstring& name, - utils::timed_rate_moving_average_and_histogram column_family::stats::*f) { - utils::UUID uuid = get_uuid(name, ctx.db.local()); - return ctx.db.map_reduce0([f, uuid](const database& p) { - return (p.find_column_family(uuid).get_stats().*f).rate();}, - utils::rate_moving_average_and_histogram(), - std::plus()) - .then([](const utils::rate_moving_average_and_histogram& val) { - return make_ready_future(timer_to_json(val)); - }); -} - -static future get_cf_rate_and_histogram(http_context& ctx, utils::timed_rate_moving_average_and_histogram column_family::stats::*f) { - std::function fun = [f] (const database& db) { - utils::rate_moving_average_and_histogram res; - for (auto i : db.get_column_families()) { - res += (i.second->get_stats().*f).rate(); - } - return res; - }; - return ctx.db.map(fun).then([](const std::vector &res) { - std::vector r; - boost::copy(res | boost::adaptors::transformed(timer_to_json), std::back_inserter(r)); - return make_ready_future(r); - }); -} - -static future get_cf_unleveled_sstables(http_context& ctx, const sstring& name) { - return map_reduce_cf(ctx, name, int64_t(0), [](const column_family& cf) { - return cf.get_unleveled_sstables(); - }, std::plus()); -} - -static int64_t min_row_size(column_family& cf) { - int64_t res = INT64_MAX; - for (auto i: *cf.get_sstables() ) { - res = std::min(res, i->get_stats_metadata().estimated_row_size.min()); - } - return (res == INT64_MAX) ? 
0 : res; -} - -static int64_t max_row_size(column_family& cf) { - int64_t res = 0; - for (auto i: *cf.get_sstables() ) { - res = std::max(i->get_stats_metadata().estimated_row_size.max(), res); - } - return res; -} - -static integral_ratio_holder mean_row_size(column_family& cf) { - integral_ratio_holder res; - for (auto i: *cf.get_sstables() ) { - auto c = i->get_stats_metadata().estimated_row_size.count(); - res.sub += i->get_stats_metadata().estimated_row_size.mean() * c; - res.total += c; - } - return res; -} - -static std::unordered_map merge_maps(std::unordered_map a, - const std::unordered_map& b) { - a.insert(b.begin(), b.end()); - return a; -} - -static json::json_return_type sum_map(const std::unordered_map& val) { - uint64_t res = 0; - for (auto i : val) { - res += i.second; - } - return res; -} - -static future sum_sstable(http_context& ctx, const sstring name, bool total) { - auto uuid = get_uuid(name, ctx.db.local()); - return ctx.db.map_reduce0([uuid, total](database& db) { - std::unordered_map m; - auto sstables = (total) ? db.find_column_family(uuid).get_sstables_including_compacted_undeleted() : - db.find_column_family(uuid).get_sstables(); - for (auto t : *sstables) { - m[t->get_filename()] = t->bytes_on_disk(); - } - return m; - }, std::unordered_map(), merge_maps). - then([](const std::unordered_map& val) { - return sum_map(val); - }); -} - - -static future sum_sstable(http_context& ctx, bool total) { - return map_reduce_cf_raw(ctx, std::unordered_map(), [total](column_family& cf) { - std::unordered_map m; - auto sstables = (total) ? cf.get_sstables_including_compacted_undeleted() : - cf.get_sstables(); - for (auto t : *sstables) { - m[t->get_filename()] = t->bytes_on_disk(); - } - return m; - },merge_maps).then([](const std::unordered_map& val) { - return sum_map(val); - }); -} - -template -class sum_ratio { - uint64_t _n = 0; - T _total = 0; -public: - future<> operator()(T value) { - if (value > 0) { - _total += value; - _n++; - } - return make_ready_future<>(); - } - // Returns average value of all registered ratios. - T get() && { - return _n ? 
(_total / _n) : T(0); - } -}; - -static double get_compression_ratio(column_family& cf) { - sum_ratio result; - for (auto i : *cf.get_sstables()) { - auto compression_ratio = i->get_compression_ratio(); - if (compression_ratio != sstables::metadata_collector::NO_COMPRESSION_RATIO) { - result(compression_ratio); - } - } - return std::move(result).get(); -} - -static std::vector concat_sstable_count_per_level(std::vector a, std::vector&& b) { - a.resize(std::max(a.size(), b.size()), 0UL); - for (auto i = 0U; i < b.size(); i++) { - a[i] += b[i]; - } - return a; -} - -ratio_holder filter_false_positive_as_ratio_holder(const sstables::shared_sstable& sst) { - double f = sst->filter_get_false_positive(); - return ratio_holder(f + sst->filter_get_true_positive(), f); -} - -ratio_holder filter_recent_false_positive_as_ratio_holder(const sstables::shared_sstable& sst) { - double f = sst->filter_get_recent_false_positive(); - return ratio_holder(f + sst->filter_get_recent_true_positive(), f); -} - -void set_column_family(http_context& ctx, routes& r) { - cf::get_column_family_name.set(r, [&ctx] (const_req req){ - vector res; - for (auto i: ctx.db.local().get_column_families_mapping()) { - res.push_back(i.first.first + ":" + i.first.second); - } - return res; - }); - - cf::get_column_family.set(r, [&ctx] (const_req req){ - vector res; - for (auto i: ctx.db.local().get_column_families_mapping()) { - cf::column_family_info info; - info.ks = i.first.first; - info.cf = i.first.second; - info.type = "ColumnFamilies"; - res.push_back(info); - } - return res; - }); - - cf::get_column_family_name_keyspace.set(r, [&ctx] (const_req req){ - vector res; - for (auto i = ctx.db.local().get_keyspaces().cbegin(); i!= ctx.db.local().get_keyspaces().cend(); i++) { - res.push_back(i->first); - } - return res; - }); - - cf::get_memtable_columns_count.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], 0, [](column_family& cf) { - return cf.active_memtable().partition_count(); - }, std::plus()); - }); - - cf::get_all_memtable_columns_count.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, 0, [](column_family& cf) { - return cf.active_memtable().partition_count(); - }, std::plus()); - }); - - cf::get_memtable_on_heap_size.set(r, [] (const_req req) { - return 0; - }); - - cf::get_all_memtable_on_heap_size.set(r, [] (const_req req) { - return 0; - }); - - cf::get_memtable_off_heap_size.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](column_family& cf) { - return cf.active_memtable().region().occupancy().total_space(); - }, std::plus()); - }); - - cf::get_all_memtable_off_heap_size.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, int64_t(0), [](column_family& cf) { - return cf.active_memtable().region().occupancy().total_space(); - }, std::plus()); - }); - - cf::get_memtable_live_data_size.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](column_family& cf) { - return cf.active_memtable().region().occupancy().used_space(); - }, std::plus()); - }); - - cf::get_all_memtable_live_data_size.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, int64_t(0), [](column_family& cf) { - return cf.active_memtable().region().occupancy().used_space(); - }, std::plus()); - }); - - cf::get_cf_all_memtables_on_heap_size.set(r, [] (const_req req) { - return 0; - }); - - cf::get_all_cf_all_memtables_on_heap_size.set(r, [] (const_req req) { - return 0; - }); - - 
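For orientation (not from the original patch): the set_column_family handlers before and after this point all funnel through map_reduce_cf, whose definition lies outside this hunk; judging by get_cf_stats_sum earlier in the file, it presumably wraps seastar's map_reduce0 over the sharded database. A minimal self-contained sketch of that map/reduce-over-shards pattern follows; fake_shard and the synchronous fold are illustrative stand-ins, not Seastar's asynchronous API.

// Synchronous stand-in for the per-shard map/reduce these handlers rely on:
// map a function over every shard's replica, then fold the results with a
// reducer and an initial value (what seastar's map_reduce0 does asynchronously).
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

struct fake_shard {
    uint64_t live_sstables; // one per-shard statistic
};

template <typename T, typename Mapper, typename Reducer>
T map_reduce0(const std::vector<fake_shard>& shards, T init,
              Mapper mapper, Reducer reducer) {
    for (const auto& s : shards) {
        init = reducer(init, mapper(s)); // fold each shard's mapped value
    }
    return init;
}

int main() {
    std::vector<fake_shard> shards = {{3}, {5}, {2}};
    auto total = map_reduce0(shards, uint64_t(0),
        [](const fake_shard& s) { return s.live_sstables; },
        std::plus<uint64_t>());
    std::cout << total << "\n"; // 10
}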
cf::get_cf_all_memtables_off_heap_size.set(r, [&ctx] (std::unique_ptr req) { - warn(unimplemented::cause::INDEXES); - return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](column_family& cf) { - return cf.occupancy().total_space(); - }, std::plus()); - }); - - cf::get_all_cf_all_memtables_off_heap_size.set(r, [&ctx] (std::unique_ptr req) { - warn(unimplemented::cause::INDEXES); - return ctx.db.map_reduce0([](const database& db){ - return db.dirty_memory_region_group().memory_used(); - }, int64_t(0), std::plus()).then([](int res) { - return make_ready_future(res); - }); - }); - - cf::get_cf_all_memtables_live_data_size.set(r, [&ctx] (std::unique_ptr req) { - warn(unimplemented::cause::INDEXES); - return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](column_family& cf) { - return cf.occupancy().used_space(); - }, std::plus()); - }); - - cf::get_all_cf_all_memtables_live_data_size.set(r, [&ctx] (std::unique_ptr req) { - warn(unimplemented::cause::INDEXES); - return map_reduce_cf(ctx, int64_t(0), [](column_family& cf) { - return cf.active_memtable().region().occupancy().used_space(); - }, std::plus()); - }); - - cf::get_memtable_switch_count.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_stats(ctx,req->param["name"] ,&column_family::stats::memtable_switch_count); - }); - - cf::get_all_memtable_switch_count.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_stats(ctx, &column_family::stats::memtable_switch_count); - }); - - cf::get_estimated_row_size_histogram.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) { - utils::estimated_histogram res(0); - for (auto i: *cf.get_sstables() ) { - res.merge(i->get_stats_metadata().estimated_row_size); - } - return res; - }, - utils::estimated_histogram_merge, utils_json::estimated_histogram()); - }); - - cf::get_estimated_row_count.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](column_family& cf) { - uint64_t res = 0; - for (auto i: *cf.get_sstables() ) { - res += i->get_stats_metadata().estimated_row_size.count(); - } - return res; - }, - std::plus()); - }); - - cf::get_estimated_column_count_histogram.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) { - utils::estimated_histogram res(0); - for (auto i: *cf.get_sstables() ) { - res.merge(i->get_stats_metadata().estimated_column_count); - } - return res; - }, - utils::estimated_histogram_merge, utils_json::estimated_histogram()); - }); - - cf::get_all_compression_ratio.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - cf::get_pending_flushes.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_stats(ctx,req->param["name"] ,&column_family::stats::pending_flushes); - }); - - cf::get_all_pending_flushes.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_stats(ctx, &column_family::stats::pending_flushes); - }); - - cf::get_read.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_stats_count(ctx,req->param["name"] ,&column_family::stats::reads); - }); - - cf::get_all_read.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_stats_count(ctx, &column_family::stats::reads); - }); - - cf::get_write.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_stats_count(ctx, req->param["name"] ,&column_family::stats::writes); - }); - - cf::get_all_write.set(r, [&ctx] (std::unique_ptr req) { - return 
get_cf_stats_count(ctx, &column_family::stats::writes); - }); - - cf::get_read_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_histogram(ctx, req->param["name"], &column_family::stats::reads); - }); - - cf::get_read_latency_histogram.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_rate_and_histogram(ctx, req->param["name"], &column_family::stats::reads); - }); - - cf::get_read_latency.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_stats_sum(ctx, req->param["name"], &column_family::stats::reads); - }); - - cf::get_write_latency.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_stats_sum(ctx, req->param["name"], &column_family::stats::writes); - }); - - cf::get_all_read_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_histogram(ctx, &column_family::stats::reads); - }); - - cf::get_all_read_latency_histogram.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_rate_and_histogram(ctx, &column_family::stats::reads); - }); - - cf::get_write_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_histogram(ctx, req->param["name"], &column_family::stats::writes); - }); - - cf::get_write_latency_histogram.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_rate_and_histogram(ctx, req->param["name"], &column_family::stats::writes); - }); - - cf::get_all_write_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_histogram(ctx, &column_family::stats::writes); - }); - - cf::get_all_write_latency_histogram.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_rate_and_histogram(ctx, &column_family::stats::writes); - }); - - cf::get_pending_compactions.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](column_family& cf) { - return cf.get_compaction_strategy().estimated_pending_compactions(cf); - }, std::plus()); - }); - - cf::get_all_pending_compactions.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, int64_t(0), [](column_family& cf) { - return cf.get_compaction_strategy().estimated_pending_compactions(cf); - }, std::plus()); - }); - - cf::get_live_ss_table_count.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_stats(ctx, req->param["name"], &column_family::stats::live_sstable_count); - }); - - cf::get_all_live_ss_table_count.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_stats(ctx, &column_family::stats::live_sstable_count); - }); - - cf::get_unleveled_sstables.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_unleveled_sstables(ctx, req->param["name"]); - }); - - cf::get_live_disk_space_used.set(r, [&ctx] (std::unique_ptr req) { - return sum_sstable(ctx, req->param["name"], false); - }); - - cf::get_all_live_disk_space_used.set(r, [&ctx] (std::unique_ptr req) { - return sum_sstable(ctx, false); - }); - - cf::get_total_disk_space_used.set(r, [&ctx] (std::unique_ptr req) { - return sum_sstable(ctx, req->param["name"], true); - }); - - cf::get_all_total_disk_space_used.set(r, [&ctx] (std::unique_ptr req) { - return sum_sstable(ctx, true); - }); - - cf::get_min_row_size.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], INT64_MAX, min_row_size, min_int64); - }); - - cf::get_all_min_row_size.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, INT64_MAX, min_row_size, min_int64); - }); - - cf::get_max_row_size.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], int64_t(0), max_row_size, max_int64); 
- }); - - cf::get_all_max_row_size.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, int64_t(0), max_row_size, max_int64); - }); - - cf::get_mean_row_size.set(r, [&ctx] (std::unique_ptr req) { - // Cassandra 3.x mean values are truncated as integrals. - return map_reduce_cf(ctx, req->param["name"], integral_ratio_holder(), mean_row_size, std::plus()); - }); - - cf::get_all_mean_row_size.set(r, [&ctx] (std::unique_ptr req) { - // Cassandra 3.x mean values are truncated as integrals. - return map_reduce_cf(ctx, integral_ratio_holder(), mean_row_size, std::plus()); - }); - - cf::get_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) { - return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) { - return s + sst->filter_get_false_positive(); - }); - }, std::plus()); - }); - - cf::get_all_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) { - return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) { - return s + sst->filter_get_false_positive(); - }); - }, std::plus()); - }); - - cf::get_recent_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) { - return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) { - return s + sst->filter_get_recent_false_positive(); - }); - }, std::plus()); - }); - - cf::get_all_recent_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) { - return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) { - return s + sst->filter_get_recent_false_positive(); - }); - }, std::plus()); - }); - - cf::get_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], ratio_holder(), [] (column_family& cf) { - return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_false_positive_as_ratio_holder), ratio_holder()); - }, std::plus<>()); - }); - - cf::get_all_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, ratio_holder(), [] (column_family& cf) { - return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_false_positive_as_ratio_holder), ratio_holder()); - }, std::plus<>()); - }); - - cf::get_recent_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], ratio_holder(), [] (column_family& cf) { - return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_recent_false_positive_as_ratio_holder), ratio_holder()); - }, std::plus<>()); - }); - - cf::get_all_recent_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, ratio_holder(), [] (column_family& cf) { - return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_recent_false_positive_as_ratio_holder), ratio_holder()); - }, std::plus<>()); - }); - - cf::get_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) { - return std::accumulate(cf.get_sstables()->begin(), 
cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) { - return s + sst->filter_size(); - }); - }, std::plus()); - }); - - cf::get_all_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) { - return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) { - return s + sst->filter_size(); - }); - }, std::plus()); - }); - - cf::get_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) { - return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) { - return s + sst->filter_memory_size(); - }); - }, std::plus()); - }); - - cf::get_all_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) { - return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) { - return s + sst->filter_memory_size(); - }); - }, std::plus()); - }); - - cf::get_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) { - return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) { - return s + sst->get_summary().memory_footprint(); - }); - }, std::plus()); - }); - - cf::get_all_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) { - return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) { - return s + sst->get_summary().memory_footprint(); - }); - }, std::plus()); - }); - - cf::get_compression_metadata_off_heap_memory_used.set(r, [] (std::unique_ptr req) { - //TBD - // FIXME - // We are missing the off-heap memory calculation. - // Returning 0 is the wrong value. 
It's a work around - // until the memory calculation will be available - //auto id = get_uuid(req->param["name"], ctx.db.local()); - return make_ready_future(0); - }); - - cf::get_all_compression_metadata_off_heap_memory_used.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - cf::get_speculative_retries.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - //auto id = get_uuid(req->param["name"], ctx.db.local()); - return make_ready_future(0); - }); - - cf::get_all_speculative_retries.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - cf::get_key_cache_hit_rate.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - //auto id = get_uuid(req->param["name"], ctx.db.local()); - return make_ready_future(0); - }); - - cf::get_true_snapshots_size.set(r, [&ctx] (std::unique_ptr req) { - auto uuid = get_uuid(req->param["name"], ctx.db.local()); - return ctx.db.local().find_column_family(uuid).get_snapshot_details().then([]( - const std::unordered_map& sd) { - int64_t res = 0; - for (auto i : sd) { - res += i.second.total; - } - return make_ready_future(res); - }); - }); - - cf::get_all_true_snapshots_size.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - cf::get_row_cache_hit_out_of_range.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - //auto id = get_uuid(req->param["name"], ctx.db.local()); - return make_ready_future(0); - }); - - cf::get_all_row_cache_hit_out_of_range.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - cf::get_row_cache_hit.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf_raw(ctx, req->param["name"], utils::rate_moving_average(), [](const column_family& cf) { - return cf.get_row_cache().stats().hits.rate(); - }, std::plus()).then([](const utils::rate_moving_average& m) { - return make_ready_future(meter_to_json(m)); - }); - }); - - cf::get_all_row_cache_hit.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf_raw(ctx, utils::rate_moving_average(), [](const column_family& cf) { - return cf.get_row_cache().stats().hits.rate(); - }, std::plus()).then([](const utils::rate_moving_average& m) { - return make_ready_future(meter_to_json(m)); - }); - }); - - cf::get_row_cache_miss.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf_raw(ctx, req->param["name"], utils::rate_moving_average(), [](const column_family& cf) { - return cf.get_row_cache().stats().misses.rate(); - }, std::plus()).then([](const utils::rate_moving_average& m) { - return make_ready_future(meter_to_json(m)); - }); - }); - - cf::get_all_row_cache_miss.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf_raw(ctx, utils::rate_moving_average(), [](const column_family& cf) { - return cf.get_row_cache().stats().misses.rate(); - }, std::plus()).then([](const utils::rate_moving_average& m) { - return make_ready_future(meter_to_json(m)); - }); - - }); - - cf::get_cas_prepare.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - //auto id = get_uuid(req->param["name"], ctx.db.local()); - return make_ready_future(0); - }); - - cf::get_cas_propose.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - //auto id = get_uuid(req->param["name"], ctx.db.local()); - return make_ready_future(0); - }); - - cf::get_cas_commit.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - //auto id = get_uuid(req->param["name"], ctx.db.local()); - return 
make_ready_future(0); - }); - - cf::get_sstables_per_read_histogram.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) { - return cf.get_stats().estimated_sstable_per_read; - }, - utils::estimated_histogram_merge, utils_json::estimated_histogram()); - }); - - cf::get_tombstone_scanned_histogram.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_histogram(ctx, req->param["name"], &column_family::stats::tombstone_scanned); - }); - - cf::get_live_scanned_histogram.set(r, [&ctx] (std::unique_ptr req) { - return get_cf_histogram(ctx, req->param["name"], &column_family::stats::live_scanned); - }); - - cf::get_col_update_time_delta_histogram.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - //auto id = get_uuid(req->param["name"], ctx.db.local()); - std::vector res; - return make_ready_future(res); - }); - - cf::is_auto_compaction_disabled.set(r, [] (const_req req) { - // FIXME - // currently auto compaction is disabled; - // this should change once there is an API for it - return true; - }); - - cf::get_built_indexes.set(r, [](const_req) { - // FIXME - // Currently there is no index support - return std::vector(); - }); - - cf::get_compression_metadata_off_heap_memory_used.set(r, [](const_req) { - // FIXME - // Currently there is no information on the compression - // metadata, so we return 0 - return 0; - }); - - cf::get_compression_parameters.set(r, [](const_req) { - // FIXME - // Currently there are no compression parameters available, - // so we return an empty map - return std::vector(); - }); - - cf::get_compression_ratio.set(r, [&ctx](std::unique_ptr req) { - auto uuid = get_uuid(req->param["name"], ctx.db.local()); - - return ctx.db.map_reduce(sum_ratio(), [uuid](database& db) { - column_family& cf = db.find_column_family(uuid); - return make_ready_future(get_compression_ratio(cf)); - }).then([] (const double& result) { - return make_ready_future(result); - }); - }); - - cf::get_read_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) { - return cf.get_stats().estimated_read; - }, - utils::estimated_histogram_merge, utils_json::estimated_histogram()); - }); - - cf::get_write_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr req) { - return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) { - return cf.get_stats().estimated_write; - }, - utils::estimated_histogram_merge, utils_json::estimated_histogram()); - }); - - cf::set_compaction_strategy_class.set(r, [&ctx](std::unique_ptr req) { - sstring strategy = req->get_query_param("class_name"); - return foreach_column_family(ctx, req->param["name"], [strategy](column_family& cf) { - cf.set_compaction_strategy(sstables::compaction_strategy::type(strategy)); - }).then([] { - return make_ready_future(json_void()); - }); - }); - - cf::get_compaction_strategy_class.set(r, [&ctx](const_req req) { - return ctx.db.local().find_column_family(get_uuid(req.param["name"], ctx.db.local())).get_compaction_strategy().name(); - }); - - cf::set_compression_parameters.set(r, [&ctx](std::unique_ptr req) { - // TBD - unimplemented(); - return make_ready_future(json_void()); - }); - - cf::set_crc_check_chance.set(r, [&ctx](std::unique_ptr req) { - // TBD - unimplemented(); - return make_ready_future(json_void()); - }); - - cf::get_sstable_count_per_level.set(r, [&ctx](std::unique_ptr req) { - 
return map_reduce_cf_raw(ctx, req->param["name"], std::vector(), [](const column_family& cf) { - return cf.sstable_count_per_level(); - }, concat_sstable_count_per_level).then([](const std::vector& res) { - return make_ready_future(res); - }); - }); -} -} diff --git a/scylla/api/column_family.hh b/scylla/api/column_family.hh deleted file mode 100644 index 00d173e..0000000 --- a/scylla/api/column_family.hh +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "api.hh" -#include "api/api-doc/column_family.json.hh" -#include "database.hh" - -namespace api { - -void set_column_family(http_context& ctx, routes& r); - -const utils::UUID& get_uuid(const sstring& name, const database& db); -future<> foreach_column_family(http_context& ctx, const sstring& name, std::function f); - - -template -future map_reduce_cf_raw(http_context& ctx, const sstring& name, I init, - Mapper mapper, Reducer reducer) { - auto uuid = get_uuid(name, ctx.db.local()); - return ctx.db.map_reduce0([mapper, uuid](database& db) { - return mapper(db.find_column_family(uuid)); - }, init, reducer); -} - - -template -future map_reduce_cf(http_context& ctx, const sstring& name, I init, - Mapper mapper, Reducer reducer) { - return map_reduce_cf_raw(ctx, name, init, mapper, reducer).then([](const I& res) { - return make_ready_future(res); - }); -} - -template -future map_reduce_cf_raw(http_context& ctx, const sstring& name, I init, - Mapper mapper, Reducer reducer, Result result) { - auto uuid = get_uuid(name, ctx.db.local()); - return ctx.db.map_reduce0([mapper, uuid](database& db) { - return mapper(db.find_column_family(uuid)); - }, init, reducer); -} - - -template -future map_reduce_cf(http_context& ctx, const sstring& name, I init, - Mapper mapper, Reducer reducer, Result result) { - return map_reduce_cf_raw(ctx, name, init, mapper, reducer, result).then([result](const I& res) mutable { - result = res; - return make_ready_future(result); - }); -} - -template -future map_reduce_cf_raw(http_context& ctx, I init, - Mapper mapper, Reducer reducer) { - return ctx.db.map_reduce0([mapper, init, reducer](database& db) { - auto res = init; - for (auto i : db.get_column_families()) { - res = reducer(res, mapper(*i.second.get())); - } - return res; - }, init, reducer); -} - - -template -future map_reduce_cf(http_context& ctx, I init, - Mapper mapper, Reducer reducer) { - return map_reduce_cf_raw(ctx, init, mapper, reducer).then([](const I& res) { - return make_ready_future(res); - }); -} - -future get_cf_stats(http_context& ctx, const sstring& name, - int64_t column_family::stats::*f); - -future get_cf_stats(http_context& ctx, - int64_t column_family::stats::*f); - -} diff --git a/scylla/api/commitlog.cc b/scylla/api/commitlog.cc deleted file mode 100644 index b54a883..0000000 --- a/scylla/api/commitlog.cc +++ /dev/null @@ -1,78 +0,0 @@ -/* - 
* Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "commitlog.hh" -#include -#include "api/api-doc/commitlog.json.hh" -#include - -namespace api { - -template -static auto acquire_cl_metric(http_context& ctx, Func&& func) { - typedef std::result_of_t ret_type; - - return ctx.db.map_reduce0([func = std::forward(func)](database& db) { - if (db.commitlog() == nullptr) { - return make_ready_future(); - } - return make_ready_future(func(db.commitlog())); - }, ret_type(), std::plus()).then([](ret_type res) { - return make_ready_future(res); - }); -} - -void set_commitlog(http_context& ctx, routes& r) { - httpd::commitlog_json::get_active_segment_names.set(r, - [&ctx](std::unique_ptr req) { - auto res = make_shared>(); - return ctx.db.map_reduce([res](std::vector names) { - res->insert(res->end(), names.begin(), names.end()); - }, [](database& db) { - if (db.commitlog() == nullptr) { - return make_ready_future>(std::vector()); - } - return make_ready_future>(db.commitlog()->get_active_segment_names()); - }).then([res] { - return make_ready_future(*res.get()); - }); - }); - - // We currently do not support archive segments - httpd::commitlog_json::get_archiving_segment_names.set(r, [](const_req req) { - std::vector res; - return res; - }); - - httpd::commitlog_json::get_completed_tasks.set(r, [&ctx](std::unique_ptr req) { - return acquire_cl_metric(ctx, std::bind(&db::commitlog::get_completed_tasks, std::placeholders::_1)); - }); - - httpd::commitlog_json::get_pending_tasks.set(r, [&ctx](std::unique_ptr req) { - return acquire_cl_metric(ctx, std::bind(&db::commitlog::get_pending_tasks, std::placeholders::_1)); - }); - - httpd::commitlog_json::get_total_commit_log_size.set(r, [&ctx](std::unique_ptr req) { - return acquire_cl_metric(ctx, std::bind(&db::commitlog::get_total_size, std::placeholders::_1)); - }); -} - -} diff --git a/scylla/api/commitlog.hh b/scylla/api/commitlog.hh deleted file mode 100644 index f727aec..0000000 --- a/scylla/api/commitlog.hh +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "api.hh" - -namespace api { - -void set_commitlog(http_context& ctx, routes& r); - -} diff --git a/scylla/api/compaction_manager.cc b/scylla/api/compaction_manager.cc deleted file mode 100644 index b01b3d7..0000000 --- a/scylla/api/compaction_manager.cc +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "compaction_manager.hh" -#include "api/api-doc/compaction_manager.json.hh" -#include "db/system_keyspace.hh" -#include "column_family.hh" - -namespace api { - -namespace cm = httpd::compaction_manager_json; -using namespace json; - -static future get_cm_stats(http_context& ctx, - int64_t compaction_manager::stats::*f) { - return ctx.db.map_reduce0([f](database& db) { - return db.get_compaction_manager().get_stats().*f; - }, int64_t(0), std::plus()).then([](const int64_t& res) { - return make_ready_future(res); - }); -} - -void set_compaction_manager(http_context& ctx, routes& r) { - cm::get_compactions.set(r, [&ctx] (std::unique_ptr req) { - return ctx.db.map_reduce0([](database& db) { - std::vector summaries; - const compaction_manager& cm = db.get_compaction_manager(); - - for (const auto& c : cm.get_compactions()) { - cm::summary s; - s.ks = c->ks; - s.cf = c->cf; - s.unit = "keys"; - s.task_type = sstables::compaction_name(c->type); - s.completed = c->total_keys_written; - s.total = c->total_partitions; - summaries.push_back(std::move(s)); - } - return summaries; - }, std::vector(), concat).then([](const std::vector& res) { - return make_ready_future(res); - }); - }); - - cm::force_user_defined_compaction.set(r, [] (std::unique_ptr req) { - //TBD - // FIXME - warn(unimplemented::cause::API); - return make_ready_future(json_void()); - }); - - cm::stop_compaction.set(r, [&ctx] (std::unique_ptr req) { - auto type = req->get_query_param("type"); - return ctx.db.invoke_on_all([type] (database& db) { - auto& cm = db.get_compaction_manager(); - cm.stop_compaction(type); - }).then([] { - return make_ready_future(json_void()); - }); - }); - - cm::get_pending_tasks.set(r, [&ctx] (std::unique_ptr req) { - return map_reduce_cf(ctx, int64_t(0), [](column_family& cf) { - return cf.get_compaction_strategy().estimated_pending_compactions(cf); - }, std::plus()); - }); - - cm::get_completed_tasks.set(r, [&ctx] (std::unique_ptr req) { - return get_cm_stats(ctx, &compaction_manager::stats::completed_tasks); - }); - - cm::get_total_compactions_completed.set(r, [] (std::unique_ptr req) { - // FIXME - // We are currently dont have an API for compaction - // so returning a 0 as the number of total compaction is ok - return make_ready_future(0); - }); - - cm::get_bytes_compacted.set(r, [] (std::unique_ptr req) { - //TBD - // FIXME - warn(unimplemented::cause::API); - return make_ready_future(0); - }); - - cm::get_compaction_history.set(r, [] (std::unique_ptr req) { - return 
db::system_keyspace::get_compaction_history().then([] (std::vector history) { - std::vector res; - res.reserve(history.size()); - - for (auto& entry : history) { - cm::history h; - h.id = entry.id.to_sstring(); - h.ks = std::move(entry.ks); - h.cf = std::move(entry.cf); - h.compacted_at = entry.compacted_at; - h.bytes_in = entry.bytes_in; - h.bytes_out = entry.bytes_out; - for (auto it : entry.rows_merged) { - httpd::compaction_manager_json::row_merged e; - e.key = it.first; - e.value = it.second; - h.rows_merged.push(std::move(e)); - } - res.push_back(std::move(h)); - } - - return make_ready_future(res); - }); - }); - - cm::get_compaction_info.set(r, [] (std::unique_ptr req) { - //TBD - // FIXME - warn(unimplemented::cause::API); - std::vector res; - return make_ready_future(res); - }); - -} - -} - diff --git a/scylla/api/compaction_manager.hh b/scylla/api/compaction_manager.hh deleted file mode 100644 index 7fc6261..0000000 --- a/scylla/api/compaction_manager.hh +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "api.hh" - -namespace api { - -void set_compaction_manager(http_context& ctx, routes& r); - -} diff --git a/scylla/api/endpoint_snitch.cc b/scylla/api/endpoint_snitch.cc deleted file mode 100644 index f9c7fe7..0000000 --- a/scylla/api/endpoint_snitch.cc +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "locator/snitch_base.hh" -#include "endpoint_snitch.hh" -#include "api/api-doc/endpoint_snitch_info.json.hh" -#include "utils/fb_utilities.hh" - -namespace api { - -void set_endpoint_snitch(http_context& ctx, routes& r) { - static auto host_or_broadcast = [](const_req req) { - auto host = req.get_query_param("host"); - return host.empty() ? 
gms::inet_address(utils::fb_utilities::get_broadcast_address()) : gms::inet_address(host); - }; - - httpd::endpoint_snitch_info_json::get_datacenter.set(r, [](const_req req) { - return locator::i_endpoint_snitch::get_local_snitch_ptr()->get_datacenter(host_or_broadcast(req)); - }); - - httpd::endpoint_snitch_info_json::get_rack.set(r, [](const_req req) { - return locator::i_endpoint_snitch::get_local_snitch_ptr()->get_rack(host_or_broadcast(req)); - }); - - httpd::endpoint_snitch_info_json::get_snitch_name.set(r, [] (const_req req) { - return locator::i_endpoint_snitch::get_local_snitch_ptr()->get_name(); - }); -} - -} diff --git a/scylla/api/endpoint_snitch.hh b/scylla/api/endpoint_snitch.hh deleted file mode 100644 index 340f453..0000000 --- a/scylla/api/endpoint_snitch.hh +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "api.hh" - -namespace api { - -void set_endpoint_snitch(http_context& ctx, routes& r); - -} diff --git a/scylla/api/failure_detector.cc b/scylla/api/failure_detector.cc deleted file mode 100644 index fdbc6e8..0000000 --- a/scylla/api/failure_detector.cc +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "failure_detector.hh" -#include "api/api-doc/failure_detector.json.hh" -#include "gms/failure_detector.hh" -#include "gms/application_state.hh" -#include "gms/gossiper.hh" -namespace api { - -namespace fd = httpd::failure_detector_json; - -void set_failure_detector(http_context& ctx, routes& r) { - fd::get_all_endpoint_states.set(r, [](std::unique_ptr req) { - std::vector res; - for (auto i : gms::get_local_gossiper().endpoint_state_map) { - fd::endpoint_state val; - val.addrs = boost::lexical_cast(i.first); - val.is_alive = i.second.is_alive(); - val.generation = i.second.get_heart_beat_state().get_generation(); - val.version = i.second.get_heart_beat_state().get_heart_beat_version(); - val.update_time = i.second.get_update_timestamp().time_since_epoch().count(); - for (auto a : i.second.get_application_state_map()) { - fd::version_value version_val; - // We return the enum index and not it's name to stay compatible to origin - // method that the state index are static but the name can be changed. - version_val.application_state = static_cast::type>(a.first); - version_val.value = a.second.value; - version_val.version = a.second.version; - val.application_state.push(version_val); - } - res.push_back(val); - } - return make_ready_future(res); - }); - - fd::get_up_endpoint_count.set(r, [](std::unique_ptr req) { - return gms::get_up_endpoint_count().then([](int res) { - return make_ready_future(res); - }); - }); - - fd::get_down_endpoint_count.set(r, [](std::unique_ptr req) { - return gms::get_down_endpoint_count().then([](int res) { - return make_ready_future(res); - }); - }); - - fd::get_phi_convict_threshold.set(r, [] (std::unique_ptr req) { - return gms::get_phi_convict_threshold().then([](double res) { - return make_ready_future(res); - }); - }); - - fd::get_simple_states.set(r, [] (std::unique_ptr req) { - return gms::get_simple_states().then([](const std::map& map) { - return make_ready_future(map_to_key_value(map)); - }); - }); - - fd::set_phi_convict_threshold.set(r, [](std::unique_ptr req) { - double phi = atof(req->get_query_param("phi").c_str()); - return gms::set_phi_convict_threshold(phi).then([]() { - return make_ready_future(""); - }); - }); - - fd::get_endpoint_state.set(r, [](std::unique_ptr req) { - return gms::get_endpoint_state(req->param["addr"]).then([](const sstring& state) { - return make_ready_future(state); - }); - }); - - fd::get_endpoint_phi_values.set(r, [](std::unique_ptr req) { - return gms::get_arrival_samples().then([](std::map map) { - std::vector res; - auto now = gms::arrival_window::clk::now(); - for (auto& p : map) { - fd::endpoint_phi_value val; - val.endpoint = p.first.to_sstring(); - val.phi = p.second.phi(now); - res.emplace_back(std::move(val)); - } - return make_ready_future(res); - }); - }); -} - -} - diff --git a/scylla/api/failure_detector.hh b/scylla/api/failure_detector.hh deleted file mode 100644 index affc44e..0000000 --- a/scylla/api/failure_detector.hh +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "api.hh" - -namespace api { - -void set_failure_detector(http_context& ctx, routes& r); - -} diff --git a/scylla/api/gossiper.cc b/scylla/api/gossiper.cc deleted file mode 100644 index 9f7f7fe..0000000 --- a/scylla/api/gossiper.cc +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "gossiper.hh" -#include "api/api-doc/gossiper.json.hh" -#include - -namespace api { -using namespace json; - -void set_gossiper(http_context& ctx, routes& r) { - httpd::gossiper_json::get_down_endpoint.set(r, [] (const_req req) { - auto res = gms::get_local_gossiper().get_unreachable_members(); - return container_to_vec(res); - }); - - httpd::gossiper_json::get_live_endpoint.set(r, [] (const_req req) { - auto res = gms::get_local_gossiper().get_live_members(); - return container_to_vec(res); - }); - - httpd::gossiper_json::get_endpoint_downtime.set(r, [] (const_req req) { - gms::inet_address ep(req.param["addr"]); - return gms::get_local_gossiper().get_endpoint_downtime(ep); - }); - - httpd::gossiper_json::get_current_generation_number.set(r, [] (std::unique_ptr req) { - gms::inet_address ep(req->param["addr"]); - return gms::get_local_gossiper().get_current_generation_number(ep).then([] (int res) { - return make_ready_future(res); - }); - }); - - httpd::gossiper_json::get_current_heart_beat_version.set(r, [] (std::unique_ptr req) { - gms::inet_address ep(req->param["addr"]); - return gms::get_local_gossiper().get_current_heart_beat_version(ep).then([] (int res) { - return make_ready_future(res); - }); - }); - - httpd::gossiper_json::assassinate_endpoint.set(r, [](std::unique_ptr req) { - if (req->get_query_param("unsafe") != "True") { - return gms::get_local_gossiper().assassinate_endpoint(req->param["addr"]).then([] { - return make_ready_future(json_void()); - }); - } - return gms::get_local_gossiper().unsafe_assassinate_endpoint(req->param["addr"]).then([] { - return make_ready_future(json_void()); - }); - }); -} - -} diff --git a/scylla/api/gossiper.hh b/scylla/api/gossiper.hh deleted file mode 100644 index ed95fcb..0000000 --- a/scylla/api/gossiper.hh +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "api.hh" - -namespace api { - -void set_gossiper(http_context& ctx, routes& r); - -} diff --git a/scylla/api/hinted_handoff.cc b/scylla/api/hinted_handoff.cc deleted file mode 100644 index 2f93e77..0000000 --- a/scylla/api/hinted_handoff.cc +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "hinted_handoff.hh" -#include "api/api-doc/hinted_handoff.json.hh" - -namespace api { - -using namespace json; -namespace hh = httpd::hinted_handoff_json; - -void set_hinted_handoff(http_context& ctx, routes& r) { - hh::list_endpoints_pending_hints.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - std::vector res; - return make_ready_future(res); - }); - - hh::truncate_all_hints.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - sstring host = req->get_query_param("host"); - return make_ready_future(json_void()); - }); - - hh::schedule_hint_delivery.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - sstring host = req->get_query_param("host"); - return make_ready_future(json_void()); - }); - - hh::pause_hints_delivery.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - sstring pause = req->get_query_param("pause"); - return make_ready_future(json_void()); - }); - - hh::get_create_hint_count.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - sstring host = req->get_query_param("host"); - return make_ready_future(0); - }); - - hh::get_not_stored_hints_count.set(r, [] (std::unique_ptr req) { - //TBD - unimplemented(); - sstring host = req->get_query_param("host"); - return make_ready_future(0); - }); -} - -} - diff --git a/scylla/api/hinted_handoff.hh b/scylla/api/hinted_handoff.hh deleted file mode 100644 index 5b39a6e..0000000 --- a/scylla/api/hinted_handoff.hh +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "api.hh" - -namespace api { - -void set_hinted_handoff(http_context& ctx, routes& r); - -} diff --git a/scylla/api/lsa.cc b/scylla/api/lsa.cc deleted file mode 100644 index c6bb624..0000000 --- a/scylla/api/lsa.cc +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "api/api-doc/lsa.json.hh" -#include "api/lsa.hh" -#include "api/api.hh" - -#include "http/exception.hh" -#include "utils/logalloc.hh" -#include "log.hh" - -namespace api { - -static logging::logger alogger("lsa-api"); - -void set_lsa(http_context& ctx, routes& r) { - httpd::lsa_json::lsa_compact.set(r, [&ctx](std::unique_ptr req) { - alogger.info("Triggering compaction"); - return ctx.db.invoke_on_all([] (database&) { - logalloc::shard_tracker().reclaim(std::numeric_limits::max()); - }).then([] { - return json::json_return_type(json::json_void()); - }); - }); -} - -} diff --git a/scylla/api/lsa.hh b/scylla/api/lsa.hh deleted file mode 100644 index 60e9330..0000000 --- a/scylla/api/lsa.hh +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "api.hh" - -namespace api { - -void set_lsa(http_context& ctx, routes& r); - -} diff --git a/scylla/api/messaging_service.cc b/scylla/api/messaging_service.cc deleted file mode 100644 index a646d45..0000000 --- a/scylla/api/messaging_service.cc +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "messaging_service.hh" -#include "message/messaging_service.hh" -#include "rpc/rpc_types.hh" -#include "api/api-doc/messaging_service.json.hh" -#include -#include - -using namespace httpd::messaging_service_json; -using namespace netw; - -namespace api { - -using shard_info = messaging_service::shard_info; -using msg_addr = messaging_service::msg_addr; - -static const int32_t num_verb = static_cast(messaging_verb::LAST); - -std::vector map_to_message_counters( - const std::unordered_map& map) { - std::vector res; - for (auto i : map) { - res.push_back(message_counter()); - res.back().key = boost::lexical_cast(i.first); - res.back().value = i.second; - } - return res; -} - -/** - * Return a function that performs a map_reduce on messaging_service - * For each instance it calls its foreach_client method set the value - * according to a function that it gets as a parameter. - * - */ -future_json_function get_client_getter(std::function f) { - return [f](std::unique_ptr req) { - using map_type = std::unordered_map; - auto get_shard_map = [f](messaging_service& ms) { - std::unordered_map map; - ms.foreach_client([&map, f] (const msg_addr& id, const shard_info& info) { - map[id.addr] = f(info); - }); - return map; - }; - return get_messaging_service().map_reduce0(get_shard_map, map_type(), map_sum). - then([](map_type&& map) { - return make_ready_future(map_to_message_counters(map)); - }); - }; -} - -future_json_function get_server_getter(std::function f) { - return [f](std::unique_ptr req) { - using map_type = std::unordered_map; - auto get_shard_map = [f](messaging_service& ms) { - std::unordered_map map; - ms.foreach_server_connection_stats([&map, f] (const rpc::client_info& info, const rpc::stats& stats) mutable { - map[gms::inet_address(net::ipv4_address(info.addr))] = f(stats); - }); - return map; - }; - return get_messaging_service().map_reduce0(get_shard_map, map_type(), map_sum). - then([](map_type&& map) { - return make_ready_future(map_to_message_counters(map)); - }); - }; -} - -void set_messaging_service(http_context& ctx, routes& r) { - get_timeout_messages.set(r, get_client_getter([](const shard_info& c) { - return c.get_stats().timeout; - })); - - get_sent_messages.set(r, get_client_getter([](const shard_info& c) { - return c.get_stats().sent_messages; - })); - - get_dropped_messages.set(r, get_client_getter([](const shard_info& c) { - // We don't have the same drop message mechanism - // as origin has. 
- // hence we can always return 0 - return 0; - })); - - get_exception_messages.set(r, get_client_getter([](const shard_info& c) { - return c.get_stats().exception_received; - })); - - get_pending_messages.set(r, get_client_getter([](const shard_info& c) { - return c.get_stats().pending; - })); - - get_respond_pending_messages.set(r, get_server_getter([](const rpc::stats& c) { - return c.pending; - })); - - get_respond_completed_messages.set(r, get_server_getter([](const rpc::stats& c) { - return c.sent_messages; - })); - - get_version.set(r, [](const_req req) { - return netw::get_local_messaging_service().get_raw_version(req.get_query_param("addr")); - }); - - get_dropped_messages_by_ver.set(r, [](std::unique_ptr req) { - shared_ptr> map = make_shared>(num_verb); - - return netw::get_messaging_service().map_reduce([map](const uint64_t* local_map) mutable { - for (auto i = 0; i < num_verb; i++) { - (*map)[i]+= local_map[i]; - } - },[](messaging_service& ms) { - return make_ready_future(ms.get_dropped_messages()); - }).then([map]{ - std::vector res; - for (auto i : verb_counter::verb_wrapper::all_items()) { - verb_counter c; - messaging_verb v = i; // for type safety we use messaging_verb values - auto idx = static_cast(v); - if (idx >= map->size()) { - throw std::runtime_error(sprint("verb index out of bounds: %lu, map size: %lu", idx, map->size())); - } - if ((*map)[idx] > 0) { - c.count = (*map)[idx]; - c.verb = i; - res.push_back(c); - } - } - return make_ready_future(res); - }); - }); -} -} - diff --git a/scylla/api/messaging_service.hh b/scylla/api/messaging_service.hh deleted file mode 100644 index 0786afa..0000000 --- a/scylla/api/messaging_service.hh +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "api.hh" - -namespace api { - -void set_messaging_service(http_context& ctx, routes& r); - -} diff --git a/scylla/api/storage_proxy.cc b/scylla/api/storage_proxy.cc deleted file mode 100644 index 03dfcbc..0000000 --- a/scylla/api/storage_proxy.cc +++ /dev/null @@ -1,408 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "storage_proxy.hh" -#include "service/storage_proxy.hh" -#include "api/api-doc/storage_proxy.json.hh" -#include "api/api-doc/utils.json.hh" -#include "service/storage_service.hh" -#include "db/config.hh" -#include "utils/histogram.hh" - -namespace api { - -namespace sp = httpd::storage_proxy_json; -using proxy = service::storage_proxy; -using namespace json; - -static future sum_timed_rate(distributed& d, utils::timed_rate_moving_average proxy::stats::*f) { - return d.map_reduce0([f](const proxy& p) {return (p.get_stats().*f).rate();}, utils::rate_moving_average(), - std::plus()); -} - -static future sum_timed_rate_as_obj(distributed& d, utils::timed_rate_moving_average proxy::stats::*f) { - return sum_timed_rate(d, f).then([](const utils::rate_moving_average& val) { - httpd::utils_json::rate_moving_average m; - m = val; - return make_ready_future(m); - }); -} - -static future sum_timed_rate_as_long(distributed& d, utils::timed_rate_moving_average proxy::stats::*f) { - return sum_timed_rate(d, f).then([](const utils::rate_moving_average& val) { - return make_ready_future(val.count); - }); -} - -static future sum_estimated_histogram(http_context& ctx, utils::estimated_histogram proxy::stats::*f) { - return ctx.sp.map_reduce0([f](const proxy& p) {return p.get_stats().*f;}, utils::estimated_histogram(), - utils::estimated_histogram_merge).then([](const utils::estimated_histogram& val) { - utils_json::estimated_histogram res; - res = val; - return make_ready_future(res); - }); -} - -static future total_latency(http_context& ctx, utils::timed_rate_moving_average_and_histogram proxy::stats::*f) { - return ctx.sp.map_reduce0([f](const proxy& p) {return (p.get_stats().*f).hist.mean * (p.get_stats().*f).hist.count;}, 0.0, - std::plus()).then([](double val) { - int64_t res = val; - return make_ready_future(res); - }); -} - -void set_storage_proxy(http_context& ctx, routes& r) { - sp::get_total_hints.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - sp::get_hinted_handoff_enabled.set(r, [](std::unique_ptr req) { - //TBD - // FIXME - // hinted handoff is not supported currently, - // so we should return false - return make_ready_future(false); - }); - - sp::set_hinted_handoff_enabled.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto enable = req->get_query_param("enable"); - return make_ready_future(json_void()); - }); - - sp::get_hinted_handoff_enabled_by_dc.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - std::vector res; - return make_ready_future(res); - }); - - sp::set_hinted_handoff_enabled_by_dc_list.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto enable = req->get_query_param("dcs"); - return make_ready_future(json_void()); - }); - - sp::get_max_hint_window.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - sp::set_max_hint_window.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto enable = req->get_query_param("ms"); - return make_ready_future(json_void()); - }); - - sp::get_max_hints_in_progress.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(1); - }); - - sp::set_max_hints_in_progress.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto enable = req->get_query_param("qs"); - return make_ready_future(json_void()); - }); - - sp::get_hints_in_progress.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - 
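sum_timed_rate above selects which proxy::stats counter to aggregate with a pointer-to-member, then folds it across shards with map_reduce0. The following is a standalone sketch of just that mechanism, with simplified types (stats, sum_stat and the member names are illustrative, not the real service::storage_proxy API):

    #include <numeric>
    #include <vector>

    struct stats { double read_rate; double write_rate; };

    // `double stats::*f` plays the role of
    // `utils::timed_rate_moving_average proxy::stats::*f` above:
    // one function can aggregate whichever member it is pointed at.
    double sum_stat(const std::vector<stats>& per_shard, double stats::*f) {
        return std::accumulate(per_shard.begin(), per_shard.end(), 0.0,
                [f](double acc, const stats& s) { return acc + s.*f; });
    }

    // Usage: sum_stat(shards, &stats::read_rate) or sum_stat(shards, &stats::write_rate).
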
sp::get_rpc_timeout.set(r, [&ctx](const_req req) { - return ctx.db.local().get_config().request_timeout_in_ms()/1000.0; - }); - - sp::set_rpc_timeout.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto enable = req->get_query_param("timeout"); - return make_ready_future(json_void()); - }); - - sp::get_read_rpc_timeout.set(r, [&ctx](const_req req) { - return ctx.db.local().get_config().read_request_timeout_in_ms()/1000.0; - }); - - sp::set_read_rpc_timeout.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto enable = req->get_query_param("timeout"); - return make_ready_future(json_void()); - }); - - sp::get_write_rpc_timeout.set(r, [&ctx](const_req req) { - return ctx.db.local().get_config().write_request_timeout_in_ms()/1000.0; - }); - - sp::set_write_rpc_timeout.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto enable = req->get_query_param("timeout"); - return make_ready_future(json_void()); - }); - - sp::get_counter_write_rpc_timeout.set(r, [&ctx](const_req req) { - return ctx.db.local().get_config().counter_write_request_timeout_in_ms()/1000.0; - }); - - sp::set_counter_write_rpc_timeout.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto enable = req->get_query_param("timeout"); - return make_ready_future(json_void()); - }); - - sp::get_cas_contention_timeout.set(r, [&ctx](const_req req) { - return ctx.db.local().get_config().cas_contention_timeout_in_ms()/1000.0; - }); - - sp::set_cas_contention_timeout.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto enable = req->get_query_param("timeout"); - return make_ready_future(json_void()); - }); - - sp::get_range_rpc_timeout.set(r, [&ctx](const_req req) { - return ctx.db.local().get_config().range_request_timeout_in_ms()/1000.0; - }); - - sp::set_range_rpc_timeout.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto enable = req->get_query_param("timeout"); - return make_ready_future(json_void()); - }); - - sp::get_truncate_rpc_timeout.set(r, [&ctx](const_req req) { - return ctx.db.local().get_config().truncate_request_timeout_in_ms()/1000.0; - }); - - sp::set_truncate_rpc_timeout.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto enable = req->get_query_param("timeout"); - return make_ready_future(json_void()); - }); - - sp::reload_trigger_classes.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(json_void()); - }); - - sp::get_read_repair_attempted.set(r, [&ctx](std::unique_ptr req) { - return sum_stats(ctx.sp, &proxy::stats::read_repair_attempts); - }); - - sp::get_read_repair_repaired_blocking.set(r, [&ctx](std::unique_ptr req) { - return sum_stats(ctx.sp, &proxy::stats::read_repair_repaired_blocking); - }); - - sp::get_read_repair_repaired_background.set(r, [&ctx](std::unique_ptr req) { - return sum_stats(ctx.sp, &proxy::stats::read_repair_repaired_background); - }); - - sp::get_schema_versions.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().describe_schema_versions().then([] (auto result) { - std::vector res; - for (auto e : result) { - sp::mapper_list entry; - entry.key = std::move(e.first); - entry.value = std::move(e.second); - res.emplace_back(std::move(entry)); - } - return make_ready_future(std::move(res)); - }); - }); - - sp::get_cas_read_timeouts.set(r, [](std::unique_ptr req) { - //TBD - // FIXME - // cas is not supported yet, so just return 0 - return make_ready_future(0); - }); - - sp::get_cas_read_unavailables.set(r, [](std::unique_ptr req) { - 
//TBD - // FIXME - // cas is not supported yet, so just return 0 - return make_ready_future(0); - }); - - sp::get_cas_write_timeouts.set(r, [](std::unique_ptr req) { - //TBD - // FIXME - // cas is not supported yet, so just return 0 - return make_ready_future(0); - }); - - sp::get_cas_write_unavailables.set(r, [](std::unique_ptr req) { - //TBD - // FIXME - // cas is not supported yet, so just return 0 - return make_ready_future(0); - }); - - sp::get_cas_write_metrics_unfinished_commit.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - sp::get_cas_write_metrics_contention.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - sp::get_cas_write_metrics_condition_not_met.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - sp::get_cas_read_metrics_unfinished_commit.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - sp::get_cas_read_metrics_contention.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - sp::get_cas_read_metrics_condition_not_met.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - sp::get_read_metrics_timeouts.set(r, [&ctx](std::unique_ptr req) { - return sum_timed_rate_as_long(ctx.sp, &proxy::stats::read_timeouts); - }); - - sp::get_read_metrics_unavailables.set(r, [&ctx](std::unique_ptr req) { - return sum_timed_rate_as_long(ctx.sp, &proxy::stats::read_unavailables); - }); - - sp::get_range_metrics_timeouts.set(r, [&ctx](std::unique_ptr req) { - return sum_timed_rate_as_long(ctx.sp, &proxy::stats::range_slice_timeouts); - }); - - sp::get_range_metrics_unavailables.set(r, [&ctx](std::unique_ptr req) { - return sum_timed_rate_as_long(ctx.sp, &proxy::stats::range_slice_unavailables); - }); - - sp::get_write_metrics_timeouts.set(r, [&ctx](std::unique_ptr req) { - return sum_timed_rate_as_long(ctx.sp, &proxy::stats::write_timeouts); - }); - - sp::get_write_metrics_unavailables.set(r, [&ctx](std::unique_ptr req) { - return sum_timed_rate_as_long(ctx.sp, &proxy::stats::write_unavailables); - }); - - sp::get_read_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr req) { - return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::read_timeouts); - }); - - sp::get_read_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr req) { - return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::read_unavailables); - }); - - sp::get_range_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr req) { - return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::range_slice_timeouts); - }); - - sp::get_range_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr req) { - return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::range_slice_unavailables); - }); - - sp::get_write_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr req) { - return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::write_timeouts); - }); - - sp::get_write_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr req) { - return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::write_unavailables); - }); - - sp::get_range_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr req) { - return sum_histogram_stats(ctx.sp, &proxy::stats::range); - }); - - sp::get_write_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr req) { - return sum_histogram_stats(ctx.sp, &proxy::stats::write); - }); - - 
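Each timed-rate statistic above is exported twice: as a cumulative count via sum_timed_rate_as_long() and as a rates object via sum_timed_rate_as_obj(). Both reductions rely on the rate type summing member-wise across shards, roughly as in this sketch (the field names are assumptions, not the real utils::rate_moving_average):

struct rate_moving_average_sketch {
    uint64_t count = 0;             // total events observed
    double one_minute_rate = 0;     // events/sec, decayed over the last minute
    double five_minute_rate = 0;
    double fifteen_minute_rate = 0;

    // Counts and rates from different shards add independently, which is
    // what lets std::plus() serve as the reduction step in sum_timed_rate().
    rate_moving_average_sketch operator+(const rate_moving_average_sketch& o) const {
        return {count + o.count,
                one_minute_rate + o.one_minute_rate,
                five_minute_rate + o.five_minute_rate,
                fifteen_minute_rate + o.fifteen_minute_rate};
    }
};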
sp::get_read_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr req) { - return sum_histogram_stats(ctx.sp, &proxy::stats::read); - }); - - sp::get_range_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr req) { - return sum_timer_stats(ctx.sp, &proxy::stats::range); - }); - - sp::get_write_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr req) { - return sum_timer_stats(ctx.sp, &proxy::stats::write); - }); - - sp::get_read_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr req) { - return sum_timer_stats(ctx.sp, &proxy::stats::read); - }); - - sp::get_read_estimated_histogram.set(r, [&ctx](std::unique_ptr req) { - return sum_estimated_histogram(ctx, &proxy::stats::estimated_read); - }); - - sp::get_read_latency.set(r, [&ctx](std::unique_ptr req) { - return total_latency(ctx, &proxy::stats::read); - }); - sp::get_write_estimated_histogram.set(r, [&ctx](std::unique_ptr req) { - return sum_estimated_histogram(ctx, &proxy::stats::estimated_write); - }); - - sp::get_write_latency.set(r, [&ctx](std::unique_ptr req) { - return total_latency(ctx, &proxy::stats::write); - }); - - sp::get_range_estimated_histogram.set(r, [&ctx](std::unique_ptr req) { - return sum_timer_stats(ctx.sp, &proxy::stats::range); - }); - - sp::get_range_latency.set(r, [&ctx](std::unique_ptr req) { - return total_latency(ctx, &proxy::stats::range); - }); -} - -} diff --git a/scylla/api/storage_proxy.hh b/scylla/api/storage_proxy.hh deleted file mode 100644 index 4337adb..0000000 --- a/scylla/api/storage_proxy.hh +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "api.hh" - -namespace api { - -void set_storage_proxy(http_context& ctx, routes& r); - -} diff --git a/scylla/api/storage_service.cc b/scylla/api/storage_service.cc deleted file mode 100644 index 4b994e5..0000000 --- a/scylla/api/storage_service.cc +++ /dev/null @@ -1,841 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see .
- */ - -#include "storage_service.hh" -#include "api/api-doc/storage_service.json.hh" -#include "db/config.hh" -#include -#include -#include -#include -#include -#include -#include "http/exception.hh" -#include "repair/repair.hh" -#include "locator/snitch_base.hh" -#include "column_family.hh" -#include "log.hh" -#include "release.hh" - -namespace api { - -namespace ss = httpd::storage_service_json; -using namespace json; - -static sstring validate_keyspace(http_context& ctx, const parameters& param) { - if (ctx.db.local().has_keyspace(param["keyspace"])) { - return param["keyspace"]; - } - throw bad_param_exception("Keyspace " + param["keyspace"] + " does not exist"); -} - - -static std::vector describe_ring(const sstring& keyspace) { - std::vector res; - for (auto d : service::get_local_storage_service().describe_ring(keyspace)) { - ss::token_range r; - r.start_token = d._start_token; - r.end_token = d._end_token; - r.endpoints = d._endpoints; - r.rpc_endpoints = d._rpc_endpoints; - for (auto det : d._endpoint_details) { - ss::endpoint_detail ed; - ed.host = det._host; - ed.datacenter = det._datacenter; - if (det._rack != "") { - ed.rack = det._rack; - } - r.endpoint_details.push(ed); - } - res.push_back(r); - } - return res; -} - -void set_storage_service(http_context& ctx, routes& r) { - ss::local_hostid.set(r, [](std::unique_ptr req) { - return db::system_keyspace::get_local_host_id().then([](const utils::UUID& id) { - return make_ready_future(id.to_sstring()); - }); - }); - - ss::get_tokens.set(r, [] (const_req req) { - auto tokens = service::get_local_storage_service().get_token_metadata().sorted_tokens(); - return container_to_vec(tokens); - }); - - ss::get_node_tokens.set(r, [] (const_req req) { - gms::inet_address addr(req.param["endpoint"]); - auto tokens = service::get_local_storage_service().get_token_metadata().get_tokens(addr); - return container_to_vec(tokens); - }); - - ss::get_commitlog.set(r, [&ctx](const_req req) { - return ctx.db.local().commitlog()->active_config().commit_log_location; - }); - - ss::get_token_endpoint.set(r, [] (const_req req) { - auto token_to_ep = service::get_local_storage_service().get_token_to_endpoint_map(); - std::vector res; - return map_to_key_value(token_to_ep, res); - }); - - ss::get_leaving_nodes.set(r, [](const_req req) { - return container_to_vec(service::get_local_storage_service().get_token_metadata().get_leaving_endpoints()); - }); - - ss::get_moving_nodes.set(r, [](const_req req) { - auto points = service::get_local_storage_service().get_token_metadata().get_moving_endpoints(); - std::unordered_set addr; - for (auto i: points) { - addr.insert(boost::lexical_cast(i.second)); - } - return container_to_vec(addr); - }); - - ss::get_joining_nodes.set(r, [](const_req req) { - auto points = service::get_local_storage_service().get_token_metadata().get_bootstrap_tokens(); - std::unordered_set addr; - for (auto i: points) { - addr.insert(boost::lexical_cast(i.second)); - } - return container_to_vec(addr); - }); - - ss::get_release_version.set(r, [](const_req req) { - return service::get_local_storage_service().get_release_version(); - }); - - ss::get_scylla_release_version.set(r, [](const_req req) { - return scylla_version(); - }); - ss::get_schema_version.set(r, [](const_req req) { - return service::get_local_storage_service().get_schema_version(); - }); - - ss::get_all_data_file_locations.set(r, [&ctx](const_req req) { - return container_to_vec(ctx.db.local().get_config().data_file_directories()); - }); - -
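Two handler shapes recur throughout these registrations: a handler taking const_req returns its value synchronously (get_all_data_file_locations above is one), while a handler taking the request by std::unique_ptr returns a future and replies when it resolves. Schematically, with hypothetical route names and a hypothetical compute_async():

// Synchronous form: the returned value is serialised to JSON immediately.
ss::get_example_sync.set(r, [] (const_req req) {
    return sstring("immediate value");
});

// Asynchronous form: the reply is sent once the future resolves.
ss::get_example_async.set(r, [] (std::unique_ptr<request> req) {
    return compute_async().then([] (int v) {
        return make_ready_future<json::json_return_type>(v);
    });
});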
ss::get_saved_caches_location.set(r, [&ctx](const_req req) { - return ctx.db.local().get_config().saved_caches_directory(); - }); - - ss::get_range_to_endpoint_map.set(r, [&ctx](std::unique_ptr req) { - //TBD - unimplemented(); - auto keyspace = validate_keyspace(ctx, req->param); - std::vector res; - return make_ready_future(res); - }); - - ss::get_pending_range_to_endpoint_map.set(r, [&ctx](std::unique_ptr req) { - //TBD - unimplemented(); - auto keyspace = validate_keyspace(ctx, req->param); - std::vector res; - return make_ready_future(res); - }); - - ss::describe_any_ring.set(r, [&ctx](const_req req) { - return describe_ring(""); - }); - - ss::describe_ring.set(r, [&ctx](const_req req) { - auto keyspace = validate_keyspace(ctx, req.param); - return describe_ring(keyspace); - }); - - ss::get_host_id_map.set(r, [](const_req req) { - std::vector res; - return map_to_key_value(service::get_local_storage_service(). - get_token_metadata().get_endpoint_to_host_id_map_for_reading(), res); - }); - - ss::get_load.set(r, [&ctx](std::unique_ptr req) { - return get_cf_stats(ctx, &column_family::stats::live_disk_space_used); - }); - - ss::get_load_map.set(r, [] (std::unique_ptr req) { - return service::get_local_storage_service().get_load_map().then([] (auto&& load_map) { - std::vector res; - for (auto i : load_map) { - ss::map_string_double val; - val.key = i.first; - val.value = i.second; - res.push_back(val); - } - return make_ready_future(res); - }); - }); - - ss::get_current_generation_number.set(r, [](std::unique_ptr req) { - gms::inet_address ep(utils::fb_utilities::get_broadcast_address()); - return gms::get_local_gossiper().get_current_generation_number(ep).then([](int res) { - return make_ready_future(res); - }); - }); - - ss::get_natural_endpoints.set(r, [&ctx](const_req req) { - auto keyspace = validate_keyspace(ctx, req.param); - return container_to_vec(service::get_local_storage_service().get_natural_endpoints(keyspace, req.get_query_param("cf"), - req.get_query_param("key"))); - }); - - ss::get_snapshot_details.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().get_snapshot_details().then([] (auto result) { - std::vector res; - for (auto& map: result) { - ss::snapshots all_snapshots; - all_snapshots.key = map.first; - - std::vector snapshot; - for (auto& cf: map.second) { - ss::snapshot s; - s.ks = cf.ks; - s.cf = cf.cf; - s.live = cf.live; - s.total = cf.total; - snapshot.push_back(std::move(s)); - } - all_snapshots.value = std::move(snapshot); - res.push_back(std::move(all_snapshots)); - } - return make_ready_future(std::move(res)); - }); - }); - - ss::take_snapshot.set(r, [](std::unique_ptr req) { - auto tag = req->get_query_param("tag"); - auto column_family = req->get_query_param("cf"); - - std::vector keynames = split(req->get_query_param("kn"), ","); - - auto resp = make_ready_future<>(); - if (column_family.empty()) { - resp = service::get_local_storage_service().take_snapshot(tag, keynames); - } else { - if (keynames.size() > 1) { - throw httpd::bad_param_exception("Only one keyspace allowed when specifying a column family"); - } - resp = service::get_local_storage_service().take_column_family_snapshot(keynames[0], column_family, tag); - } - return resp.then([] { - return make_ready_future(json_void()); - }); - }); - - ss::del_snapshot.set(r, [](std::unique_ptr req) { - auto tag = req->get_query_param("tag"); - - std::vector keynames = split(req->get_query_param("kn"), ","); - return service::get_local_storage_service().clear_snapshot(tag, 
keynames).then([] { - return make_ready_future(json_void()); - }); - }); - - ss::true_snapshots_size.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().true_snapshots_size().then([] (int64_t size) { - return make_ready_future(size); - }); - }); - - ss::force_keyspace_compaction.set(r, [&ctx](std::unique_ptr req) { - auto keyspace = validate_keyspace(ctx, req->param); - auto column_families = split_cf(req->get_query_param("cf")); - if (column_families.empty()) { - column_families = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data()); - } - return ctx.db.invoke_on_all([keyspace, column_families] (database& db) { - std::vector column_families_vec; - for (auto cf : column_families) { - column_families_vec.push_back(&db.find_column_family(keyspace, cf)); - } - return parallel_for_each(column_families_vec, [] (column_family* cf) { - return cf->compact_all_sstables(); - }); - }).then([]{ - return make_ready_future(json_void()); - }); - }); - - ss::force_keyspace_cleanup.set(r, [&ctx](std::unique_ptr req) { - auto keyspace = validate_keyspace(ctx, req->param); - auto column_families = split_cf(req->get_query_param("cf")); - if (column_families.empty()) { - column_families = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data()); - } - return ctx.db.invoke_on_all([keyspace, column_families] (database& db) { - std::vector column_families_vec; - auto& cm = db.get_compaction_manager(); - for (auto cf : column_families) { - column_families_vec.push_back(&db.find_column_family(keyspace, cf)); - } - return parallel_for_each(column_families_vec, [&cm] (column_family* cf) { - return cm.perform_cleanup(cf); - }); - }).then([]{ - return make_ready_future(0); - }); - }); - - ss::scrub.set(r, [&ctx](std::unique_ptr req) { - //TBD - unimplemented(); - auto keyspace = validate_keyspace(ctx, req->param); - auto column_family = req->get_query_param("cf"); - auto disable_snapshot = req->get_query_param("disable_snapshot"); - auto skip_corrupted = req->get_query_param("skip_corrupted"); - return make_ready_future(json_void()); - }); - - ss::upgrade_sstables.set(r, [&ctx](std::unique_ptr req) { - //TBD - unimplemented(); - auto keyspace = validate_keyspace(ctx, req->param); - auto column_family = req->get_query_param("cf"); - auto exclude_current_version = req->get_query_param("exclude_current_version"); - return make_ready_future(json_void()); - }); - - ss::force_keyspace_flush.set(r, [&ctx](std::unique_ptr req) { - auto keyspace = validate_keyspace(ctx, req->param); - auto column_families = split_cf(req->get_query_param("cf")); - if (column_families.empty()) { - column_families = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data()); - } - return ctx.db.invoke_on_all([keyspace, column_families] (database& db) { - return parallel_for_each(column_families, [&db, keyspace](const sstring& cf) mutable { - return db.find_column_family(keyspace, cf).flush(); - }); - }).then([]{ - return make_ready_future(json_void()); - }); - }); - - - ss::repair_async.set(r, [&ctx](std::unique_ptr req) { - static std::vector options = {"primaryRange", "parallelism", "incremental", - "jobThreads", "ranges", "columnFamilies", "dataCenters", "hosts", "trace", - "startToken", "endToken" }; - std::unordered_map options_map; - for (auto o : options) { - auto s = req->get_query_param(o); - if (s != "") { - options_map[o] = s; - } - } - - // The repair process is asynchronous: repair_start only starts it and - // returns immediately, 
not waiting for the repair to finish. The user - // then has other mechanisms to track the ongoing repair's progress, - // or stop it. - return repair_start(ctx.db, validate_keyspace(ctx, req->param), - options_map).then([] (int i) { - return make_ready_future(i); - }); - }); - - ss::repair_async_status.set(r, [&ctx](std::unique_ptr req) { - return repair_get_status(ctx.db, boost::lexical_cast( req->get_query_param("id"))) - .then_wrapped([] (future&& fut) { - ss::ns_repair_async_status::return_type_wrapper res; - try { - res = fut.get0(); - } catch(std::runtime_error& e) { - return make_ready_future(json_exception(httpd::bad_param_exception(e.what()))); - } - return make_ready_future(json::json_return_type(res)); - }); - }); - - ss::force_terminate_all_repair_sessions.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(json_void()); - }); - - ss::decommission.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().decommission().then([] { - return make_ready_future(json_void()); - }); - }); - - ss::move.set(r, [] (std::unique_ptr req) { - auto new_token = req->get_query_param("new_token"); - return service::get_local_storage_service().move(new_token).then([] { - return make_ready_future(json_void()); - }); - }); - - ss::remove_node.set(r, [](std::unique_ptr req) { - auto host_id = req->get_query_param("host_id"); - return service::get_local_storage_service().removenode(host_id).then([] { - return make_ready_future(json_void()); - }); - }); - - ss::get_removal_status.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().get_removal_status().then([] (auto status) { - return make_ready_future(status); - }); - }); - - ss::force_remove_completion.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().force_remove_completion().then([] { - return make_ready_future(json_void()); - }); - }); - - ss::set_logging_level.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto class_qualifier = req->get_query_param("class_qualifier"); - auto level = req->get_query_param("level"); - return make_ready_future(json_void()); - }); - - ss::get_logging_levels.set(r, [](std::unique_ptr req) { - std::vector res; - for (auto i : logging::logger_registry().get_all_logger_names()) { - ss::mapper log; - log.key = i; - log.value = logging::level_name(logging::logger_registry().get_logger_level(i)); - res.push_back(log); - } - return make_ready_future(res); - }); - - ss::get_operation_mode.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().get_operation_mode().then([] (auto mode) { - return make_ready_future(mode); - }); - }); - - ss::is_starting.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().is_starting().then([] (auto starting) { - return make_ready_future(starting); - }); - }); - - ss::get_drain_progress.set(r, [](std::unique_ptr req) { - return service::get_storage_service().map_reduce(adder(), [] (auto& ss) { - return ss.get_drain_progress(); - }).then([] (auto&& progress) { - auto progress_str = sprint("Drained %s/%s ColumnFamilies", progress.remaining_cfs, progress.total_cfs); - return make_ready_future(std::move(progress_str)); - }); - }); - - ss::drain.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().drain().then([] { - return make_ready_future(json_void()); - }); - }); - ss::truncate.set(r, [&ctx](std::unique_ptr req) { - //TBD - unimplemented(); - auto keyspace = validate_keyspace(ctx, req->param); - 
auto column_family = req->get_query_param("cf"); - return make_ready_future(json_void()); - }); - - ss::get_keyspaces.set(r, [&ctx](const_req req) { - auto type = req.get_query_param("type"); - if (type == "user") { - return ctx.db.local().get_non_system_keyspaces(); - } else if (type == "non_local_strategy") { - return map_keys(ctx.db.local().get_keyspaces() | boost::adaptors::filtered([](const auto& p) { - return p.second.get_replication_strategy().get_type() != locator::replication_strategy_type::local; - })); - } - return map_keys(ctx.db.local().get_keyspaces()); - }); - - ss::update_snitch.set(r, [](std::unique_ptr req) { - auto ep_snitch_class_name = req->get_query_param("ep_snitch_class_name"); - return locator::i_endpoint_snitch::reset_snitch(ep_snitch_class_name).then([] { - return make_ready_future(json_void()); - }); - }); - - ss::stop_gossiping.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().stop_gossiping().then([] { - return make_ready_future(json_void()); - }); - }); - - ss::start_gossiping.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().start_gossiping().then([] { - return make_ready_future(json_void()); - }); - }); - - ss::is_gossip_running.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().is_gossip_running().then([] (bool running){ - return make_ready_future(running); - }); - }); - - - ss::stop_daemon.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(json_void()); - }); - - ss::is_initialized.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().is_initialized().then([] (bool initialized) { - return make_ready_future(initialized); - }); - }); - - ss::stop_rpc_server.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().stop_rpc_server().then([] { - return make_ready_future(json_void()); - }); - }); - - ss::start_rpc_server.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().start_rpc_server().then([] { - return make_ready_future(json_void()); - }); - }); - - ss::is_rpc_server_running.set(r, [] (std::unique_ptr req) { - return service::get_local_storage_service().is_rpc_server_running().then([] (bool running) { - return make_ready_future(running); - }); - }); - - ss::start_native_transport.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().start_native_transport().then([] { - return make_ready_future(json_void()); - }); - }); - - ss::stop_native_transport.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().stop_native_transport().then([] { - return make_ready_future(json_void()); - }); - }); - - ss::is_native_transport_running.set(r, [] (std::unique_ptr req) { - return service::get_local_storage_service().is_native_transport_running().then([] (bool running) { - return make_ready_future(running); - }); - }); - - ss::join_ring.set(r, [](std::unique_ptr req) { - return service::get_local_storage_service().join_ring().then([] { - return make_ready_future(json_void()); - }); - }); - - ss::is_joined.set(r, [] (std::unique_ptr req) { - return make_ready_future(service::get_local_storage_service().is_joined()); - }); - - ss::set_stream_throughput_mb_per_sec.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto value = req->get_query_param("value"); - return make_ready_future(json_void()); - }); - - ss::get_stream_throughput_mb_per_sec.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return 
make_ready_future(0); - }); - - ss::get_compaction_throughput_mb_per_sec.set(r, [&ctx](std::unique_ptr req) { - int value = ctx.db.local().get_config().compaction_throughput_mb_per_sec(); - return make_ready_future(value); - }); - - ss::set_compaction_throughput_mb_per_sec.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto value = req->get_query_param("value"); - return make_ready_future(json_void()); - }); - - ss::is_incremental_backups_enabled.set(r, [](std::unique_ptr req) { - // If this is issued in parallel with an ongoing change, we may see values not agreeing. - // Reissuing is asking for trouble, so we will just return true upon seeing any true value. - return service::get_local_storage_service().db().map_reduce(adder(), [] (database& db) { - for (auto& pair: db.get_keyspaces()) { - auto& ks = pair.second; - if (ks.incremental_backups_enabled()) { - return true; - } - } - return false; - }).then([] (bool val) { - return make_ready_future(val); - }); - }); - - ss::set_incremental_backups_enabled.set(r, [](std::unique_ptr req) { - auto val_str = req->get_query_param("value"); - bool value = (val_str == "True") || (val_str == "true") || (val_str == "1"); - return service::get_local_storage_service().db().invoke_on_all([value] (database& db) { - db.set_enable_incremental_backups(value); - - // Change both KS and CF, so they are in sync - for (auto& pair: db.get_keyspaces()) { - auto& ks = pair.second; - ks.set_incremental_backups(value); - } - - for (auto& pair: db.get_column_families()) { - auto cf_ptr = pair.second; - cf_ptr->set_incremental_backups(value); - } - }).then([] { - return make_ready_future(json_void()); - }); - }); - - ss::rebuild.set(r, [](std::unique_ptr req) { - auto source_dc = req->get_query_param("source_dc"); - return service::get_local_storage_service().rebuild(std::move(source_dc)).then([] { - return make_ready_future(json_void()); - }); - }); - - ss::bulk_load.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto path = req->param["path"]; - return make_ready_future(json_void()); - }); - - ss::bulk_load_async.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto path = req->param["path"]; - return make_ready_future(json_void()); - }); - - ss::reschedule_failed_deletions.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(json_void()); - }); - - ss::load_new_ss_tables.set(r, [&ctx](std::unique_ptr req) { - auto ks = validate_keyspace(ctx, req->param); - auto cf = req->get_query_param("cf"); - // No need to add the keyspace, since all we want is to avoid always sending this to the same - // CPU. Even then I am being overzealous here. This is not something that happens all the time. 
- auto coordinator = std::hash()(cf) % smp::count; - return service::get_storage_service().invoke_on(coordinator, [ks = std::move(ks), cf = std::move(cf)] (service::storage_service& s) { - return s.load_new_sstables(ks, cf); - }).then([] { - return make_ready_future(json_void()); - }); - }); - - ss::sample_key_range.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - std::vector res; - return make_ready_future(res); - }); - - ss::reset_local_schema.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(json_void()); - }); - - ss::set_trace_probability.set(r, [](std::unique_ptr req) { - auto probability = req->get_query_param("probability"); - return futurize::apply([probability] { - double real_prob = std::stod(probability.c_str()); - return tracing::tracing::tracing_instance().invoke_on_all([real_prob] (auto& local_tracing) { - local_tracing.set_trace_probability(real_prob); - }).then([] { - return make_ready_future(json_void()); - }); - }).then_wrapped([probability] (auto&& f) { - try { - f.get(); - return make_ready_future(json_void()); - } catch (std::out_of_range& e) { - throw httpd::bad_param_exception(e.what()); - } catch (std::invalid_argument&){ - throw httpd::bad_param_exception(sprint("Bad format in a probability value: \"%s\"", probability.c_str())); - } - }); - }); - - ss::get_trace_probability.set(r, [](std::unique_ptr req) { - return make_ready_future(tracing::tracing::get_local_tracing_instance().get_trace_probability()); - }); - - ss::get_slow_query_info.set(r, [](const_req req) { - ss::slow_query_info res; - res.enable = tracing::tracing::get_local_tracing_instance().slow_query_tracing_enabled(); - res.ttl = tracing::tracing::get_local_tracing_instance().slow_query_record_ttl().count() ; - res.threshold = tracing::tracing::get_local_tracing_instance().slow_query_threshold().count(); - return res; - }); - - ss::set_slow_query.set(r, [](std::unique_ptr req) { - auto enable = req->get_query_param("enable"); - auto ttl = req->get_query_param("ttl"); - auto threshold = req->get_query_param("threshold"); - try { - return tracing::tracing::tracing_instance().invoke_on_all([enable, ttl, threshold] (auto& local_tracing) { - if (threshold != "") { - local_tracing.set_slow_query_threshold(std::chrono::microseconds(std::stol(threshold.c_str()))); - } - if (ttl != "") { - local_tracing.set_slow_query_record_ttl(std::chrono::seconds(std::stol(ttl.c_str()))); - } - if (enable != "") { - local_tracing.set_slow_query_enabled(strcasecmp(enable.c_str(), "true") == 0); - } - }).then([] { - return make_ready_future(json_void()); - }); - } catch (...) 
{ - throw httpd::bad_param_exception(sprint("Bad format in a ttl or threshold value")); - } - }); - - ss::enable_auto_compaction.set(r, [&ctx](std::unique_ptr req) { - //TBD - unimplemented(); - auto keyspace = validate_keyspace(ctx, req->param); - auto column_family = req->get_query_param("cf"); - return make_ready_future(json_void()); - }); - - ss::disable_auto_compaction.set(r, [&ctx](std::unique_ptr req) { - //TBD - unimplemented(); - auto keyspace = validate_keyspace(ctx, req->param); - auto column_family = req->get_query_param("cf"); - return make_ready_future(json_void()); - }); - - ss::deliver_hints.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto host = req->get_query_param("host"); - return make_ready_future(json_void()); - }); - - ss::get_cluster_name.set(r, [](const_req req) { - return gms::get_local_gossiper().get_cluster_name(); - }); - - ss::get_partitioner_name.set(r, [](const_req req) { - return gms::get_local_gossiper().get_partitioner_name(); - }); - - ss::get_tombstone_warn_threshold.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - ss::set_tombstone_warn_threshold.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto debug_threshold = req->get_query_param("debug_threshold"); - return make_ready_future(json_void()); - }); - - ss::get_tombstone_failure_threshold.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - ss::set_tombstone_failure_threshold.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto debug_threshold = req->get_query_param("debug_threshold"); - return make_ready_future(json_void()); - }); - - ss::get_batch_size_failure_threshold.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - ss::set_batch_size_failure_threshold.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto threshold = req->get_query_param("threshold"); - return make_ready_future(json_void()); - }); - - ss::set_hinted_handoff_throttle_in_kb.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - auto debug_threshold = req->get_query_param("throttle"); - return make_ready_future(json_void()); - }); - - ss::get_metrics_load.set(r, [&ctx](std::unique_ptr req) { - return get_cf_stats(ctx, &column_family::stats::live_disk_space_used); - }); - - ss::get_exceptions.set(r, [](const_req req) { - return service::get_local_storage_service().get_exception_count(); - }); - - ss::get_total_hints_in_progress.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - ss::get_total_hints.set(r, [](std::unique_ptr req) { - //TBD - unimplemented(); - return make_ready_future(0); - }); - - ss::get_ownership.set(r, [] (std::unique_ptr req) { - return service::get_local_storage_service().get_ownership().then([] (auto&& ownership) { - std::vector res; - return make_ready_future(map_to_key_value(ownership, res)); - }); - }); - - ss::get_effective_ownership.set(r, [&ctx] (std::unique_ptr req) { - auto keyspace_name = req->param["keyspace"] == "null" ?
"" : validate_keyspace(ctx, req->param); - return service::get_local_storage_service().effective_ownership(keyspace_name).then([] (auto&& ownership) { - std::vector res; - return make_ready_future(map_to_key_value(ownership, res)); - }); - }); -} - -} diff --git a/scylla/api/storage_service.hh b/scylla/api/storage_service.hh deleted file mode 100644 index 301b60f..0000000 --- a/scylla/api/storage_service.hh +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "api.hh" - -namespace api { - -void set_storage_service(http_context& ctx, routes& r); - -} diff --git a/scylla/api/stream_manager.cc b/scylla/api/stream_manager.cc deleted file mode 100644 index 23cb436..0000000 --- a/scylla/api/stream_manager.cc +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "stream_manager.hh" -#include "streaming/stream_manager.hh" -#include "streaming/stream_result_future.hh" -#include "api/api-doc/stream_manager.json.hh" -#include -#include "gms/gossiper.hh" - -namespace api { - -namespace hs = httpd::stream_manager_json; - -static void set_summaries(const std::vector& from, - json::json_list& to) { - if (!from.empty()) { - hs::stream_summary res; - res.cf_id = boost::lexical_cast(from.front().cf_id); - // For each stream_session, we pretend we are sending/receiving one - // file, to make it compatible with nodetool. - res.files = 1; - // We can not estimate total number of bytes the stream_session will - // send or recvieve since we don't know the size of the frozen_mutation - // until we read it. 
- res.total_size = 0; - to.push(res); - } -} - -static hs::progress_info get_progress_info(const streaming::progress_info& info) { - hs::progress_info res; - res.current_bytes = info.current_bytes; - res.direction = info.dir; - res.file_name = info.file_name; - res.peer = boost::lexical_cast(info.peer); - res.session_index = 0; - res.total_bytes = info.total_bytes; - return res; -} - -static void set_files(const std::map& from, - json::json_list& to) { - for (auto i : from) { - hs::progress_info_mapper m; - m.key = i.first; - m.value = get_progress_info(i.second); - to.push(m); - } -} - -static hs::stream_state get_state( - streaming::stream_result_future& result_future) { - hs::stream_state state; - state.description = result_future.description; - state.plan_id = result_future.plan_id.to_sstring(); - for (auto info : result_future.get_coordinator().get()->get_all_session_info()) { - hs::stream_info si; - si.peer = boost::lexical_cast(info.peer); - si.session_index = 0; - si.state = info.state; - si.connecting = si.peer; - set_summaries(info.receiving_summaries, si.receiving_summaries); - set_summaries(info.sending_summaries, si.sending_summaries); - set_files(info.receiving_files, si.receiving_files); - set_files(info.sending_files, si.sending_files); - state.sessions.push(si); - } - return state; -} - -void set_stream_manager(http_context& ctx, routes& r) { - hs::get_current_streams.set(r, - [] (std::unique_ptr req) { - return streaming::get_stream_manager().invoke_on_all([] (auto& sm) { - return sm.update_all_progress_info(); - }).then([] { - return streaming::get_stream_manager().map_reduce0([](streaming::stream_manager& stream) { - std::vector res; - for (auto i : stream.get_initiated_streams()) { - res.push_back(get_state(*i.second.get())); - } - for (auto i : stream.get_receiving_streams()) { - res.push_back(get_state(*i.second.get())); - } - return res; - }, std::vector(),concat). 
- then([](const std::vector& res) { - return make_ready_future(res); - }); - }); - }); - - hs::get_all_active_streams_outbound.set(r, [](std::unique_ptr req) { - return streaming::get_stream_manager().map_reduce0([](streaming::stream_manager& stream) { - return stream.get_initiated_streams().size(); - }, 0, std::plus()).then([](int64_t res) { - return make_ready_future(res); - }); - }); - - hs::get_total_incoming_bytes.set(r, [](std::unique_ptr req) { - gms::inet_address peer(req->param["peer"]); - return streaming::get_stream_manager().map_reduce0([peer](streaming::stream_manager& sm) { - return sm.get_progress_on_all_shards(peer).then([] (auto sbytes) { - return sbytes.bytes_received; - }); - }, 0, std::plus()).then([](int64_t res) { - return make_ready_future(res); - }); - }); - - hs::get_all_total_incoming_bytes.set(r, [](std::unique_ptr req) { - return streaming::get_stream_manager().map_reduce0([](streaming::stream_manager& sm) { - return sm.get_progress_on_all_shards().then([] (auto sbytes) { - return sbytes.bytes_received; - }); - }, 0, std::plus()).then([](int64_t res) { - return make_ready_future(res); - }); - }); - - hs::get_total_outgoing_bytes.set(r, [](std::unique_ptr req) { - gms::inet_address peer(req->param["peer"]); - return streaming::get_stream_manager().map_reduce0([peer] (streaming::stream_manager& sm) { - return sm.get_progress_on_all_shards(peer).then([] (auto sbytes) { - return sbytes.bytes_sent; - }); - }, 0, std::plus()).then([](int64_t res) { - return make_ready_future(res); - }); - }); - - hs::get_all_total_outgoing_bytes.set(r, [](std::unique_ptr req) { - return streaming::get_stream_manager().map_reduce0([](streaming::stream_manager& sm) { - return sm.get_progress_on_all_shards().then([] (auto sbytes) { - return sbytes.bytes_sent; - }); - }, 0, std::plus()).then([](int64_t res) { - return make_ready_future(res); - }); - }); -} - -} diff --git a/scylla/api/stream_manager.hh b/scylla/api/stream_manager.hh deleted file mode 100644 index d5516a1..0000000 --- a/scylla/api/stream_manager.hh +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "api.hh" - -namespace api { - -void set_stream_manager(http_context& ctx, routes& r); - -} diff --git a/scylla/api/system.cc b/scylla/api/system.cc deleted file mode 100644 index c51f820..0000000 --- a/scylla/api/system.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "api/api-doc/system.json.hh" -#include "api/api.hh" - -#include "http/exception.hh" -#include "log.hh" - -namespace api { - -namespace hs = httpd::system_json; - -void set_system(http_context& ctx, routes& r) { - hs::get_all_logger_names.set(r, [](const_req req) { - return logging::logger_registry().get_all_logger_names(); - }); - - hs::set_all_logger_level.set(r, [](const_req req) { - try { - logging::log_level level = boost::lexical_cast(std::string(req.get_query_param("level"))); - logging::logger_registry().set_all_loggers_level(level); - } catch (boost::bad_lexical_cast& e) { - throw bad_param_exception("Unknown logging level " + req.get_query_param("level")); - } - return json::json_void(); - }); - - hs::get_logger_level.set(r, [](const_req req) { - try { - return logging::level_name(logging::logger_registry().get_logger_level(req.param["name"])); - } catch (std::out_of_range& e) { - throw bad_param_exception("Unknown logger name " + req.param["name"]); - } - // just to keep the compiler happy - return sstring(); - }); - - hs::set_logger_level.set(r, [](const_req req) { - try { - logging::log_level level = boost::lexical_cast(std::string(req.get_query_param("level"))); - logging::logger_registry().set_logger_level(req.param["name"], level); - } catch (std::out_of_range& e) { - throw bad_param_exception("Unknown logger name " + req.param["name"]); - } catch (boost::bad_lexical_cast& e) { - throw bad_param_exception("Unknown logging level " + req.get_query_param("level")); - } - return json::json_void(); - }); -} - -} diff --git a/scylla/api/system.hh b/scylla/api/system.hh deleted file mode 100644 index e03e584..0000000 --- a/scylla/api/system.hh +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "api.hh" - -namespace api { - -void set_system(http_context& ctx, routes& r); - -} diff --git a/scylla/atomic_cell.hh b/scylla/atomic_cell.hh deleted file mode 100644 index 296cd26..0000000 --- a/scylla/atomic_cell.hh +++ /dev/null @@ -1,396 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "bytes.hh" -#include "timestamp.hh" -#include "tombstone.hh" -#include "gc_clock.hh" -#include "utils/managed_bytes.hh" -#include "net/byteorder.hh" -#include -#include -#include - -template -static inline -void set_field(Input& v, unsigned offset, T val) { - reinterpret_cast*>(v.begin() + offset)->raw = net::hton(val); -} - -template -static inline -T get_field(const bytes_view& v, unsigned offset) { - return net::ntoh(*reinterpret_cast*>(v.begin() + offset)); -} - -class atomic_cell_or_collection; - -/* - * Represents atomic cell layout. Works on serialized form. - * - * Layout: - * - * <live cell> := <flags(int8)><timestamp(int64)>(<expiry(int32)><ttl(int32)>)?<value> - * <dead cell> := <flags(int8)><timestamp(int64)><deletion_time(int32)> - */ -class atomic_cell_type final { -private: - static constexpr int8_t LIVE_FLAG = 0x01; - static constexpr int8_t EXPIRY_FLAG = 0x02; // When present, expiry field is present. Set only for live cells - static constexpr int8_t REVERT_FLAG = 0x04; // transient flag used to efficiently implement ReversiblyMergeable for atomic cells. - static constexpr int8_t COUNTER_UPDATE_FLAG = 0x08; // Cell is a counter update. - static constexpr int8_t COUNTER_IN_PLACE_REVERT = 0x10; - static constexpr unsigned flags_size = 1; - static constexpr unsigned timestamp_offset = flags_size; - static constexpr unsigned timestamp_size = 8; - static constexpr unsigned expiry_offset = timestamp_offset + timestamp_size; - static constexpr unsigned expiry_size = 4; - static constexpr unsigned deletion_time_offset = timestamp_offset + timestamp_size; - static constexpr unsigned deletion_time_size = 4; - static constexpr unsigned ttl_offset = expiry_offset + expiry_size; - static constexpr unsigned ttl_size = 4; - friend class counter_cell_builder; -private: - static bool is_counter_update(bytes_view cell) { - return cell[0] & COUNTER_UPDATE_FLAG; - } - static bool is_revert_set(bytes_view cell) { - return cell[0] & REVERT_FLAG; - } - static bool is_counter_in_place_revert_set(bytes_view cell) { - return cell[0] & COUNTER_IN_PLACE_REVERT; - } - template - static void set_revert(BytesContainer& cell, bool revert) { - cell[0] = (cell[0] & ~REVERT_FLAG) | (revert * REVERT_FLAG); - } - template - static void set_counter_in_place_revert(BytesContainer& cell, bool flag) { - cell[0] = (cell[0] & ~COUNTER_IN_PLACE_REVERT) | (flag * COUNTER_IN_PLACE_REVERT); - } - static bool is_live(const bytes_view& cell) { - return cell[0] & LIVE_FLAG; - } - static bool is_live_and_has_ttl(const bytes_view& cell) { - return cell[0] & EXPIRY_FLAG; - } - static bool is_dead(const bytes_view& cell) { - return !is_live(cell); - } - // Can be called on live and dead cells - static api::timestamp_type timestamp(const bytes_view& cell) { - return get_field(cell, timestamp_offset); - } - template - static void set_timestamp(BytesContainer& cell, api::timestamp_type ts) { - set_field(cell, timestamp_offset, ts); - } - // Can be called on live cells only -private: - template - static BytesView do_get_value(BytesView cell) { - auto expiry_field_size = bool(cell[0] & EXPIRY_FLAG) * (expiry_size + ttl_size); - auto value_offset = flags_size + timestamp_size + expiry_field_size; - cell.remove_prefix(value_offset); - return cell; - } -public: - static bytes_view
value(bytes_view cell) { - return do_get_value(cell); - } - static bytes_mutable_view value(bytes_mutable_view cell) { - return do_get_value(cell); - } - // Can be called on live counter update cells only - static int64_t counter_update_value(bytes_view cell) { - return get_field(cell, flags_size + timestamp_size); - } - // Can be called only when is_dead() is true. - static gc_clock::time_point deletion_time(const bytes_view& cell) { - assert(is_dead(cell)); - return gc_clock::time_point(gc_clock::duration( - get_field(cell, deletion_time_offset))); - } - // Can be called only when is_live_and_has_ttl() is true. - static gc_clock::time_point expiry(const bytes_view& cell) { - assert(is_live_and_has_ttl(cell)); - auto expiry = get_field(cell, expiry_offset); - return gc_clock::time_point(gc_clock::duration(expiry)); - } - // Can be called only when is_live_and_has_ttl() is true. - static gc_clock::duration ttl(const bytes_view& cell) { - assert(is_live_and_has_ttl(cell)); - return gc_clock::duration(get_field(cell, ttl_offset)); - } - static managed_bytes make_dead(api::timestamp_type timestamp, gc_clock::time_point deletion_time) { - managed_bytes b(managed_bytes::initialized_later(), flags_size + timestamp_size + deletion_time_size); - b[0] = 0; - set_field(b, timestamp_offset, timestamp); - set_field(b, deletion_time_offset, deletion_time.time_since_epoch().count()); - return b; - } - static managed_bytes make_live(api::timestamp_type timestamp, bytes_view value) { - auto value_offset = flags_size + timestamp_size; - managed_bytes b(managed_bytes::initialized_later(), value_offset + value.size()); - b[0] = LIVE_FLAG; - set_field(b, timestamp_offset, timestamp); - std::copy_n(value.begin(), value.size(), b.begin() + value_offset); - return b; - } - static managed_bytes make_live_counter_update(api::timestamp_type timestamp, int64_t value) { - auto value_offset = flags_size + timestamp_size; - managed_bytes b(managed_bytes::initialized_later(), value_offset + sizeof(value)); - b[0] = LIVE_FLAG | COUNTER_UPDATE_FLAG; - set_field(b, timestamp_offset, timestamp); - set_field(b, value_offset, value); - return b; - } - static managed_bytes make_live(api::timestamp_type timestamp, bytes_view value, gc_clock::time_point expiry, gc_clock::duration ttl) { - auto value_offset = flags_size + timestamp_size + expiry_size + ttl_size; - managed_bytes b(managed_bytes::initialized_later(), value_offset + value.size()); - b[0] = EXPIRY_FLAG | LIVE_FLAG; - set_field(b, timestamp_offset, timestamp); - set_field(b, expiry_offset, expiry.time_since_epoch().count()); - set_field(b, ttl_offset, ttl.count()); - std::copy_n(value.begin(), value.size(), b.begin() + value_offset); - return b; - } - // make_live_from_serializer() is intended for users who need to serialise - // some object or objects to the format used in atomic_cell::value(). - // With just make_live() the pattern would look as follows: - // 1. allocate a buffer and write the serialised objects to it - // 2. pass that buffer to make_live() - // 3. make_live() needs to prepend some metadata to the cell value so it - // allocates a new buffer and copies the content of the original one - // - // The allocation and copy of a buffer can be avoided. - // make_live_from_serializer() allows the user code to specify the timestamp - // and size of the cell value as well as provide the serialiser function - // object, which would write the serialised value of the cell to the buffer - // given to it by make_live_from_serializer().
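    // Usage sketch (illustrative, not from the original source; obj,
    // serialized_size() and serialize_to() are hypothetical):
    //
    //   auto cell = atomic_cell_type::make_live_from_serializer(
    //       api::new_timestamp(), serialized_size(obj),
    //       [&] (bytes::iterator out) { serialize_to(obj, out); });
    //
    // The serialiser writes directly into the cell's final buffer, so no
    // temporary buffer is allocated or copied.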
- template - GCC6_CONCEPT(requires requires(Serializer serializer, bytes::iterator it) { - serializer(it); - }) - static managed_bytes make_live_from_serializer(api::timestamp_type timestamp, size_t size, Serializer&& serializer) { - auto value_offset = flags_size + timestamp_size; - managed_bytes b(managed_bytes::initialized_later(), value_offset + size); - b[0] = LIVE_FLAG; - set_field(b, timestamp_offset, timestamp); - serializer(b.begin() + value_offset); - return b; - } - template - friend class atomic_cell_base; - friend class atomic_cell; -}; - -template -class atomic_cell_base { -protected: - ByteContainer _data; -protected: - atomic_cell_base(ByteContainer&& data) : _data(std::forward(data)) { } - friend class atomic_cell_or_collection; -public: - bool is_counter_update() const { - return atomic_cell_type::is_counter_update(_data); - } - bool is_revert_set() const { - return atomic_cell_type::is_revert_set(_data); - } - bool is_counter_in_place_revert_set() const { - return atomic_cell_type::is_counter_in_place_revert_set(_data); - } - bool is_live() const { - return atomic_cell_type::is_live(_data); - } - bool is_live(tombstone t, bool is_counter) const { - return is_live() && !is_covered_by(t, is_counter); - } - bool is_live(tombstone t, gc_clock::time_point now, bool is_counter) const { - return is_live() && !is_covered_by(t, is_counter) && !has_expired(now); - } - bool is_live_and_has_ttl() const { - return atomic_cell_type::is_live_and_has_ttl(_data); - } - bool is_dead(gc_clock::time_point now) const { - return atomic_cell_type::is_dead(_data) || has_expired(now); - } - bool is_covered_by(tombstone t, bool is_counter) const { - return timestamp() <= t.timestamp || (is_counter && t.timestamp != api::missing_timestamp); - } - // Can be called on live and dead cells - api::timestamp_type timestamp() const { - return atomic_cell_type::timestamp(_data); - } - void set_timestamp(api::timestamp_type ts) { - atomic_cell_type::set_timestamp(_data, ts); - } - // Can be called on live cells only - auto value() const { - return atomic_cell_type::value(_data); - } - // Can be called on live counter update cells only - int64_t counter_update_value() const { - return atomic_cell_type::counter_update_value(_data); - } - // Can be called only when is_dead(gc_clock::time_point) - gc_clock::time_point deletion_time() const { - return !is_live() ? 
atomic_cell_type::deletion_time(_data) : expiry() - ttl(); - } - // Can be called only when is_live_and_has_ttl() - gc_clock::time_point expiry() const { - return atomic_cell_type::expiry(_data); - } - // Can be called only when is_live_and_has_ttl() - gc_clock::duration ttl() const { - return atomic_cell_type::ttl(_data); - } - // Can be called on live and dead cells - bool has_expired(gc_clock::time_point now) const { - return is_live_and_has_ttl() && expiry() < now; - } - bytes_view serialize() const { - return _data; - } - void set_revert(bool revert) { - atomic_cell_type::set_revert(_data, revert); - } - void set_counter_in_place_revert(bool flag) { - atomic_cell_type::set_counter_in_place_revert(_data, flag); - } -}; - -class atomic_cell_view final : public atomic_cell_base { - atomic_cell_view(bytes_view data) : atomic_cell_base(std::move(data)) {} -public: - static atomic_cell_view from_bytes(bytes_view data) { return atomic_cell_view(data); } - - friend class atomic_cell; - friend std::ostream& operator<<(std::ostream& os, const atomic_cell_view& acv); -}; - -class atomic_cell_mutable_view final : public atomic_cell_base { - atomic_cell_mutable_view(bytes_mutable_view data) : atomic_cell_base(std::move(data)) {} -public: - static atomic_cell_mutable_view from_bytes(bytes_mutable_view data) { return atomic_cell_mutable_view(data); } - - friend class atomic_cell; -}; - -class atomic_cell_ref final : public atomic_cell_base { -public: - atomic_cell_ref(managed_bytes& buf) : atomic_cell_base(buf) {} -}; - -class atomic_cell final : public atomic_cell_base { - atomic_cell(managed_bytes b) : atomic_cell_base(std::move(b)) {} -public: - atomic_cell(const atomic_cell&) = default; - atomic_cell(atomic_cell&&) = default; - atomic_cell& operator=(const atomic_cell&) = default; - atomic_cell& operator=(atomic_cell&&) = default; - static atomic_cell from_bytes(managed_bytes b) { - return atomic_cell(std::move(b)); - } - atomic_cell(atomic_cell_view other) : atomic_cell_base(managed_bytes{other._data}) {} - operator atomic_cell_view() const { - return atomic_cell_view(_data); - } - static atomic_cell make_dead(api::timestamp_type timestamp, gc_clock::time_point deletion_time) { - return atomic_cell_type::make_dead(timestamp, deletion_time); - } - static atomic_cell make_live(api::timestamp_type timestamp, bytes_view value) { - return atomic_cell_type::make_live(timestamp, value); - } - static atomic_cell make_live(api::timestamp_type timestamp, const bytes& value) { - return make_live(timestamp, bytes_view(value)); - } - static atomic_cell make_live_counter_update(api::timestamp_type timestamp, int64_t value) { - return atomic_cell_type::make_live_counter_update(timestamp, value); - } - static atomic_cell make_live(api::timestamp_type timestamp, bytes_view value, - gc_clock::time_point expiry, gc_clock::duration ttl) - { - return atomic_cell_type::make_live(timestamp, value, expiry, ttl); - } - static atomic_cell make_live(api::timestamp_type timestamp, const bytes& value, - gc_clock::time_point expiry, gc_clock::duration ttl) - { - return make_live(timestamp, bytes_view(value), expiry, ttl); - } - static atomic_cell make_live(api::timestamp_type timestamp, bytes_view value, ttl_opt ttl) { - if (!ttl) { - return atomic_cell_type::make_live(timestamp, value); - } else { - return atomic_cell_type::make_live(timestamp, value, gc_clock::now() + *ttl, *ttl); - } - } - template - static atomic_cell make_live_from_serializer(api::timestamp_type timestamp, size_t size, Serializer&& serializer) { - 
-        return atomic_cell_type::make_live_from_serializer(timestamp, size, std::forward<Serializer>(serializer));
-    }
-    friend class atomic_cell_or_collection;
-    friend std::ostream& operator<<(std::ostream& os, const atomic_cell& ac);
-};
-
-class collection_mutation_view;
-
-// Represents a mutation of a collection. Actual format is determined by collection type,
-// and is:
-//   set:  list of atomic_cell
-//   map:  list of pair (for key/value)
-//   list: tbd, probably ugly
-class collection_mutation {
-public:
-    managed_bytes data;
-    collection_mutation() {}
-    collection_mutation(managed_bytes b) : data(std::move(b)) {}
-    collection_mutation(collection_mutation_view v);
-    operator collection_mutation_view() const;
-};
-
-class collection_mutation_view {
-public:
-    bytes_view data;
-    bytes_view serialize() const { return data; }
-    static collection_mutation_view from_bytes(bytes_view v) { return { v }; }
-};
-
-inline
-collection_mutation::collection_mutation(collection_mutation_view v)
-    : data(v.data) {
-}
-
-inline
-collection_mutation::operator collection_mutation_view() const {
-    return { data };
-}
-
-class column_definition;
-
-int compare_atomic_cell_for_merge(atomic_cell_view left, atomic_cell_view right);
-void merge_column(const column_definition& def,
-        atomic_cell_or_collection& old,
-        const atomic_cell_or_collection& neww);
diff --git a/scylla/atomic_cell_hash.hh b/scylla/atomic_cell_hash.hh
deleted file mode 100644
index 9037f55..0000000
--- a/scylla/atomic_cell_hash.hh
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-// Not part of atomic_cell.hh to avoid cyclic dependency between types.hh and atomic_cell.hh
-
-#include "types.hh"
-#include "atomic_cell.hh"
-#include "hashing.hh"
-#include "counters.hh"
-
-template<>
-struct appending_hash<collection_mutation_view> {
-    template<typename Hasher>
-    void operator()(Hasher& h, collection_mutation_view cell, const column_definition& cdef) const {
-        auto m_view = collection_type_impl::deserialize_mutation_form(cell);
-        ::feed_hash(h, m_view.tomb);
-        for (auto&& key_and_value : m_view.cells) {
-            ::feed_hash(h, key_and_value.first);
-            ::feed_hash(h, key_and_value.second, cdef);
-        }
-    }
-};
-
-template<>
-struct appending_hash<atomic_cell_view> {
-    template<typename Hasher>
-    void operator()(Hasher& h, atomic_cell_view cell, const column_definition& cdef) const {
-        feed_hash(h, cell.is_live());
-        feed_hash(h, cell.timestamp());
-        if (cell.is_live()) {
-            if (cdef.is_counter()) {
-                ::feed_hash(h, counter_cell_view(cell));
-                return;
-            }
-            if (cell.is_live_and_has_ttl()) {
-                feed_hash(h, cell.expiry());
-                feed_hash(h, cell.ttl());
-            }
-            feed_hash(h, cell.value());
-        } else {
-            feed_hash(h, cell.deletion_time());
-        }
-    }
-};
-
-template<>
-struct appending_hash<atomic_cell> {
-    template<typename Hasher>
-    void operator()(Hasher& h, const atomic_cell& cell, const column_definition& cdef) const {
-        feed_hash(h, static_cast<atomic_cell_view>(cell), cdef);
-    }
-};
-
-template<>
-struct appending_hash<collection_mutation> {
-    template<typename Hasher>
-    void operator()(Hasher& h, const collection_mutation& cm, const column_definition& cdef) const {
-        feed_hash(h, static_cast<collection_mutation_view>(cm), cdef);
-    }
-};
diff --git a/scylla/atomic_cell_or_collection.hh b/scylla/atomic_cell_or_collection.hh
deleted file mode 100644
index 4a3ea36..0000000
--- a/scylla/atomic_cell_or_collection.hh
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "atomic_cell.hh"
-#include "schema.hh"
-#include "hashing.hh"
-
-// A variant type that can hold either an atomic_cell, or a serialized collection.
-// Which type is stored is determined by the schema.
-// Has an "empty" state.
-// Objects moved-from are left in an empty state.
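The appending_hash specializations removed just above all follow one pattern: feed the hasher the cell's liveness flag and timestamp first, then only the fields that are meaningful for that state (expiry/ttl/value for live cells, deletion time for dead ones). A minimal, self-contained sketch of that pattern, using toy types rather than Scylla's (all names here are hypothetical, for illustration only):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <type_traits>

// Toy FNV-1a hasher standing in for the Hasher concept used above.
struct toy_hasher {
    uint64_t state = 14695981039346656037ULL;
    void update(const char* data, std::size_t size) {
        for (std::size_t i = 0; i < size; ++i) {
            state = (state ^ static_cast<unsigned char>(data[i])) * 1099511628211ULL;
        }
    }
};

// Generic feed_hash for trivially copyable values, mirroring ::feed_hash(h, v).
template <typename Hasher, typename T>
void feed_hash(Hasher& h, const T& v) {
    static_assert(std::is_trivially_copyable<T>::value, "toy version only");
    char buf[sizeof(T)];
    std::memcpy(buf, &v, sizeof(T));
    h.update(buf, sizeof(T));
}

// A toy cell: liveness and timestamp are always hashed; state-dependent
// fields only when meaningful, as appending_hash<atomic_cell_view> does.
struct toy_cell {
    bool live;
    int64_t timestamp;
    int64_t expiry;        // meaningful only when live
    int64_t deletion_time; // meaningful only when dead
};

template <typename Hasher>
void feed_hash(Hasher& h, const toy_cell& c) {
    feed_hash(h, c.live);
    feed_hash(h, c.timestamp);
    if (c.live) {
        feed_hash(h, c.expiry);
    } else {
        feed_hash(h, c.deletion_time);
    }
}

int main() {
    toy_hasher h;
    feed_hash(h, toy_cell{true, 42, 1000, 0});
    std::cout << std::hex << h.state << "\n";
}

Hashing the liveness flag before any state-dependent field keeps a live cell and a dead cell with the same timestamp from trivially colliding.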
-class atomic_cell_or_collection final {
-    managed_bytes _data;
-private:
-    atomic_cell_or_collection(managed_bytes&& data) : _data(std::move(data)) {}
-public:
-    atomic_cell_or_collection() = default;
-    atomic_cell_or_collection(atomic_cell ac) : _data(std::move(ac._data)) {}
-    static atomic_cell_or_collection from_atomic_cell(atomic_cell data) { return { std::move(data._data) }; }
-    atomic_cell_view as_atomic_cell() const { return atomic_cell_view::from_bytes(_data); }
-    atomic_cell_ref as_atomic_cell_ref() { return { _data }; }
-    atomic_cell_mutable_view as_mutable_atomic_cell() { return atomic_cell_mutable_view::from_bytes(_data); }
-    atomic_cell_or_collection(collection_mutation cm) : _data(std::move(cm.data)) {}
-    explicit operator bool() const {
-        return !_data.empty();
-    }
-    bool can_use_mutable_view() const {
-        return !_data.is_fragmented();
-    }
-    static atomic_cell_or_collection from_collection_mutation(collection_mutation data) {
-        return std::move(data.data);
-    }
-    collection_mutation_view as_collection_mutation() const {
-        return collection_mutation_view{_data};
-    }
-    bytes_view serialize() const {
-        return _data;
-    }
-    bool operator==(const atomic_cell_or_collection& other) const {
-        return _data == other._data;
-    }
-    template<typename Hasher>
-    void feed_hash(Hasher& h, const column_definition& def) const {
-        if (def.is_atomic()) {
-            ::feed_hash(h, as_atomic_cell(), def);
-        } else {
-            ::feed_hash(h, as_collection_mutation(), def);
-        }
-    }
-    size_t external_memory_usage() const {
-        return _data.external_memory_usage();
-    }
-    friend std::ostream& operator<<(std::ostream&, const atomic_cell_or_collection&);
-};
diff --git a/scylla/auth/auth.cc b/scylla/auth/auth.cc
deleted file mode 100644
index c3471d4..0000000
--- a/scylla/auth/auth.cc
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2016 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-#include
-
-#include
-
-#include "auth.hh"
-#include "authenticator.hh"
-#include "authorizer.hh"
-#include "database.hh"
-#include "cql3/query_processor.hh"
-#include "cql3/statements/raw/cf_statement.hh"
-#include "cql3/statements/create_table_statement.hh"
-#include "db/config.hh"
-#include "service/migration_manager.hh"
-#include "utils/loading_cache.hh"
-#include "utils/hash.hh"
-
-const sstring auth::auth::DEFAULT_SUPERUSER_NAME("cassandra");
-const sstring auth::auth::AUTH_KS("system_auth");
-const sstring auth::auth::USERS_CF("users");
-
-static const sstring USER_NAME("name");
-static const sstring SUPER("super");
-
-static logging::logger alogger("auth");
-
-// TODO: configurable
-using namespace std::chrono_literals;
-const std::chrono::milliseconds auth::auth::SUPERUSER_SETUP_DELAY = 10000ms;
-
-class auth_migration_listener : public service::migration_listener {
-    void on_create_keyspace(const sstring& ks_name) override {}
-    void on_create_column_family(const sstring& ks_name, const sstring& cf_name) override {}
-    void on_create_user_type(const sstring& ks_name, const sstring& type_name) override {}
-    void on_create_function(const sstring& ks_name, const sstring& function_name) override {}
-    void on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
-    void on_create_view(const sstring& ks_name, const sstring& view_name) override {}
-
-    void on_update_keyspace(const sstring& ks_name) override {}
-    void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool) override {}
-    void on_update_user_type(const sstring& ks_name, const sstring& type_name) override {}
-    void on_update_function(const sstring& ks_name, const sstring& function_name) override {}
-    void on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
-    void on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) override {}
-
-    void on_drop_keyspace(const sstring& ks_name) override {
-        auth::authorizer::get().revoke_all(auth::data_resource(ks_name));
-    }
-    void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override {
-        auth::authorizer::get().revoke_all(auth::data_resource(ks_name, cf_name));
-    }
-    void on_drop_user_type(const sstring& ks_name, const sstring& type_name) override {}
-    void on_drop_function(const sstring& ks_name, const sstring& function_name) override {}
-    void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
-    void on_drop_view(const sstring& ks_name, const sstring& view_name) override {}
-};
-
-static auth_migration_listener auth_migration;
-
-namespace std {
-template <>
-struct hash<auth::data_resource> {
-    size_t operator()(const auth::data_resource & v) const {
-        return v.hash_value();
-    }
-};
-
-template <>
-struct hash<auth::authenticated_user> {
-    size_t operator()(const auth::authenticated_user & v) const {
-        return utils::tuple_hash()(v.name(), v.is_anonymous());
-    }
-};
-}
-
-class auth::auth::permissions_cache {
-public:
-    typedef utils::loading_cache<std::pair<authenticated_user, data_resource>, permission_set, utils::tuple_hash> cache_type;
-    typedef typename cache_type::key_type key_type;
-
-    permissions_cache()
-        : permissions_cache(
-                cql3::get_local_query_processor().db().local().get_config()) {
-    }
-
-    permissions_cache(const db::config& cfg)
-        : _cache(cfg.permissions_cache_max_entries(), std::chrono::milliseconds(cfg.permissions_validity_in_ms()), std::chrono::milliseconds(cfg.permissions_update_interval_in_ms()), alogger,
-            [] (const key_type& k) {
{}", k.first.name()); - return authorizer::get().authorize(::make_shared(k.first), k.second); - }) {} - - future<> stop() { - return _cache.stop(); - } - - future get(::shared_ptr user, data_resource resource) { - return _cache.get(key_type(*user, std::move(resource))); - } - -private: - cache_type _cache; -}; - -namespace std { // for ADL, yuch - -std::ostream& operator<<(std::ostream& os, const std::pair& p) { - os << "{user: " << p.first.name() << ", data_resource: " << p.second << "}"; - return os; -} - -} - -static distributed perm_cache; - -/** - * Poor mans job schedule. For maximum 2 jobs. Sic. - * Still does nothing more clever than waiting 10 seconds - * like origin, then runs the submitted tasks. - * - * Only difference compared to sleep (from which this - * borrows _heavily_) is that if tasks have not run by the time - * we exit (and do static clean up) we delete the promise + cont - * - * Should be abstracted to some sort of global server function - * probably. - */ -struct waiter { - promise<> done; - timer<> tmr; - waiter() : tmr([this] {done.set_value();}) - { - tmr.arm(auth::auth::SUPERUSER_SETUP_DELAY); - } - ~waiter() { - if (tmr.armed()) { - tmr.cancel(); - done.set_exception(std::runtime_error("shutting down")); - } - alogger.trace("Deleting scheduled task"); - } - void kill() { - } -}; - -typedef std::unique_ptr waiter_ptr; - -static std::vector & thread_waiters() { - static thread_local std::vector the_waiters; - return the_waiters; -} - -void auth::auth::schedule_when_up(scheduled_func f) { - alogger.trace("Adding scheduled task"); - - auto & waiters = thread_waiters(); - - waiters.emplace_back(std::make_unique()); - auto* w = waiters.back().get(); - - w->done.get_future().finally([w] { - auto & waiters = thread_waiters(); - auto i = std::find_if(waiters.begin(), waiters.end(), [w](const waiter_ptr& p) { - return p.get() == w; - }); - if (i != waiters.end()) { - waiters.erase(i); - } - }).then([f = std::move(f)] { - alogger.trace("Running scheduled task"); - return f(); - }).handle_exception([](auto ep) { - return make_ready_future(); - }); -} - -bool auth::auth::is_class_type(const sstring& type, const sstring& classname) { - if (type == classname) { - return true; - } - auto i = classname.find_last_of('.'); - return classname.compare(i + 1, sstring::npos, type) == 0; -} - -future<> auth::auth::setup() { - auto& db = cql3::get_local_query_processor().db().local(); - auto& cfg = db.get_config(); - - future<> f = perm_cache.start(); - - if (is_class_type(cfg.authenticator(), - authenticator::ALLOW_ALL_AUTHENTICATOR_NAME) - && is_class_type(cfg.authorizer(), - authorizer::ALLOW_ALL_AUTHORIZER_NAME) - ) { - // just create the objects - return f.then([&cfg] { - return authenticator::setup(cfg.authenticator()); - }).then([&cfg] { - return authorizer::setup(cfg.authorizer()); - }); - } - - if (!db.has_keyspace(AUTH_KS)) { - std::map opts; - opts["replication_factor"] = "1"; - auto ksm = keyspace_metadata::new_keyspace(AUTH_KS, "org.apache.cassandra.locator.SimpleStrategy", opts, true); - // We use min_timestamp so that default keyspace metadata will loose with any manual adjustments. See issue #2129. - f = service::get_local_migration_manager().announce_new_keyspace(ksm, api::min_timestamp, false); - } - - return f.then([] { - return setup_table(USERS_CF, sprint("CREATE TABLE %s.%s (%s text, %s boolean, PRIMARY KEY(%s)) WITH gc_grace_seconds=%d", - AUTH_KS, USERS_CF, USER_NAME, SUPER, USER_NAME, - 90 * 24 * 60 * 60)); // 3 months. 
-    }).then([&cfg] {
-        return authenticator::setup(cfg.authenticator());
-    }).then([&cfg] {
-        return authorizer::setup(cfg.authorizer());
-    }).then([] {
-        service::get_local_migration_manager().register_listener(&auth_migration); // again, only one shard...
-        // instead of a once-timer, just schedule this to run later
-        schedule_when_up([] {
-            // set up the default superuser
-            return has_existing_users(USERS_CF, DEFAULT_SUPERUSER_NAME, USER_NAME).then([](bool exists) {
-                if (!exists) {
-                    auto query = sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?) USING TIMESTAMP 0",
-                            AUTH_KS, USERS_CF, USER_NAME, SUPER);
-                    cql3::get_local_query_processor().process(query, db::consistency_level::ONE, {DEFAULT_SUPERUSER_NAME, true}).then([](auto) {
-                        alogger.info("Created default superuser '{}'", DEFAULT_SUPERUSER_NAME);
-                    }).handle_exception([](auto ep) {
-                        try {
-                            std::rethrow_exception(ep);
-                        } catch (exceptions::request_execution_exception&) {
-                            alogger.warn("Skipped default superuser setup: some nodes were not ready");
-                        }
-                    });
-                }
-            });
-        });
-    });
-}
-
-future<> auth::auth::shutdown() {
-    // Just make sure we don't have pending tasks.
-    // This is mostly relevant for test cases, where
-    // db-env shutdown != process shutdown.
-    return smp::invoke_on_all([] {
-        thread_waiters().clear();
-    }).then([] {
-        return perm_cache.stop();
-    });
-}
-
-future<permission_set> auth::auth::get_permissions(::shared_ptr<authenticated_user> user, data_resource resource) {
-    return perm_cache.local().get(std::move(user), std::move(resource));
-}
-
-static db::consistency_level consistency_for_user(const sstring& username) {
-    if (username == auth::auth::DEFAULT_SUPERUSER_NAME) {
-        return db::consistency_level::QUORUM;
-    }
-    return db::consistency_level::LOCAL_ONE;
-}
-
-static future<::shared_ptr<cql3::untyped_result_set>> select_user(const sstring& username) {
-    // There used to be a thread-local, explicit cache of the prepared statement here. In normal
-    // execution this is fine, but since tests set up and tear down the system over and over,
-    // we'd start using obsolete prepared statements pretty quickly.
-    // Rely on the query processor's statement caching instead, and let's assume
-    // that a string->statement map lookup is not going to kill us.
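The comment above records a deliberate trade-off: rather than keeping a thread-local cache of prepared statements (which outlives test-suite teardowns and goes stale), the code re-resolves each statement through the query processor's own string-keyed cache on every call. A rough, standalone sketch of that shape, with every type and name here hypothetical:

#include <memory>
#include <string>
#include <unordered_map>

// Hypothetical stand-in for a prepared statement.
struct prepared_statement { std::string cql; };

// One string -> statement map; pay the lookup on every call, but get a
// single invalidation point when the system is torn down and rebuilt.
class statement_cache {
    std::unordered_map<std::string, std::shared_ptr<prepared_statement>> _cache;
public:
    std::shared_ptr<prepared_statement> get_or_prepare(const std::string& cql) {
        auto it = _cache.find(cql);
        if (it != _cache.end()) {
            return it->second;          // cache hit: one hash + compare
        }
        auto stmt = std::make_shared<prepared_statement>(prepared_statement{cql});
        _cache.emplace(cql, stmt);      // miss: "prepare" and remember
        return stmt;
    }
    void clear() { _cache.clear(); }    // invalidate on schema/system reset
};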
- return cql3::get_local_query_processor().process( - sprint("SELECT * FROM %s.%s WHERE %s = ?", - auth::auth::AUTH_KS, auth::auth::USERS_CF, - USER_NAME), consistency_for_user(username), - { username }, true); -} - -future auth::auth::is_existing_user(const sstring& username) { - return select_user(username).then( - [](::shared_ptr res) { - return make_ready_future(!res->empty()); - }); -} - -future auth::auth::is_super_user(const sstring& username) { - return select_user(username).then( - [](::shared_ptr res) { - return make_ready_future(!res->empty() && res->one().get_as(SUPER)); - }); -} - -future<> auth::auth::insert_user(const sstring& username, bool is_super) { - return cql3::get_local_query_processor().process(sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?)", - AUTH_KS, USERS_CF, USER_NAME, SUPER), - consistency_for_user(username), { username, is_super }).discard_result(); -} - -future<> auth::auth::delete_user(const sstring& username) { - return cql3::get_local_query_processor().process(sprint("DELETE FROM %s.%s WHERE %s = ?", - AUTH_KS, USERS_CF, USER_NAME), - consistency_for_user(username), { username }).discard_result(); -} - -future<> auth::auth::setup_table(const sstring& name, const sstring& cql) { - auto& qp = cql3::get_local_query_processor(); - auto& db = qp.db().local(); - - if (db.has_schema(AUTH_KS, name)) { - return make_ready_future(); - } - - ::shared_ptr parsed = static_pointer_cast< - cql3::statements::raw::cf_statement>(cql3::query_processor::parse_statement(cql)); - parsed->prepare_keyspace(AUTH_KS); - ::shared_ptr statement = - static_pointer_cast( - parsed->prepare(db, qp.get_cql_stats())->statement); - auto schema = statement->get_cf_meta_data(); - auto uuid = generate_legacy_id(schema->ks_name(), schema->cf_name()); - - schema_builder b(schema); - b.set_uuid(uuid); - return service::get_local_migration_manager().announce_new_column_family(b.build(), false); -} - -future auth::auth::has_existing_users(const sstring& cfname, const sstring& def_user_name, const sstring& name_column) { - auto default_user_query = sprint("SELECT * FROM %s.%s WHERE %s = ?", AUTH_KS, cfname, name_column); - auto all_users_query = sprint("SELECT * FROM %s.%s LIMIT 1", AUTH_KS, cfname); - - return cql3::get_local_query_processor().process(default_user_query, db::consistency_level::ONE, { def_user_name }).then([=](::shared_ptr res) { - if (!res->empty()) { - return make_ready_future(true); - } - return cql3::get_local_query_processor().process(default_user_query, db::consistency_level::QUORUM, { def_user_name }).then([all_users_query](::shared_ptr res) { - if (!res->empty()) { - return make_ready_future(true); - } - return cql3::get_local_query_processor().process(all_users_query, db::consistency_level::QUORUM).then([](::shared_ptr res) { - return make_ready_future(!res->empty()); - }); - }); - }); -} - diff --git a/scylla/auth/auth.hh b/scylla/auth/auth.hh deleted file mode 100644 index 9a9be4e..0000000 --- a/scylla/auth/auth.hh +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include -#include -#include - - -#include "exceptions/exceptions.hh" -#include "permission.hh" -#include "data_resource.hh" -#include "authenticated_user.hh" - -namespace auth { - -class auth { -public: - class permissions_cache; - - static const sstring DEFAULT_SUPERUSER_NAME; - static const sstring AUTH_KS; - static const sstring USERS_CF; - static const std::chrono::milliseconds SUPERUSER_SETUP_DELAY; - - static bool is_class_type(const sstring& type, const sstring& classname); - - static future get_permissions(::shared_ptr, data_resource); - - /** - * Checks if the username is stored in AUTH_KS.USERS_CF. - * - * @param username Username to query. - * @return whether or not Cassandra knows about the user. - */ - static future is_existing_user(const sstring& username); - - /** - * Checks if the user is a known superuser. - * - * @param username Username to query. - * @return true is the user is a superuser, false if they aren't or don't exist at all. - */ - static future is_super_user(const sstring& username); - - /** - * Inserts the user into AUTH_KS.USERS_CF (or overwrites their superuser status as a result of an ALTER USER query). - * - * @param username Username to insert. - * @param isSuper User's new status. - * @throws RequestExecutionException - */ - static future<> insert_user(const sstring& username, bool is_super); - - /** - * Deletes the user from AUTH_KS.USERS_CF. - * - * @param username Username to delete. - * @throws RequestExecutionException - */ - static future<> delete_user(const sstring& username); - - /** - * Sets up Authenticator and Authorizer. - */ - static future<> setup(); - static future<> shutdown(); - - /** - * Set up table from given CREATE TABLE statement under system_auth keyspace, if not already done so. - * - * @param name name of the table - * @param cql CREATE TABLE statement - */ - static future<> setup_table(const sstring& name, const sstring& cql); - - static future has_existing_users(const sstring& cfname, const sstring& def_user_name, const sstring& name_column_name); - - // For internal use. Run function "when system is up". 
- typedef std::function()> scheduled_func; - static void schedule_when_up(scheduled_func); -}; -} - -std::ostream& operator<<(std::ostream& os, const std::pair& p); diff --git a/scylla/auth/authenticated_user.cc b/scylla/auth/authenticated_user.cc deleted file mode 100644 index d956559..0000000 --- a/scylla/auth/authenticated_user.cc +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - - -#include "authenticated_user.hh" -#include "auth.hh" - -const sstring auth::authenticated_user::ANONYMOUS_USERNAME("anonymous"); - -auth::authenticated_user::authenticated_user() - : _anon(true) -{} - -auth::authenticated_user::authenticated_user(sstring name) - : _name(name), _anon(false) -{} - -auth::authenticated_user::authenticated_user(authenticated_user&&) = default; -auth::authenticated_user::authenticated_user(const authenticated_user&) = default; - -const sstring& auth::authenticated_user::name() const { - return _anon ? ANONYMOUS_USERNAME : _name; -} - -future auth::authenticated_user::is_super() const { - if (is_anonymous()) { - return make_ready_future(false); - } - return auth::auth::is_super_user(_name); -} - -bool auth::authenticated_user::operator==(const authenticated_user& v) const { - return _anon ? v._anon : _name == v._name; -} diff --git a/scylla/auth/authenticated_user.hh b/scylla/auth/authenticated_user.hh deleted file mode 100644 index b265537..0000000 --- a/scylla/auth/authenticated_user.hh +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include -#include "seastarx.hh" - -namespace auth { - -class authenticated_user { -public: - static const sstring ANONYMOUS_USERNAME; - - authenticated_user(); - authenticated_user(sstring name); - authenticated_user(authenticated_user&&); - authenticated_user(const authenticated_user&); - - const sstring& name() const; - - /** - * Checks the user's superuser status. - * Only a superuser is allowed to perform CREATE USER and DROP USER queries. - * Im most cased, though not necessarily, a superuser will have Permission.ALL on every resource - * (depends on IAuthorizer implementation). - */ - future is_super() const; - - /** - * If IAuthenticator doesn't require authentication, this method may return true. - */ - bool is_anonymous() const { - return _anon; - } - - bool operator==(const authenticated_user&) const; -private: - sstring _name; - bool _anon; -}; - -} - diff --git a/scylla/auth/authenticator.cc b/scylla/auth/authenticator.cc deleted file mode 100644 index 83131fa..0000000 --- a/scylla/auth/authenticator.cc +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "authenticator.hh" -#include "authenticated_user.hh" -#include "password_authenticator.hh" -#include "auth.hh" -#include "db/config.hh" - -const sstring auth::authenticator::USERNAME_KEY("username"); -const sstring auth::authenticator::PASSWORD_KEY("password"); -const sstring auth::authenticator::ALLOW_ALL_AUTHENTICATOR_NAME("org.apache.cassandra.auth.AllowAllAuthenticator"); - -auth::authenticator::option auth::authenticator::string_to_option(const sstring& name) { - if (strcasecmp(name.c_str(), "password") == 0) { - return option::PASSWORD; - } - throw std::invalid_argument(name); -} - -sstring auth::authenticator::option_to_string(option opt) { - switch (opt) { - case option::PASSWORD: - return "PASSWORD"; - default: - throw std::invalid_argument(sprint("Unknown option {}", opt)); - } -} - -/** - * Authenticator is assumed to be a fully state-less immutable object (note all the const). - * We thus store a single instance globally, since it should be safe/ok. - */ -static std::unique_ptr global_authenticator; - -future<> -auth::authenticator::setup(const sstring& type) { - if (auth::auth::is_class_type(type, ALLOW_ALL_AUTHENTICATOR_NAME)) { - class allow_all_authenticator : public authenticator { - public: - const sstring& class_name() const override { - return ALLOW_ALL_AUTHENTICATOR_NAME; - } - bool require_authentication() const override { - return false; - } - option_set supported_options() const override { - return option_set(); - } - option_set alterable_options() const override { - return option_set(); - } - future<::shared_ptr> authenticate(const credentials_map& credentials) const override { - return make_ready_future<::shared_ptr>(::make_shared()); - } - future<> create(sstring username, const option_map& options) override { - return make_ready_future(); - } - future<> alter(sstring username, const option_map& options) override { - return make_ready_future(); - } - future<> drop(sstring username) override { - return make_ready_future(); - } - const resource_ids& protected_resources() const override { - static const resource_ids ids; - return ids; - } - ::shared_ptr new_sasl_challenge() const override { - throw std::runtime_error("Should not reach"); - } - }; - global_authenticator = std::make_unique(); - } else if (auth::auth::is_class_type(type, password_authenticator::PASSWORD_AUTHENTICATOR_NAME)) { - auto pwa = std::make_unique(); - auto f = pwa->init(); - return f.then([pwa = std::move(pwa)]() mutable { - global_authenticator = std::move(pwa); - }); - } else { - throw exceptions::configuration_exception("Invalid authenticator type: " + type); - } - return make_ready_future(); -} - -auth::authenticator& auth::authenticator::get() { - assert(global_authenticator); - return *global_authenticator; -} diff --git a/scylla/auth/authenticator.hh b/scylla/auth/authenticator.hh deleted file mode 100644 index 2459338..0000000 --- a/scylla/auth/authenticator.hh +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "bytes.hh" -#include "data_resource.hh" -#include "enum_set.hh" -#include "exceptions/exceptions.hh" - -namespace db { - class config; -} - -namespace auth { - -class authenticated_user; - -class authenticator { -public: - static const sstring USERNAME_KEY; - static const sstring PASSWORD_KEY; - static const sstring ALLOW_ALL_AUTHENTICATOR_NAME; - - /** - * Supported CREATE USER/ALTER USER options. - * Currently only PASSWORD is available. - */ - enum class option { - PASSWORD - }; - - static option string_to_option(const sstring&); - static sstring option_to_string(option); - - using option_set = enum_set>; - using option_map = std::unordered_map>; - using credentials_map = std::unordered_map; - - /** - * Setup is called once upon system startup to initialize the IAuthenticator. - * - * For example, use this method to create any required keyspaces/column families. - * Note: Only call from main thread. - */ - static future<> setup(const sstring& type); - - /** - * Returns the system authenticator. Must have called setup before calling this. - */ - static authenticator& get(); - - virtual ~authenticator() - {} - - virtual const sstring& class_name() const = 0; - - /** - * Whether or not the authenticator requires explicit login. - * If false will instantiate user with AuthenticatedUser.ANONYMOUS_USER. - */ - virtual bool require_authentication() const = 0; - - /** - * Set of options supported by CREATE USER and ALTER USER queries. - * Should never return null - always return an empty set instead. - */ - virtual option_set supported_options() const = 0; - - /** - * Subset of supportedOptions that users are allowed to alter when performing ALTER USER [themselves]. - * Should never return null - always return an empty set instead. - */ - virtual option_set alterable_options() const = 0; - - /** - * Authenticates a user given a Map of credentials. - * Should never return null - always throw AuthenticationException instead. - * Returning AuthenticatedUser.ANONYMOUS_USER is an option as well if authentication is not required. 
- * - * @throws authentication_exception if credentials don't match any known user. - */ - virtual future<::shared_ptr> authenticate(const credentials_map& credentials) const = 0; - - /** - * Called during execution of CREATE USER query (also may be called on startup, see seedSuperuserOptions method). - * If authenticator is static then the body of the method should be left blank, but don't throw an exception. - * options are guaranteed to be a subset of supportedOptions(). - * - * @param username Username of the user to create. - * @param options Options the user will be created with. - * @throws exceptions::request_validation_exception - * @throws exceptions::request_execution_exception - */ - virtual future<> create(sstring username, const option_map& options) = 0; - - /** - * Called during execution of ALTER USER query. - * options are always guaranteed to be a subset of supportedOptions(). Furthermore, if the user performing the query - * is not a superuser and is altering himself, then options are guaranteed to be a subset of alterableOptions(). - * Keep the body of the method blank if your implementation doesn't support any options. - * - * @param username Username of the user that will be altered. - * @param options Options to alter. - * @throws exceptions::request_validation_exception - * @throws exceptions::request_execution_exception - */ - virtual future<> alter(sstring username, const option_map& options) = 0; - - - /** - * Called during execution of DROP USER query. - * - * @param username Username of the user that will be dropped. - * @throws exceptions::request_validation_exception - * @throws exceptions::request_execution_exception - */ - virtual future<> drop(sstring username) = 0; - - /** - * Set of resources that should be made inaccessible to users and only accessible internally. - * - * @return Keyspaces, column families that will be unmodifiable by users; other resources. - * @see resource_ids - */ - virtual const resource_ids& protected_resources() const = 0; - - class sasl_challenge { - public: - virtual ~sasl_challenge() {} - virtual bytes evaluate_response(bytes_view client_response) = 0; - virtual bool is_complete() const = 0; - virtual future<::shared_ptr> get_authenticated_user() const = 0; - }; - - /** - * Provide a sasl_challenge to be used by the CQL binary protocol server. If - * the configured authenticator requires authentication but does not implement this - * interface we refuse to start the binary protocol server as it will have no way - * of authenticating clients. - * @return sasl_challenge implementation - */ - virtual ::shared_ptr new_sasl_challenge() const = 0; -}; - -inline std::ostream& operator<<(std::ostream& os, authenticator::option opt) { - return os << authenticator::option_to_string(opt); -} - -} - diff --git a/scylla/auth/authorizer.cc b/scylla/auth/authorizer.cc deleted file mode 100644 index 206d4f6..0000000 --- a/scylla/auth/authorizer.cc +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "authorizer.hh" -#include "authenticated_user.hh" -#include "default_authorizer.hh" -#include "auth.hh" -#include "db/config.hh" - -const sstring auth::authorizer::ALLOW_ALL_AUTHORIZER_NAME("org.apache.cassandra.auth.AllowAllAuthorizer"); - -/** - * Authenticator is assumed to be a fully state-less immutable object (note all the const). - * We thus store a single instance globally, since it should be safe/ok. - */ -static std::unique_ptr global_authorizer; - -future<> -auth::authorizer::setup(const sstring& type) { - if (auth::auth::is_class_type(type, ALLOW_ALL_AUTHORIZER_NAME)) { - class allow_all_authorizer : public authorizer { - public: - future authorize(::shared_ptr, data_resource) const override { - return make_ready_future(permissions::ALL); - } - future<> grant(::shared_ptr, permission_set, data_resource, sstring) override { - throw exceptions::invalid_request_exception("GRANT operation is not supported by AllowAllAuthorizer"); - } - future<> revoke(::shared_ptr, permission_set, data_resource, sstring) override { - throw exceptions::invalid_request_exception("REVOKE operation is not supported by AllowAllAuthorizer"); - } - future> list(::shared_ptr performer, permission_set, optional, optional) const override { - throw exceptions::invalid_request_exception("LIST PERMISSIONS operation is not supported by AllowAllAuthorizer"); - } - future<> revoke_all(sstring dropped_user) override { - return make_ready_future(); - } - future<> revoke_all(data_resource) override { - return make_ready_future(); - } - const resource_ids& protected_resources() override { - static const resource_ids ids; - return ids; - } - future<> validate_configuration() const override { - return make_ready_future(); - } - }; - - global_authorizer = std::make_unique(); - } else if (auth::auth::is_class_type(type, default_authorizer::DEFAULT_AUTHORIZER_NAME)) { - auto da = std::make_unique(); - auto f = da->init(); - return f.then([da = std::move(da)]() mutable { - global_authorizer = std::move(da); - }); - } else { - throw exceptions::configuration_exception("Invalid authorizer type: " + type); - } - return make_ready_future(); -} - -auth::authorizer& auth::authorizer::get() { - assert(global_authorizer); - return *global_authorizer; -} diff --git a/scylla/auth/authorizer.hh b/scylla/auth/authorizer.hh deleted file mode 100644 index f949d68..0000000 --- 
a/scylla/auth/authorizer.hh +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include - -#include -#include -#include - -#include "permission.hh" -#include "data_resource.hh" - -#include "seastarx.hh" - -namespace auth { - -class authenticated_user; - -struct permission_details { - sstring user; - data_resource resource; - permission_set permissions; - - bool operator<(const permission_details& v) const { - return std::tie(user, resource, permissions) < std::tie(v.user, v.resource, v.permissions); - } -}; - -using std::experimental::optional; - -class authorizer { -public: - static const sstring ALLOW_ALL_AUTHORIZER_NAME; - - virtual ~authorizer() {} - - /** - * The primary Authorizer method. Returns a set of permissions of a user on a resource. - * - * @param user Authenticated user requesting authorization. - * @param resource Resource for which the authorization is being requested. @see DataResource. - * @return Set of permissions of the user on the resource. Should never return empty. Use permission.NONE instead. - */ - virtual future authorize(::shared_ptr, data_resource) const = 0; - - /** - * Grants a set of permissions on a resource to a user. - * The opposite of revoke(). - * - * @param performer User who grants the permissions. - * @param permissions Set of permissions to grant. - * @param to Grantee of the permissions. - * @param resource Resource on which to grant the permissions. - * - * @throws RequestValidationException - * @throws RequestExecutionException - */ - virtual future<> grant(::shared_ptr performer, permission_set, data_resource, sstring to) = 0; - - /** - * Revokes a set of permissions on a resource from a user. - * The opposite of grant(). - * - * @param performer User who revokes the permissions. - * @param permissions Set of permissions to revoke. - * @param from Revokee of the permissions. - * @param resource Resource on which to revoke the permissions. 
- * - * @throws RequestValidationException - * @throws RequestExecutionException - */ - virtual future<> revoke(::shared_ptr performer, permission_set, data_resource, sstring from) = 0; - - /** - * Returns a list of permissions on a resource of a user. - * - * @param performer User who wants to see the permissions. - * @param permissions Set of Permission values the user is interested in. The result should only include the matching ones. - * @param resource The resource on which permissions are requested. Can be null, in which case permissions on all resources - * should be returned. - * @param of The user whose permissions are requested. Can be null, in which case permissions of every user should be returned. - * - * @return All of the matching permission that the requesting user is authorized to know about. - * - * @throws RequestValidationException - * @throws RequestExecutionException - */ - virtual future> list(::shared_ptr performer, permission_set, optional, optional) const = 0; - - /** - * This method is called before deleting a user with DROP USER query so that a new user with the same - * name wouldn't inherit permissions of the deleted user in the future. - * - * @param droppedUser The user to revoke all permissions from. - */ - virtual future<> revoke_all(sstring dropped_user) = 0; - - /** - * This method is called after a resource is removed (i.e. keyspace or a table is dropped). - * - * @param droppedResource The resource to revoke all permissions on. - */ - virtual future<> revoke_all(data_resource) = 0; - - /** - * Set of resources that should be made inaccessible to users and only accessible internally. - * - * @return Keyspaces, column families that will be unmodifiable by users; other resources. - */ - virtual const resource_ids& protected_resources() = 0; - - /** - * Validates configuration of IAuthorizer implementation (if configurable). - * - * @throws ConfigurationException when there is a configuration error. - */ - virtual future<> validate_configuration() const = 0; - - /** - * Setup is called once upon system startup to initialize the IAuthorizer. - * - * For example, use this method to create any required keyspaces/column families. - */ - static future<> setup(const sstring& type); - - /** - * Returns the system authorizer. Must have called setup before calling this. - */ - static authorizer& get(); -}; - -} diff --git a/scylla/auth/data_resource.cc b/scylla/auth/data_resource.cc deleted file mode 100644 index 7af829d..0000000 --- a/scylla/auth/data_resource.cc +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. 
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "data_resource.hh"
-
-#include <regex>
-#include "service/storage_proxy.hh"
-
-const sstring auth::data_resource::ROOT_NAME("data");
-
-auth::data_resource::data_resource(level l, const sstring& ks, const sstring& cf)
-    : _level(l), _ks(ks), _cf(cf)
-{
-}
-
-auth::data_resource::data_resource()
-    : data_resource(level::ROOT)
-{}
-
-auth::data_resource::data_resource(const sstring& ks)
-    : data_resource(level::KEYSPACE, ks)
-{}
-
-auth::data_resource::data_resource(const sstring& ks, const sstring& cf)
-    : data_resource(level::COLUMN_FAMILY, ks, cf)
-{}
-
-auth::data_resource::level auth::data_resource::get_level() const {
-    return _level;
-}
-
-auth::data_resource auth::data_resource::from_name(const sstring& s) {
-
-    static std::regex slash_regex("/");
-
-    auto i = std::regex_token_iterator<const char*>(s.begin(),
-            s.end(), slash_regex, -1);
-    auto e = std::regex_token_iterator<const char*>();
-    auto n = std::distance(i, e);
-
-    if (n > 3 || ROOT_NAME != sstring(*i++)) {
-        throw std::invalid_argument(sprint("%s is not a valid data resource name", s));
-    }
-
-    if (n == 1) {
-        return data_resource();
-    }
-    auto ks = *i++;
-    if (n == 2) {
-        return data_resource(ks.str());
-    }
-    auto cf = *i++;
-    return data_resource(ks.str(), cf.str());
-}
-
-sstring auth::data_resource::name() const {
-    switch (get_level()) {
-    case level::ROOT:
-        return ROOT_NAME;
-    case level::KEYSPACE:
-        return sprint("%s/%s", ROOT_NAME, _ks);
-    case level::COLUMN_FAMILY:
-    default:
-        return sprint("%s/%s/%s", ROOT_NAME, _ks, _cf);
-    }
-}
-
-auth::data_resource auth::data_resource::get_parent() const {
-    switch (get_level()) {
-    case level::KEYSPACE:
-        return data_resource();
-    case level::COLUMN_FAMILY:
-        return data_resource(_ks);
-    default:
-        throw std::invalid_argument("Root-level resource can't have a parent");
-    }
-}
-
-const sstring& auth::data_resource::keyspace() const {
-    if (is_root_level()) {
-        throw std::invalid_argument("ROOT data resource has no keyspace");
-    }
-    return _ks;
-}
-
-const sstring& auth::data_resource::column_family() const {
-    if (!is_column_family_level()) {
-        throw std::invalid_argument(sprint("%s data resource has no column family", name()));
-    }
-    return _cf;
-}
-
-bool auth::data_resource::has_parent() const {
-    return !is_root_level();
-}
-
-bool auth::data_resource::exists() const {
-    switch (get_level()) {
-    case level::ROOT:
-        return true;
-    case level::KEYSPACE:
-        return service::get_local_storage_proxy().get_db().local().has_keyspace(_ks);
-    case level::COLUMN_FAMILY:
-    default:
-        return service::get_local_storage_proxy().get_db().local().has_schema(_ks, _cf);
    }
-}
-
-sstring auth::data_resource::to_string() const {
-    switch (get_level()) {
-    case level::ROOT:
-        return "<all keyspaces>";
-    case level::KEYSPACE:
-        return sprint("<keyspace %s>", _ks);
-    case level::COLUMN_FAMILY:
-    default:
-        return sprint("<table %s.%s>", _ks, _cf);
    }
-}
-
-bool auth::data_resource::operator==(const data_resource& v) const {
-    return _ks ==
v._ks && _cf == v._cf; -} - -bool auth::data_resource::operator<(const data_resource& v) const { - return _ks < v._ks ? true : (v._ks < _ks ? false : _cf < v._cf); -} - -std::ostream& auth::operator<<(std::ostream& os, const data_resource& r) { - return os << r.to_string(); -} - diff --git a/scylla/auth/data_resource.hh b/scylla/auth/data_resource.hh deleted file mode 100644 index 3b365fb..0000000 --- a/scylla/auth/data_resource.hh +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "utils/hash.hh" -#include -#include -#include -#include "seastarx.hh" - -namespace auth { - -class data_resource { -private: - enum class level { - ROOT, KEYSPACE, COLUMN_FAMILY - }; - - static const sstring ROOT_NAME; - - level _level; - sstring _ks; - sstring _cf; - - data_resource(level, const sstring& ks = {}, const sstring& cf = {}); - - level get_level() const; -public: - /** - * Creates a DataResource representing the root-level resource. - * @return the root-level resource. - */ - data_resource(); - /** - * Creates a DataResource representing a keyspace. - * - * @param keyspace Name of the keyspace. - */ - data_resource(const sstring& ks); - /** - * Creates a DataResource instance representing a column family. - * - * @param keyspace Name of the keyspace. - * @param columnFamily Name of the column family. - */ - data_resource(const sstring& ks, const sstring& cf); - - /** - * Parses a data resource name into a DataResource instance. - * - * @param name Name of the data resource. - * @return DataResource instance matching the name. - */ - static data_resource from_name(const sstring&); - - /** - * @return Printable name of the resource. - */ - sstring name() const; - - /** - * @return Parent of the resource, if any. Throws IllegalStateException if it's the root-level resource. 
- */ - data_resource get_parent() const; - - bool is_root_level() const { - return get_level() == level::ROOT; - } - - bool is_keyspace_level() const { - return get_level() == level::KEYSPACE; - } - - bool is_column_family_level() const { - return get_level() == level::COLUMN_FAMILY; - } - - /** - * @return keyspace of the resource. - * @throws std::invalid_argument if it's the root-level resource. - */ - const sstring& keyspace() const; - - /** - * @return column family of the resource. - * @throws std::invalid_argument if it's not a cf-level resource. - */ - const sstring& column_family() const; - - /** - * @return Whether or not the resource has a parent in the hierarchy. - */ - bool has_parent() const; - - /** - * @return Whether or not the resource exists in scylla. - */ - bool exists() const; - - sstring to_string() const; - - bool operator==(const data_resource&) const; - bool operator<(const data_resource&) const; - - size_t hash_value() const { - return utils::tuple_hash()(_ks, _cf); - } -}; - -/** - * Resource id mappings, i.e. keyspace and/or column families. - */ -using resource_ids = std::set; - -std::ostream& operator<<(std::ostream&, const data_resource&); - -} - - - diff --git a/scylla/auth/default_authorizer.cc b/scylla/auth/default_authorizer.cc deleted file mode 100644 index 1493180..0000000 --- a/scylla/auth/default_authorizer.cc +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
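The from_name() parser above maps one, two, or three '/'-separated segments onto the ROOT/KEYSPACE/COLUMN_FAMILY levels. For reference, a minimal standalone sketch of the same rule, assuming plain std::string and a getline-based split in place of seastar's sstring and the regex tokenizer used above:

#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

enum class level { ROOT, KEYSPACE, COLUMN_FAMILY };

level parse_data_resource(const std::string& name) {
    std::vector<std::string> parts;
    std::istringstream in(name);
    std::string tok;
    while (std::getline(in, tok, '/')) {
        parts.push_back(tok);
    }
    if (parts.empty() || parts.size() > 3 || parts[0] != "data") {
        throw std::invalid_argument(name + " is not a valid data resource name");
    }
    // 1 segment -> ROOT ("data"), 2 -> KEYSPACE ("data/<ks>"),
    // 3 -> COLUMN_FAMILY ("data/<ks>/<cf>")
    return static_cast<level>(parts.size() - 1);
}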
- */ - -#include -#include -#include -#include - -#include - -#include "auth.hh" -#include "default_authorizer.hh" -#include "authenticated_user.hh" -#include "permission.hh" -#include "cql3/query_processor.hh" -#include "exceptions/exceptions.hh" -#include "log.hh" - -const sstring auth::default_authorizer::DEFAULT_AUTHORIZER_NAME( - "org.apache.cassandra.auth.CassandraAuthorizer"); - -static const sstring USER_NAME = "username"; -static const sstring RESOURCE_NAME = "resource"; -static const sstring PERMISSIONS_NAME = "permissions"; -static const sstring PERMISSIONS_CF = "permissions"; - -static logging::logger alogger("default_authorizer"); - -auth::default_authorizer::default_authorizer() { -} -auth::default_authorizer::~default_authorizer() { -} - -future<> auth::default_authorizer::init() { - sstring create_table = sprint("CREATE TABLE %s.%s (" - "%s text," - "%s text," - "%s set," - "PRIMARY KEY(%s, %s)" - ") WITH gc_grace_seconds=%d", auth::auth::AUTH_KS, - PERMISSIONS_CF, USER_NAME, RESOURCE_NAME, PERMISSIONS_NAME, - USER_NAME, RESOURCE_NAME, 90 * 24 * 60 * 60); // 3 months. - - return auth::setup_table(PERMISSIONS_CF, create_table); -} - - -future auth::default_authorizer::authorize( - ::shared_ptr user, data_resource resource) const { - return user->is_super().then([this, user, resource = std::move(resource)](bool is_super) { - if (is_super) { - return make_ready_future(permissions::ALL); - } - - /** - * TOOD: could create actual data type for permission (translating string<->perm), - * but this seems overkill right now. We still must store strings so... - */ - auto& qp = cql3::get_local_query_processor(); - auto query = sprint("SELECT %s FROM %s.%s WHERE %s = ? AND %s = ?" - , PERMISSIONS_NAME, auth::AUTH_KS, PERMISSIONS_CF, USER_NAME, RESOURCE_NAME); - return qp.process(query, db::consistency_level::LOCAL_ONE, {user->name(), resource.name() }) - .then_wrapped([=](future<::shared_ptr> f) { - try { - auto res = f.get0(); - - if (res->empty() || !res->one().has(PERMISSIONS_NAME)) { - return make_ready_future(permissions::NONE); - } - return make_ready_future(permissions::from_strings(res->one().get_set(PERMISSIONS_NAME))); - } catch (exceptions::request_execution_exception& e) { - alogger.warn("CassandraAuthorizer failed to authorize {} for {}", user->name(), resource); - return make_ready_future(permissions::NONE); - } - }); - }); -} - -#include - -future<> auth::default_authorizer::modify( - ::shared_ptr performer, permission_set set, - data_resource resource, sstring user, sstring op) { - // TODO: why does this not check super user? - auto& qp = cql3::get_local_query_processor(); - auto query = sprint("UPDATE %s.%s SET %s = %s %s ? WHERE %s = ? 
AND %s = ?", - auth::AUTH_KS, PERMISSIONS_CF, PERMISSIONS_NAME, - PERMISSIONS_NAME, op, USER_NAME, RESOURCE_NAME); - return qp.process(query, db::consistency_level::ONE, { - permissions::to_strings(set), user, resource.name() }).discard_result(); -} - - -future<> auth::default_authorizer::grant( - ::shared_ptr performer, permission_set set, - data_resource resource, sstring to) { - return modify(std::move(performer), std::move(set), std::move(resource), std::move(to), "+"); -} - -future<> auth::default_authorizer::revoke( - ::shared_ptr performer, permission_set set, - data_resource resource, sstring from) { - return modify(std::move(performer), std::move(set), std::move(resource), std::move(from), "-"); -} - -future> auth::default_authorizer::list( - ::shared_ptr performer, permission_set set, - optional resource, optional user) const { - return performer->is_super().then([this, performer, set = std::move(set), resource = std::move(resource), user = std::move(user)](bool is_super) { - if (!is_super && (!user || performer->name() != *user)) { - throw exceptions::unauthorized_exception(sprint("You are not authorized to view %s's permissions", user ? *user : "everyone")); - } - - auto query = sprint("SELECT %s, %s, %s FROM %s.%s", USER_NAME, RESOURCE_NAME, PERMISSIONS_NAME, auth::AUTH_KS, PERMISSIONS_CF); - auto& qp = cql3::get_local_query_processor(); - - // Oh, look, it is a case where it does not pay off to have - // parameters to process in an initializer list. - future<::shared_ptr> f = make_ready_future<::shared_ptr>(); - - if (resource && user) { - query += sprint(" WHERE %s = ? AND %s = ?", USER_NAME, RESOURCE_NAME); - f = qp.process(query, db::consistency_level::ONE, {*user, resource->name()}); - } else if (resource) { - query += sprint(" WHERE %s = ? ALLOW FILTERING", RESOURCE_NAME); - f = qp.process(query, db::consistency_level::ONE, {resource->name()}); - } else if (user) { - query += sprint(" WHERE %s = ?", USER_NAME); - f = qp.process(query, db::consistency_level::ONE, {*user}); - } else { - f = qp.process(query, db::consistency_level::ONE, {}); - } - - return f.then([set](::shared_ptr res) { - std::vector result; - - for (auto& row : *res) { - if (row.has(PERMISSIONS_NAME)) { - auto username = row.get_as(USER_NAME); - auto resource = data_resource::from_name(row.get_as(RESOURCE_NAME)); - auto ps = permissions::from_strings(row.get_set(PERMISSIONS_NAME)); - ps = permission_set::from_mask(ps.mask() & set.mask()); - - result.emplace_back(permission_details {username, resource, ps}); - } - } - return make_ready_future>(std::move(result)); - }); - }); -} - -future<> auth::default_authorizer::revoke_all(sstring dropped_user) { - auto& qp = cql3::get_local_query_processor(); - auto query = sprint("DELETE FROM %s.%s WHERE %s = ?", auth::AUTH_KS, - PERMISSIONS_CF, USER_NAME); - return qp.process(query, db::consistency_level::ONE, { dropped_user }).discard_result().handle_exception( - [dropped_user](auto ep) { - try { - std::rethrow_exception(ep); - } catch (exceptions::request_execution_exception& e) { - alogger.warn("CassandraAuthorizer failed to revoke all permissions of {}: {}", dropped_user, e); - } - }); -} - -future<> auth::default_authorizer::revoke_all(data_resource resource) { - auto& qp = cql3::get_local_query_processor(); - auto query = sprint("SELECT %s FROM %s.%s WHERE %s = ? 
ALLOW FILTERING", - USER_NAME, auth::AUTH_KS, PERMISSIONS_CF, RESOURCE_NAME); - return qp.process(query, db::consistency_level::LOCAL_ONE, { resource.name() }) - .then_wrapped([resource, &qp](future<::shared_ptr> f) { - try { - auto res = f.get0(); - return parallel_for_each(res->begin(), res->end(), [&qp, res, resource](const cql3::untyped_result_set::row& r) { - auto query = sprint("DELETE FROM %s.%s WHERE %s = ? AND %s = ?" - , auth::AUTH_KS, PERMISSIONS_CF, USER_NAME, RESOURCE_NAME); - return qp.process(query, db::consistency_level::LOCAL_ONE, { r.get_as(USER_NAME), resource.name() }) - .discard_result().handle_exception([resource](auto ep) { - try { - std::rethrow_exception(ep); - } catch (exceptions::request_execution_exception& e) { - alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e); - } - - }); - }); - } catch (exceptions::request_execution_exception& e) { - alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e); - return make_ready_future(); - } - }); -} - - -const auth::resource_ids& auth::default_authorizer::protected_resources() { - static const resource_ids ids({ data_resource(auth::AUTH_KS, PERMISSIONS_CF) }); - return ids; -} - -future<> auth::default_authorizer::validate_configuration() const { - return make_ready_future(); -} diff --git a/scylla/auth/default_authorizer.hh b/scylla/auth/default_authorizer.hh deleted file mode 100644 index d66e283..0000000 --- a/scylla/auth/default_authorizer.hh +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "authorizer.hh" - -namespace auth { - -class default_authorizer : public authorizer { -public: - static const sstring DEFAULT_AUTHORIZER_NAME; - - default_authorizer(); - ~default_authorizer(); - - future<> init(); - - future authorize(::shared_ptr, data_resource) const override; - - future<> grant(::shared_ptr, permission_set, data_resource, sstring) override; - - future<> revoke(::shared_ptr, permission_set, data_resource, sstring) override; - - future> list(::shared_ptr, permission_set, optional, optional) const override; - - future<> revoke_all(sstring) override; - - future<> revoke_all(data_resource) override; - - const resource_ids& protected_resources() override; - - future<> validate_configuration() const override; - -private: - future<> modify(::shared_ptr, permission_set, data_resource, sstring, sstring); -}; - -} /* namespace auth */ - diff --git a/scylla/auth/password_authenticator.cc b/scylla/auth/password_authenticator.cc deleted file mode 100644 index a65d20a..0000000 --- a/scylla/auth/password_authenticator.cc +++ /dev/null @@ -1,351 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include -#include -#include -#include - -#include - -#include "auth.hh" -#include "password_authenticator.hh" -#include "authenticated_user.hh" -#include "cql3/query_processor.hh" -#include "log.hh" - -const sstring auth::password_authenticator::PASSWORD_AUTHENTICATOR_NAME("org.apache.cassandra.auth.PasswordAuthenticator"); - -// name of the hash column. 
-static const sstring SALTED_HASH = "salted_hash"; -static const sstring USER_NAME = "username"; -static const sstring DEFAULT_USER_NAME = auth::auth::DEFAULT_SUPERUSER_NAME; -static const sstring DEFAULT_USER_PASSWORD = auth::auth::DEFAULT_SUPERUSER_NAME; -static const sstring CREDENTIALS_CF = "credentials"; - -static logging::logger plogger("password_authenticator"); - -auth::password_authenticator::~password_authenticator() -{} - -auth::password_authenticator::password_authenticator() -{} - -// TODO: blowfish -// Origin uses Java bcrypt library, i.e. blowfish salt -// generation and hashing, which is arguably a "better" -// password hash than sha/md5 versions usually available in -// crypt_r. Otoh, glibc 2.7+ uses a modified sha512 algo -// which should be the same order of safe, so the only -// real issue should be salted hash compatibility with -// origin if importing system tables from there. -// -// Since bcrypt/blowfish is _not_ (afaict) not available -// as a dev package/lib on most linux distros, we'd have to -// copy and compile for example OWL crypto -// (http://cvsweb.openwall.com/cgi/cvsweb.cgi/Owl/packages/glibc/crypt_blowfish/) -// to be fully bit-compatible. -// -// Until we decide this is needed, let's just use crypt_r, -// and some old-fashioned random salt generation. - -static constexpr size_t rand_bytes = 16; - -static sstring hashpw(const sstring& pass, const sstring& salt) { - // crypt_data is huge. should this be a thread_local static? - auto tmp = std::make_unique(); - tmp->initialized = 0; - auto res = crypt_r(pass.c_str(), salt.c_str(), tmp.get()); - if (res == nullptr) { - throw std::system_error(errno, std::system_category()); - } - return res; -} - -static bool checkpw(const sstring& pass, const sstring& salted_hash) { - auto tmp = hashpw(pass, salted_hash); - return tmp == salted_hash; -} - -static sstring gensalt() { - static sstring prefix; - - std::random_device rd; - std::default_random_engine e1(rd()); - std::uniform_int_distribution dist; - - sstring valid_salt = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./"; - sstring input(rand_bytes, 0); - - for (char&c : input) { - c = valid_salt[dist(e1) % valid_salt.size()]; - } - - sstring salt; - - if (!prefix.empty()) { - return prefix + salt; - } - - auto tmp = std::make_unique(); - tmp->initialized = 0; - - // Try in order: - // blowfish 2011 fix, blowfish, sha512, sha256, md5 - for (sstring pfx : { "$2y$", "$2a$", "$6$", "$5$", "$1$" }) { - salt = pfx + input; - if (crypt_r("fisk", salt.c_str(), tmp.get())) { - prefix = pfx; - return salt; - } - } - throw std::runtime_error("Could not initialize hashing algorithm"); -} - -static sstring hashpw(const sstring& pass) { - return hashpw(pass, gensalt()); -} - -future<> auth::password_authenticator::init() { - gensalt(); // do this once to determine usable hashing - - sstring create_table = sprint( - "CREATE TABLE %s.%s (" - "%s text," - "%s text," // salt + hash + number of rounds - "options map,"// for future extensions - "PRIMARY KEY(%s)" - ") WITH gc_grace_seconds=%d", - auth::auth::AUTH_KS, - CREDENTIALS_CF, USER_NAME, SALTED_HASH, USER_NAME, - 90 * 24 * 60 * 60); // 3 months. 
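hashpw()/checkpw() above rely on the property that a crypt_r() hash string embeds its own algorithm prefix and salt, so verification is just re-hashing the candidate password with the stored string as the setting and comparing the results. A minimal sketch of that verify step, assuming glibc's crypt_r from <crypt.h> (link with -lcrypt) and std::string in place of sstring:

#include <crypt.h>
#include <memory>
#include <stdexcept>
#include <string>

bool verify_password(const std::string& pass, const std::string& salted_hash) {
    // crypt_data is large, so heap-allocate it as the original code does.
    auto data = std::make_unique<crypt_data>();
    data->initialized = 0;
    // The stored hash doubles as the "setting": its prefix selects the
    // algorithm and its salt portion is reused for the re-hash.
    const char* out = crypt_r(pass.c_str(), salted_hash.c_str(), data.get());
    if (out == nullptr) {
        throw std::runtime_error("hashing failed");
    }
    return salted_hash == out;
}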
- - return auth::setup_table(CREDENTIALS_CF, create_table).then([this] { - // instead of once-timer, just schedule this later - auth::schedule_when_up([] { - return auth::has_existing_users(CREDENTIALS_CF, DEFAULT_USER_NAME, USER_NAME).then([](bool exists) { - if (!exists) { - cql3::get_local_query_processor().process(sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?) USING TIMESTAMP 0", - auth::AUTH_KS, - CREDENTIALS_CF, - USER_NAME, SALTED_HASH - ), - db::consistency_level::ONE, {DEFAULT_USER_NAME, hashpw(DEFAULT_USER_PASSWORD)}).then([](auto) { - plogger.info("Created default user '{}'", DEFAULT_USER_NAME); - }); - } - }); - }); - }); -} - -db::consistency_level auth::password_authenticator::consistency_for_user(const sstring& username) { - if (username == DEFAULT_USER_NAME) { - return db::consistency_level::QUORUM; - } - return db::consistency_level::LOCAL_ONE; -} - -const sstring& auth::password_authenticator::class_name() const { - return PASSWORD_AUTHENTICATOR_NAME; -} - -bool auth::password_authenticator::require_authentication() const { - return true; -} - -auth::authenticator::option_set auth::password_authenticator::supported_options() const { - return option_set::of(); -} - -auth::authenticator::option_set auth::password_authenticator::alterable_options() const { - return option_set::of(); -} - -future<::shared_ptr > auth::password_authenticator::authenticate( - const credentials_map& credentials) const { - if (!credentials.count(USERNAME_KEY)) { - throw exceptions::authentication_exception(sprint("Required key '%s' is missing", USERNAME_KEY)); - } - if (!credentials.count(PASSWORD_KEY)) { - throw exceptions::authentication_exception(sprint("Required key '%s' is missing", PASSWORD_KEY)); - } - - auto& username = credentials.at(USERNAME_KEY); - auto& password = credentials.at(PASSWORD_KEY); - - // Here was a thread local, explicit cache of prepared statement. In normal execution this is - // fine, but since we in testing set up and tear down system over and over, we'd start using - // obsolete prepared statements pretty quickly. - // Rely on query processing caching statements instead, and lets assume - // that a map lookup string->statement is not gonna kill us much. - return futurize_apply([this, username, password] { - auto& qp = cql3::get_local_query_processor(); - return qp.process(sprint("SELECT %s FROM %s.%s WHERE %s = ?", SALTED_HASH, - auth::AUTH_KS, CREDENTIALS_CF, USER_NAME), - consistency_for_user(username), {username}, true); - }).then_wrapped([=](future<::shared_ptr> f) { - try { - auto res = f.get0(); - if (res->empty() || !checkpw(password, res->one().get_as(SALTED_HASH))) { - throw exceptions::authentication_exception("Username and/or password are incorrect"); - } - return make_ready_future<::shared_ptr>(::make_shared(username)); - } catch (std::system_error &) { - std::throw_with_nested(exceptions::authentication_exception("Could not verify password")); - } catch (exceptions::request_execution_exception& e) { - std::throw_with_nested(exceptions::authentication_exception(e.what())); - } catch (...) 
{ - std::throw_with_nested(exceptions::authentication_exception("authentication failed")); - } - }); -} - -future<> auth::password_authenticator::create(sstring username, - const option_map& options) { - try { - auto password = boost::any_cast(options.at(option::PASSWORD)); - auto query = sprint("INSERT INTO %s.%s (%s, %s) VALUES (?, ?)", - auth::AUTH_KS, CREDENTIALS_CF, USER_NAME, SALTED_HASH); - auto& qp = cql3::get_local_query_processor(); - return qp.process(query, consistency_for_user(username), { username, hashpw(password) }).discard_result(); - } catch (std::out_of_range&) { - throw exceptions::invalid_request_exception("PasswordAuthenticator requires PASSWORD option"); - } -} - -future<> auth::password_authenticator::alter(sstring username, - const option_map& options) { - try { - auto password = boost::any_cast(options.at(option::PASSWORD)); - auto query = sprint("UPDATE %s.%s SET %s = ? WHERE %s = ?", - auth::AUTH_KS, CREDENTIALS_CF, SALTED_HASH, USER_NAME); - auto& qp = cql3::get_local_query_processor(); - return qp.process(query, consistency_for_user(username), { hashpw(password), username }).discard_result(); - } catch (std::out_of_range&) { - throw exceptions::invalid_request_exception("PasswordAuthenticator requires PASSWORD option"); - } -} - -future<> auth::password_authenticator::drop(sstring username) { - try { - auto query = sprint("DELETE FROM %s.%s WHERE %s = ?", - auth::AUTH_KS, CREDENTIALS_CF, USER_NAME); - auto& qp = cql3::get_local_query_processor(); - return qp.process(query, consistency_for_user(username), { username }).discard_result(); - } catch (std::out_of_range&) { - throw exceptions::invalid_request_exception("PasswordAuthenticator requires PASSWORD option"); - } -} - -const auth::resource_ids& auth::password_authenticator::protected_resources() const { - static const resource_ids ids({ data_resource(auth::AUTH_KS, CREDENTIALS_CF) }); - return ids; -} - -::shared_ptr auth::password_authenticator::new_sasl_challenge() const { - class plain_text_password_challenge: public sasl_challenge { - public: - plain_text_password_challenge(const password_authenticator& a) - : _authenticator(a) - {} - - /** - * SASL PLAIN mechanism specifies that credentials are encoded in a - * sequence of UTF-8 bytes, delimited by 0 (US-ASCII NUL). - * The form is : {code}authzIdauthnIdpassword{code} - * authzId is optional, and in fact we don't care about it here as we'll - * set the authzId to match the authnId (that is, there is no concept of - * a user being authorized to act on behalf of another). 
- * - * @param bytes encoded credentials string sent by the client - * @return map containing the username/password pairs in the form an IAuthenticator - * would expect - * @throws javax.security.sasl.SaslException - */ - bytes evaluate_response(bytes_view client_response) override { - plogger.debug("Decoding credentials from client token"); - - sstring username, password; - - auto b = client_response.crbegin(); - auto e = client_response.crend(); - auto i = b; - - while (i != e) { - if (*i == 0) { - sstring tmp(i.base(), b.base()); - if (password.empty()) { - password = std::move(tmp); - } else if (username.empty()) { - username = std::move(tmp); - } - b = ++i; - continue; - } - ++i; - } - - if (username.empty()) { - throw exceptions::authentication_exception("Authentication ID must not be null"); - } - if (password.empty()) { - throw exceptions::authentication_exception("Password must not be null"); - } - - _credentials[USERNAME_KEY] = std::move(username); - _credentials[PASSWORD_KEY] = std::move(password); - _complete = true; - return {}; - } - bool is_complete() const override { - return _complete; - } - future<::shared_ptr> get_authenticated_user() const override { - return _authenticator.authenticate(_credentials); - } - private: - const password_authenticator& _authenticator; - credentials_map _credentials; - bool _complete = false; - }; - return ::make_shared(*this); -} diff --git a/scylla/auth/password_authenticator.hh b/scylla/auth/password_authenticator.hh deleted file mode 100644 index 08c1767..0000000 --- a/scylla/auth/password_authenticator.hh +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
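evaluate_response() above decodes a SASL PLAIN token, which is [authzid] NUL authnid NUL password, and ignores the optional authzid. A sketch of the same decode done with a forward scan instead of the reverse iteration used above, with std::string standing in for the bytes types:

#include <stdexcept>
#include <string>
#include <utility>

std::pair<std::string, std::string> decode_plain(const std::string& token) {
    auto first = token.find('\0');               // end of (possibly empty) authzid
    auto second = token.find('\0', first + 1);   // end of authnid
    if (first == std::string::npos || second == std::string::npos) {
        throw std::runtime_error("malformed SASL PLAIN token");
    }
    std::string user = token.substr(first + 1, second - first - 1);
    std::string pass = token.substr(second + 1);
    if (user.empty()) {
        throw std::runtime_error("authentication id must not be empty");
    }
    if (pass.empty()) {
        throw std::runtime_error("password must not be empty");
    }
    return {user, pass};
}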
- */ - -#pragma once - -#include "authenticator.hh" - -namespace auth { - -class password_authenticator : public authenticator { -public: - static const sstring PASSWORD_AUTHENTICATOR_NAME; - - password_authenticator(); - ~password_authenticator(); - - future<> init(); - - const sstring& class_name() const override; - bool require_authentication() const override; - option_set supported_options() const override; - option_set alterable_options() const override; - future<::shared_ptr> authenticate(const credentials_map& credentials) const override; - future<> create(sstring username, const option_map& options) override; - future<> alter(sstring username, const option_map& options) override; - future<> drop(sstring username) override; - const resource_ids& protected_resources() const override; - ::shared_ptr new_sasl_challenge() const override; - - - static db::consistency_level consistency_for_user(const sstring& username); -}; - -} - diff --git a/scylla/auth/permission.cc b/scylla/auth/permission.cc deleted file mode 100644 index 559ef5b..0000000 --- a/scylla/auth/permission.cc +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include -#include -#include "permission.hh" - -const auth::permission_set auth::permissions::ALL_DATA = - auth::permission_set::of(); -const auth::permission_set auth::permissions::ALL = auth::permissions::ALL_DATA; -const auth::permission_set auth::permissions::NONE; -const auth::permission_set auth::permissions::ALTERATIONS = - auth::permission_set::of(); - -static const std::unordered_map permission_names({ - { "READ", auth::permission::READ }, - { "WRITE", auth::permission::WRITE }, - { "CREATE", auth::permission::CREATE }, - { "ALTER", auth::permission::ALTER }, - { "DROP", auth::permission::DROP }, - { "SELECT", auth::permission::SELECT }, - { "MODIFY", auth::permission::MODIFY }, - { "AUTHORIZE", auth::permission::AUTHORIZE }, -}); - -const sstring& auth::permissions::to_string(permission p) { - for (auto& v : permission_names) { - if (v.second == p) { - return v.first; - } - } - throw std::out_of_range("unknown permission"); -} - -auth::permission auth::permissions::from_string(const sstring& s) { - sstring upper(s); - boost::to_upper(upper); - return permission_names.at(upper); -} - -std::unordered_set auth::permissions::to_strings(const permission_set& set) { - std::unordered_set res; - for (auto& v : permission_names) { - if (set.contains(v.second)) { - res.emplace(v.first); - } - } - return res; -} - -auth::permission_set auth::permissions::from_strings(const std::unordered_set& set) { - permission_set res = auth::permissions::NONE; - for (auto& s : set) { - res.set(from_string(s)); - } - return res; -} - -bool auth::operator<(const permission_set& p1, const permission_set& p2) { - return p1.mask() < p2.mask(); -} diff --git a/scylla/auth/permission.hh b/scylla/auth/permission.hh deleted file mode 100644 index 6d3e7e5..0000000 --- a/scylla/auth/permission.hh +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
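from_string()/to_string() above keep a single forward name table and do a linear scan for the reverse direction, which is fine for eight entries. A standalone sketch of that round-trip, assuming std::string in place of sstring:

#include <algorithm>
#include <cctype>
#include <string>
#include <unordered_map>

enum class permission { READ, WRITE, CREATE, ALTER, DROP, SELECT, MODIFY, AUTHORIZE };

static const std::unordered_map<std::string, permission> permission_names = {
    {"READ", permission::READ},     {"WRITE", permission::WRITE},
    {"CREATE", permission::CREATE}, {"ALTER", permission::ALTER},
    {"DROP", permission::DROP},     {"SELECT", permission::SELECT},
    {"MODIFY", permission::MODIFY}, {"AUTHORIZE", permission::AUTHORIZE},
};

permission from_string(std::string s) {
    // case-normalize, then look up; unknown names throw std::out_of_range
    std::transform(s.begin(), s.end(), s.begin(), ::toupper);
    return permission_names.at(s);
}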
- */ - -#pragma once - -#include -#include - -#include "seastarx.hh" -#include "enum_set.hh" - -namespace auth { - -enum class permission { - //Deprecated - READ, - //Deprecated - WRITE, - - // schema management - CREATE, // required for CREATE KEYSPACE and CREATE TABLE. - ALTER, // required for ALTER KEYSPACE, ALTER TABLE, CREATE INDEX, DROP INDEX. - DROP, // required for DROP KEYSPACE and DROP TABLE. - - // data access - SELECT, // required for SELECT. - MODIFY, // required for INSERT, UPDATE, DELETE, TRUNCATE. - - // permission management - AUTHORIZE, // required for GRANT and REVOKE. -}; - -typedef enum_set> permission_set; - -bool operator<(const permission_set&, const permission_set&); - -namespace permissions { - -extern const permission_set ALL_DATA; -extern const permission_set ALL; -extern const permission_set NONE; -extern const permission_set ALTERATIONS; - -const sstring& to_string(permission); -permission from_string(const sstring&); - -std::unordered_set to_strings(const permission_set&); -permission_set from_strings(const std::unordered_set&); - - -} - -} diff --git a/scylla/cache_streamed_mutation.hh b/scylla/cache_streamed_mutation.hh deleted file mode 100644 index 0eb3db6..0000000 --- a/scylla/cache_streamed_mutation.hh +++ /dev/null @@ -1,508 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
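The operator< declared above can order permission sets cheaply because enum_set stores each permission as one bit of an integer mask; comparing masks numerically yields an arbitrary but strict total order, which is all an ordered container needs. A toy illustration with a plain integer mask:

#include <cstdint>

using permission_mask = std::uint32_t;

constexpr permission_mask bit(unsigned p) { return permission_mask{1} << p; }

constexpr bool less(permission_mask a, permission_mask b) { return a < b; }
// With the enum order above (READ=0, WRITE=1, CREATE=2, ALTER=3, ...):
// less(bit(0) | bit(2), bit(3)) is true, i.e. {READ, CREATE} orders before {ALTER}.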
- */ - -#pragma once - -#include -#include "row_cache.hh" -#include "mutation_reader.hh" -#include "streamed_mutation.hh" -#include "partition_version.hh" -#include "utils/logalloc.hh" -#include "query-request.hh" -#include "partition_snapshot_reader.hh" -#include "partition_snapshot_row_cursor.hh" -#include "read_context.hh" - -namespace cache { - -class lsa_manager { - row_cache& _cache; -public: - lsa_manager(row_cache& cache) : _cache(cache) { } - template - decltype(auto) run_in_read_section(const Func& func) { - return _cache._read_section(_cache._tracker.region(), [&func] () { - return with_linearized_managed_bytes([&func] () { - return func(); - }); - }); - } - template - decltype(auto) run_in_update_section(const Func& func) { - return _cache._update_section(_cache._tracker.region(), [&func] () { - return with_linearized_managed_bytes([&func] () { - return func(); - }); - }); - } - template - void run_in_update_section_with_allocator(Func&& func) { - return _cache._update_section(_cache._tracker.region(), [this, &func] () { - return with_linearized_managed_bytes([this, &func] () { - return with_allocator(_cache._tracker.region().allocator(), [this, &func] () mutable { - return func(); - }); - }); - }); - } - logalloc::region& region() { return _cache._tracker.region(); } - logalloc::allocating_section& read_section() { return _cache._read_section; } -}; - -class cache_streamed_mutation final : public streamed_mutation::impl { - lw_shared_ptr _snp; - position_in_partition::tri_compare _position_cmp; - - query::clustering_key_filter_ranges _ck_ranges; - query::clustering_row_ranges::const_iterator _ck_ranges_curr; - query::clustering_row_ranges::const_iterator _ck_ranges_end; - - lsa_manager _lsa_manager; - - stdx::optional _last_row_key; - - // We need to be prepared that we may get overlapping and out of order - // range tombstones. We must emit fragments with strictly monotonic positions, - // so we can't just trim such tombstones to the position of the last fragment. - // To solve that, range tombstones are accumulated first in a range_tombstone_stream - // and emitted once we have a fragment with a larger position. - range_tombstone_stream _tombstones; - - // Holds the lower bound of a position range which hasn't been processed yet. - // Only fragments with positions < _lower_bound have been emitted. - position_in_partition _lower_bound; - position_in_partition_view _upper_bound; - - bool _static_row_done = false; - bool _reading_underlying = false; - lw_shared_ptr _read_context; - partition_snapshot_row_cursor _next_row; - bool _next_row_in_range = false; - - future<> do_fill_buffer(); - future<> copy_from_cache_to_buffer(); - future<> process_static_row(); - void move_to_end(); - future<> move_to_next_range(); - future<> move_to_current_range(); - future<> move_to_next_entry(); - // Emits all delayed range tombstones with positions smaller than upper_bound. - void drain_tombstones(position_in_partition_view upper_bound); - // Emits all delayed range tombstones. 
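The comment above on overlapping, out-of-order range tombstones describes a general pattern: park items that may still be overtaken in a min-heap and emit them only once the stream has provably advanced past them, so emitted positions stay strictly monotonic. A toy sketch of that accumulate-then-drain shape, with int positions standing in for position_in_partition and the range_tombstone_stream machinery:

#include <functional>
#include <queue>
#include <vector>

struct tombstone_buffer {
    // min-heap: the smallest pending position is always on top
    std::priority_queue<int, std::vector<int>, std::greater<int>> _pending;

    void add(int position) { _pending.push(position); }

    // Emit everything positioned strictly before upper_bound; later arrivals
    // with smaller positions can no longer appear, so ordering is preserved.
    template <typename Emit>
    void drain_before(int upper_bound, Emit emit) {
        while (!_pending.empty() && _pending.top() < upper_bound) {
            emit(_pending.top());
            _pending.pop();
        }
    }
};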
- void drain_tombstones(); - void add_to_buffer(const partition_snapshot_row_cursor&); - void add_clustering_row_to_buffer(mutation_fragment&&); - void add_to_buffer(range_tombstone&&); - void add_to_buffer(mutation_fragment&&); - future<> read_from_underlying(); - future<> start_reading_from_underlying(); - bool after_current_range(position_in_partition_view position); - bool can_populate() const; - void maybe_update_continuity(); - void maybe_add_to_cache(const mutation_fragment& mf); - void maybe_add_to_cache(const clustering_row& cr); - void maybe_add_to_cache(const range_tombstone& rt); - void maybe_add_to_cache(const static_row& sr); - void maybe_set_static_row_continuous(); -public: - cache_streamed_mutation(schema_ptr s, - dht::decorated_key dk, - query::clustering_key_filter_ranges&& crr, - lw_shared_ptr ctx, - lw_shared_ptr snp, - row_cache& cache) - : streamed_mutation::impl(std::move(s), dk, snp->partition_tombstone()) - , _snp(std::move(snp)) - , _position_cmp(*_schema) - , _ck_ranges(std::move(crr)) - , _ck_ranges_curr(_ck_ranges.begin()) - , _ck_ranges_end(_ck_ranges.end()) - , _lsa_manager(cache) - , _tombstones(*_schema) - , _lower_bound(position_in_partition::before_all_clustered_rows()) - , _upper_bound(position_in_partition_view::before_all_clustered_rows()) - , _read_context(std::move(ctx)) - , _next_row(*_schema, cache._tracker.region(), *_snp) - { } - cache_streamed_mutation(const cache_streamed_mutation&) = delete; - cache_streamed_mutation(cache_streamed_mutation&&) = delete; - virtual future<> fill_buffer() override; - virtual ~cache_streamed_mutation() { - maybe_merge_versions(_snp, _lsa_manager.region(), _lsa_manager.read_section()); - } -}; - -inline -future<> cache_streamed_mutation::process_static_row() { - if (_snp->version()->partition().static_row_continuous()) { - _read_context->cache().on_row_hit(); - row sr = _snp->static_row(); - if (!sr.empty()) { - push_mutation_fragment(mutation_fragment(static_row(std::move(sr)))); - } - return make_ready_future<>(); - } else { - _read_context->cache().on_row_miss(); - return _read_context->get_next_fragment().then([this] (mutation_fragment_opt&& sr) { - if (sr) { - assert(sr->is_static_row()); - maybe_add_to_cache(sr->as_static_row()); - push_mutation_fragment(std::move(*sr)); - } - maybe_set_static_row_continuous(); - }); - } -} - -inline -future<> cache_streamed_mutation::fill_buffer() { - if (!_static_row_done) { - _static_row_done = true; - auto after_static_row = [this] { - if (_ck_ranges_curr == _ck_ranges_end) { - _end_of_stream = true; - return make_ready_future<>(); - } - return _lsa_manager.run_in_read_section([this] { - return move_to_current_range(); - }).then([this] { - return fill_buffer(); - }); - }; - if (_schema->has_static_columns()) { - return process_static_row().then(std::move(after_static_row)); - } else { - return after_static_row(); - } - } - return do_until([this] { return _end_of_stream || is_buffer_full(); }, [this] { - return do_fill_buffer(); - }); -} - -inline -future<> cache_streamed_mutation::do_fill_buffer() { - if (_reading_underlying) { - return read_from_underlying(); - } - return _lsa_manager.run_in_read_section([this] { - auto same_pos = _next_row.maybe_refresh(); - // FIXME: If continuity changed anywhere between _lower_bound and _next_row.position() - // we need to redo the lookup with _lower_bound. There is no eviction yet, so not yet a problem. 
- assert(same_pos); - while (!is_buffer_full() && !_end_of_stream && !_reading_underlying) { - future<> f = copy_from_cache_to_buffer(); - if (!f.available() || need_preempt()) { - return f; - } - } - return make_ready_future<>(); - }); -} - -inline -future<> cache_streamed_mutation::read_from_underlying() { - return consume_mutation_fragments_until(_read_context->get_streamed_mutation(), - [this] { return !_reading_underlying || is_buffer_full(); }, - [this] (mutation_fragment mf) { - _read_context->cache().on_row_miss(); - maybe_add_to_cache(mf); - add_to_buffer(std::move(mf)); - }, - [this] { - _reading_underlying = false; - return _lsa_manager.run_in_update_section([this] { - auto same_pos = _next_row.maybe_refresh(); - assert(same_pos); // FIXME: handle eviction - if (_next_row_in_range) { - maybe_update_continuity(); - add_to_buffer(_next_row); - return move_to_next_entry(); - } else { - if (no_clustering_row_between(*_schema, _upper_bound, _next_row.position())) { - this->maybe_update_continuity(); - } else { - // FIXME: Insert dummy entry at _upper_bound. - _read_context->cache().on_mispopulate(); - } - return move_to_next_range(); - } - }); - }); -} - -inline -void cache_streamed_mutation::maybe_update_continuity() { - if (can_populate() && _next_row.is_in_latest_version()) { - if (_last_row_key) { - if (_next_row.previous_row_in_latest_version_has_key(*_last_row_key)) { - _next_row.set_continuous(true); - } - } else if (!_ck_ranges_curr->start()) { - _next_row.set_continuous(true); - } - } else { - _read_context->cache().on_mispopulate(); - } -} - -inline -void cache_streamed_mutation::maybe_add_to_cache(const mutation_fragment& mf) { - if (mf.is_range_tombstone()) { - maybe_add_to_cache(mf.as_range_tombstone()); - } else { - assert(mf.is_clustering_row()); - const clustering_row& cr = mf.as_clustering_row(); - maybe_add_to_cache(cr); - } -} - -inline -void cache_streamed_mutation::maybe_add_to_cache(const clustering_row& cr) { - if (!can_populate()) { - _read_context->cache().on_mispopulate(); - return; - } - _lsa_manager.run_in_update_section_with_allocator([this, &cr] { - mutation_partition& mp = _snp->version()->partition(); - rows_entry::compare less(*_schema); - - // FIXME: If _next_row is up to date, but latest version doesn't have iterator in - // current row (could be far away, so we'd do this often), then this will do - // the lookup in mp. This is not necessary, because _next_row has iterators for - // next rows in each version, even if they're not part of the current row. - // They're currently buried in the heap, but you could keep a vector of - // iterators per each version in addition to the heap. - auto new_entry = alloc_strategy_unique_ptr( - current_allocator().construct(cr.key(), cr.tomb(), cr.marker(), cr.cells())); - new_entry->set_continuous(false); - auto it = _next_row.has_up_to_date_row_from_latest_version() - ? _next_row.get_iterator_in_latest_version() : mp.clustered_rows().lower_bound(cr.key(), less); - auto insert_result = mp.clustered_rows().insert_check(it, *new_entry, less); - if (insert_result.second) { - _read_context->cache().on_row_insert(); - new_entry.release(); - } - it = insert_result.first; - - rows_entry& e = *it; - if (_last_row_key) { - if (it == mp.clustered_rows().begin()) { - // FIXME: check whether entry for _last_row_key is in older versions and if so set - // continuity to true. 
- _read_context->cache().on_mispopulate(); - } else { - auto prev_it = it; - --prev_it; - clustering_key_prefix::equality eq(*_schema); - if (eq(*_last_row_key, prev_it->key())) { - e.set_continuous(true); - } - } - } else if (!_ck_ranges_curr->start()) { - e.set_continuous(true); - } else { - // FIXME: Insert dummy entry at _ck_ranges_curr->start() - _read_context->cache().on_mispopulate(); - } - }); -} - -inline -bool cache_streamed_mutation::after_current_range(position_in_partition_view p) { - return _position_cmp(p, _upper_bound) >= 0; -} - -inline -future<> cache_streamed_mutation::start_reading_from_underlying() { - _reading_underlying = true; - auto end = _next_row_in_range ? position_in_partition(_next_row.position()) - : position_in_partition(_upper_bound); - return _read_context->fast_forward_to(position_range{_lower_bound, std::move(end)}); -} - -inline -future<> cache_streamed_mutation::copy_from_cache_to_buffer() { - position_in_partition_view next_lower_bound = _next_row.dummy() ? _next_row.position() : position_in_partition_view::after_key(_next_row.key()); - for (auto&& rts : _snp->range_tombstones(*_schema, _lower_bound, _next_row_in_range ? next_lower_bound : _upper_bound)) { - add_to_buffer(std::move(rts)); - if (is_buffer_full()) { - return make_ready_future<>(); - } - } - if (_next_row_in_range) { - add_to_buffer(_next_row); - return move_to_next_entry(); - } else { - return move_to_next_range(); - } -} - -inline -void cache_streamed_mutation::move_to_end() { - drain_tombstones(); - _end_of_stream = true; -} - -inline -future<> cache_streamed_mutation::move_to_next_range() { - ++_ck_ranges_curr; - if (_ck_ranges_curr == _ck_ranges_end) { - move_to_end(); - return make_ready_future<>(); - } else { - return move_to_current_range(); - } -} - -inline -future<> cache_streamed_mutation::move_to_current_range() { - _last_row_key = std::experimental::nullopt; - _lower_bound = position_in_partition::for_range_start(*_ck_ranges_curr); - _upper_bound = position_in_partition_view::for_range_end(*_ck_ranges_curr); - auto complete_until_next = _next_row.advance_to(_lower_bound) || _next_row.continuous(); - _next_row_in_range = !after_current_range(_next_row.position()); - if (!complete_until_next) { - return start_reading_from_underlying(); - } - return make_ready_future<>(); -} - -// _next_row must be inside the range. 
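The move_to_* logic here advances through cached entries while their continuity flags say the preceding interval is fully populated, and switches to the underlying source at the first gap. A deliberately simplified, array-based illustration of that decision; the real code drives a partition_snapshot_row_cursor, and entry/first_gap are illustrative names only:

#include <optional>

struct entry {
    int pos;
    bool continuous_from_predecessor; // interval before this entry is populated
};

// Returns the position at which the reader must fall back to the underlying
// source, or nullopt if the cache covers all of [from, to).
std::optional<int> first_gap(const entry* e, int n, int from, int to) {
    for (int i = 0; i < n; ++i) {
        if (e[i].pos < from) {
            continue;
        }
        if (e[i].pos >= to) {
            break;
        }
        if (!e[i].continuous_from_predecessor) {
            return e[i].pos; // cache does not cover the interval ending here
        }
    }
    return std::nullopt;
}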
-inline -future<> cache_streamed_mutation::move_to_next_entry() { - if (no_clustering_row_between(*_schema, _next_row.position(), _upper_bound)) { - return move_to_next_range(); - } else { - if (!_next_row.next()) { - move_to_end(); - return make_ready_future<>(); - } - _next_row_in_range = !after_current_range(_next_row.position()); - if (!_next_row.continuous()) { - return start_reading_from_underlying(); - } - return make_ready_future<>(); - } -} - -inline -void cache_streamed_mutation::drain_tombstones(position_in_partition_view pos) { - while (auto mfo = _tombstones.get_next(pos)) { - push_mutation_fragment(std::move(*mfo)); - } -} - -inline -void cache_streamed_mutation::drain_tombstones() { - while (auto mfo = _tombstones.get_next()) { - push_mutation_fragment(std::move(*mfo)); - } -} - -inline -void cache_streamed_mutation::add_to_buffer(mutation_fragment&& mf) { - if (mf.is_clustering_row()) { - add_clustering_row_to_buffer(std::move(mf)); - } else { - assert(mf.is_range_tombstone()); - add_to_buffer(std::move(mf).as_range_tombstone()); - } -} - -inline -void cache_streamed_mutation::add_to_buffer(const partition_snapshot_row_cursor& row) { - if (!row.dummy()) { - _read_context->cache().on_row_hit(); - add_clustering_row_to_buffer(row.row()); - } -} - -inline -void cache_streamed_mutation::add_clustering_row_to_buffer(mutation_fragment&& mf) { - auto& row = mf.as_clustering_row(); - drain_tombstones(row.position()); - _last_row_key = row.key(); - _lower_bound = position_in_partition::after_key(row.key()); - push_mutation_fragment(std::move(mf)); -} - -inline -void cache_streamed_mutation::add_to_buffer(range_tombstone&& rt) { - // This guarantees that rt starts after any emitted clustering_row - if (!rt.trim_front(*_schema, _lower_bound)) { - return; - } - _lower_bound = position_in_partition(rt.position()); - _tombstones.apply(std::move(rt)); - drain_tombstones(_lower_bound); -} - -inline -void cache_streamed_mutation::maybe_add_to_cache(const range_tombstone& rt) { - if (can_populate()) { - _lsa_manager.run_in_update_section_with_allocator([&] { - _snp->version()->partition().apply_row_tombstone(*_schema, rt); - }); - } else { - _read_context->cache().on_mispopulate(); - } -} - -inline -void cache_streamed_mutation::maybe_add_to_cache(const static_row& sr) { - if (can_populate()) { - _read_context->cache().on_row_insert(); - _lsa_manager.run_in_update_section_with_allocator([&] { - _snp->version()->partition().static_row().apply(*_schema, column_kind::static_column, sr.cells()); - }); - } else { - _read_context->cache().on_mispopulate(); - } -} - -inline -void cache_streamed_mutation::maybe_set_static_row_continuous() { - if (can_populate()) { - _snp->version()->partition().set_static_row_continuous(true); - } else { - _read_context->cache().on_mispopulate(); - } -} - -inline -bool cache_streamed_mutation::can_populate() const { - return _snp->at_latest_version() && _read_context->cache().phase_of(_read_context->key()) == _read_context->phase(); -} - -} // namespace cache - -inline streamed_mutation make_cache_streamed_mutation(schema_ptr s, - dht::decorated_key dk, - query::clustering_key_filter_ranges crr, - row_cache& cache, - lw_shared_ptr ctx, - lw_shared_ptr snp) -{ - return make_streamed_mutation( - std::move(s), std::move(dk), std::move(crr), std::move(ctx), std::move(snp), cache); -} diff --git a/scylla/caching_options.hh b/scylla/caching_options.hh deleted file mode 100644 index c7bc370..0000000 --- a/scylla/caching_options.hh +++ /dev/null @@ -1,99 +0,0 @@ -/* - * 
Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once -#include -#include -#include "exceptions/exceptions.hh" -#include "json.hh" -#include "seastarx.hh" - -class schema; - -class caching_options { - // For Origin, the default value for the row is "NONE". However, since our - // row_cache will cache both keys and rows, we will default to ALL. - // - // FIXME: We don't yet make any changes to our caching policies based on - // this (and maybe we shouldn't) - static constexpr auto default_key = "ALL"; - static constexpr auto default_row = "ALL"; - - sstring _key_cache; - sstring _row_cache; - caching_options(sstring k, sstring r) : _key_cache(k), _row_cache(r) { - if ((k != "ALL") && (k != "NONE")) { - throw exceptions::configuration_exception("Invalid key value: " + k); - } - - if ((r == "ALL") || (r == "NONE")) { - return; - } else { - try { - boost::lexical_cast(r); - } catch (boost::bad_lexical_cast& e) { - throw exceptions::configuration_exception("Invalid key value: " + r); - } - } - } - - friend class schema; - caching_options() : _key_cache(default_key), _row_cache(default_row) {} -public: - - std::map to_map() const { - return {{ "keys", _key_cache }, { "rows_per_partition", _row_cache }}; - } - - sstring to_sstring() const { - return json::to_json(to_map()); - } - - template - static caching_options from_map(const Map & map) { - sstring k = default_key; - sstring r = default_row; - - for (auto& p : map) { - if (p.first == "keys") { - k = p.second; - } else if (p.first == "rows_per_partition") { - r = p.second; - } else { - throw exceptions::configuration_exception("Invalid caching option: " + p.first); - } - } - return caching_options(k, r); - } - static caching_options from_sstring(const sstring& str) { - return from_map(json::to_map(str)); - } - - bool operator==(const caching_options& other) const { - return _key_cache == other._key_cache && _row_cache == other._row_cache; - } - bool operator!=(const caching_options& other) const { - return !(*this == other); - } -}; - - - diff --git a/scylla/canonical_mutation.cc b/scylla/canonical_mutation.cc deleted file mode 100644 index f4de2fc..0000000 --- a/scylla/canonical_mutation.cc +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. 
If not, see . - */ - -#include "canonical_mutation.hh" -#include "mutation.hh" -#include "mutation_partition_serializer.hh" -#include "counters.hh" -#include "converting_mutation_partition_applier.hh" -#include "hashing_partition_visitor.hh" -#include "utils/UUID.hh" -#include "serializer.hh" -#include "idl/uuid.dist.hh" -#include "idl/keys.dist.hh" -#include "idl/mutation.dist.hh" -#include "serializer_impl.hh" -#include "serialization_visitors.hh" -#include "idl/uuid.dist.impl.hh" -#include "idl/keys.dist.impl.hh" -#include "idl/mutation.dist.impl.hh" - -canonical_mutation::canonical_mutation(bytes data) - : _data(std::move(data)) -{ } - -canonical_mutation::canonical_mutation(const mutation& m) -{ - mutation_partition_serializer part_ser(*m.schema(), m.partition()); - - bytes_ostream out; - ser::writer_of_canonical_mutation wr(out); - std::move(wr).write_table_id(m.schema()->id()) - .write_schema_version(m.schema()->version()) - .write_key(m.key()) - .write_mapping(m.schema()->get_column_mapping()) - .partition([&] (auto wr) { - part_ser.write(std::move(wr)); - }).end_canonical_mutation(); - _data = to_bytes(out.linearize()); -} - -utils::UUID canonical_mutation::column_family_id() const { - auto in = ser::as_input_stream(_data); - auto mv = ser::deserialize(in, boost::type()); - return mv.table_id(); -} - -mutation canonical_mutation::to_mutation(schema_ptr s) const { - auto in = ser::as_input_stream(_data); - auto mv = ser::deserialize(in, boost::type()); - - auto cf_id = mv.table_id(); - if (s->id() != cf_id) { - throw std::runtime_error(sprint("Attempted to deserialize canonical_mutation of table %s with schema of table %s (%s.%s)", - cf_id, s->id(), s->ks_name(), s->cf_name())); - } - - auto version = mv.schema_version(); - auto pk = mv.key(); - - mutation m(std::move(pk), std::move(s)); - - if (version == m.schema()->version()) { - auto partition_view = mutation_partition_view::from_view(mv.partition()); - m.partition().apply(*m.schema(), partition_view, *m.schema()); - } else { - column_mapping cm = mv.mapping(); - converting_mutation_partition_applier v(cm, *m.schema(), m.partition()); - auto partition_view = mutation_partition_view::from_view(mv.partition()); - partition_view.accept(cm, v); - } - return m; -} diff --git a/scylla/canonical_mutation.hh b/scylla/canonical_mutation.hh deleted file mode 100644 index 00e48c7..0000000 --- a/scylla/canonical_mutation.hh +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "bytes.hh" -#include "schema.hh" -#include "database_fwd.hh" -#include "mutation_partition_visitor.hh" -#include "mutation_partition_serializer.hh" - -// Immutable mutation form which can be read using any schema version of the same table. -// Safe to access from other shards via const&. -// Safe to pass serialized across nodes. 
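to_mutation() above is version-gated: the serialized form records the writer's schema version, a reader with a matching version applies the partition view directly, and a mismatch falls back to column-by-column conversion through the stored column mapping. A toy rendition of that decode shape; blob, translate_via_mapping(), and the integer ids are all illustrative stand-ins:

#include <cstdint>
#include <stdexcept>
#include <string>

struct blob {
    std::uint64_t table_id;
    std::uint64_t schema_version;
    std::string payload;
};

// Stub: the real slow path walks the stored column mapping with
// converting_mutation_partition_applier, dropping unrepresentable data.
std::string translate_via_mapping(const std::string& payload) { return payload; }

std::string decode(const blob& b, std::uint64_t my_table, std::uint64_t my_version) {
    if (b.table_id != my_table) {
        throw std::runtime_error("canonical mutation belongs to a different table");
    }
    if (b.schema_version == my_version) {
        return b.payload;                    // fast path: schema versions match
    }
    return translate_via_mapping(b.payload); // slow path: per-column conversion
}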
-class canonical_mutation { - bytes _data; -public: - explicit canonical_mutation(bytes); - explicit canonical_mutation(const mutation&); - - canonical_mutation(canonical_mutation&&) = default; - canonical_mutation(const canonical_mutation&) = default; - canonical_mutation& operator=(const canonical_mutation&) = default; - canonical_mutation& operator=(canonical_mutation&&) = default; - - // Create a mutation object interpreting this canonical mutation using - // given schema. - // - // Data which is not representable in the target schema is dropped. If this - // is not intended, user should sync the schema first. - mutation to_mutation(schema_ptr) const; - - utils::UUID column_family_id() const; - - const bytes& representation() const { return _data; } - -}; diff --git a/scylla/cartesian_product.hh b/scylla/cartesian_product.hh deleted file mode 100644 index b0e4357..0000000 --- a/scylla/cartesian_product.hh +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - * - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -// Single-pass range over cartesian product of vectors. - -// Note: -// {a, b, c} x {1, 2} = {{a, 1}, {a, 2}, {b, 1}, {b, 2}, {c, 1}, {c, 2}} -template -struct cartesian_product { - const std::vector>& _vec_of_vecs; -public: - class iterator : public std::iterator> { - public: - using value_type = std::vector; - private: - size_t _pos; - const std::vector>* _vec_of_vecs; - value_type _current; - std::vector::const_iterator> _iterators; - public: - struct end_tag {}; - iterator(end_tag) : _pos(-1) {} - iterator(const std::vector>& vec_of_vecs) : _pos(0), _vec_of_vecs(&vec_of_vecs) { - _iterators.reserve(vec_of_vecs.size()); - for (auto&& vec : vec_of_vecs) { - _iterators.push_back(vec.begin()); - if (vec.empty()) { - _pos = -1; - break; - } - } - } - value_type& operator*() { - _current.clear(); - _current.reserve(_vec_of_vecs->size()); - for (auto& i : _iterators) { - _current.emplace_back(*i); - } - return _current; - } - void operator++() { - ++_pos; - - for (ssize_t i = _iterators.size() - 1; i >= 0; --i) { - ++_iterators[i]; - if (_iterators[i] != (*_vec_of_vecs)[i].end()) { - return; - } - _iterators[i] = (*_vec_of_vecs)[i].begin(); - } - - // If we're here it means we've covered every combination - _pos = -1; - } - bool operator==(const iterator& o) const { return _pos == o._pos; } - bool operator!=(const iterator& o) const { return _pos != o._pos; } - }; -public: - cartesian_product(const std::vector>& vec_of_vecs) : _vec_of_vecs(vec_of_vecs) {} - iterator begin() { return iterator(_vec_of_vecs); } - iterator end() { return iterator(typename iterator::end_tag()); } -}; - -template -static inline -size_t cartesian_product_size(const std::vector>& vec_of_vecs) { - size_t r = 1; - for (auto&& vec : vec_of_vecs) { - r *= vec.size(); - } - return r; -} - -template -static inline -bool cartesian_product_is_empty(const std::vector>& 
vec_of_vecs) { - for (auto&& vec : vec_of_vecs) { - if (vec.empty()) { - return true; - } - } - return vec_of_vecs.empty(); -} - -template -static inline -cartesian_product make_cartesian_product(const std::vector>& vec_of_vecs) { - return cartesian_product(vec_of_vecs); -} diff --git a/scylla/cell_locking.hh b/scylla/cell_locking.hh deleted file mode 100644 index 8ac2313..0000000 --- a/scylla/cell_locking.hh +++ /dev/null @@ -1,566 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -#if __has_include() - -#include - -template -using small_vector = boost::container::small_vector; - -#else - -#include -template -using small_vector = std::vector; - -#endif - -#include "fnv1a_hasher.hh" -#include "streamed_mutation.hh" -#include "mutation_partition.hh" - -class cells_range { - using ids_vector_type = small_vector; - - position_in_partition_view _position; - ids_vector_type _ids; -public: - using iterator = ids_vector_type::iterator; - using const_iterator = ids_vector_type::const_iterator; - - cells_range() - : _position(position_in_partition_view(position_in_partition_view::static_row_tag_t())) { } - - explicit cells_range(position_in_partition_view pos, const row& cells) - : _position(pos) - { - _ids.reserve(cells.size()); - cells.for_each_cell([this] (auto id, auto&&) { - _ids.emplace_back(id); - }); - } - - position_in_partition_view position() const { return _position; } - bool empty() const { return _ids.empty(); } - - auto begin() const { return _ids.begin(); } - auto end() const { return _ids.end(); } -}; - -class partition_cells_range { - const mutation_partition& _mp; -public: - class iterator { - const mutation_partition& _mp; - stdx::optional _position; - cells_range _current; - public: - explicit iterator(const mutation_partition& mp) - : _mp(mp) - , _current(position_in_partition_view(position_in_partition_view::static_row_tag_t()), mp.static_row()) - { } - - iterator(const mutation_partition& mp, mutation_partition::rows_type::const_iterator it) - : _mp(mp) - , _position(it) - { } - - iterator& operator++() { - if (!_position) { - _position = _mp.clustered_rows().begin(); - } else { - ++(*_position); - } - if (_position != _mp.clustered_rows().end()) { - auto it = *_position; - _current = cells_range(position_in_partition_view(position_in_partition_view::clustering_row_tag_t(), it->key()), - it->row().cells()); - } - return *this; - } - - iterator operator++(int) { - iterator it(*this); - operator++(); - return it; - } - - cells_range& operator*() { - return _current; - } - - cells_range* operator->() { - return &_current; - } - - bool operator==(const iterator& other) const { - return _position == other._position; - } - bool operator!=(const iterator& other) const { - return !(*this == other); - } - }; -public: - explicit partition_cells_range(const mutation_partition& mp) : _mp(mp) { } - - iterator 
begin() const { - return iterator(_mp); - } - iterator end() const { - return iterator(_mp, _mp.clustered_rows().end()); - } -}; - -class locked_cell; - -struct cell_locker_stats { - uint64_t lock_acquisitions = 0; - uint64_t operations_waiting_for_lock = 0; -}; - -class cell_locker { -public: - using timeout_clock = lowres_clock; -private: - using semaphore_type = basic_semaphore; - - class partition_entry; - - struct cell_address { - position_in_partition position; - column_id id; - }; - - class cell_entry : public bi::unordered_set_base_hook>, - public enable_lw_shared_from_this { - partition_entry& _parent; - cell_address _address; - semaphore_type _semaphore { 0 }; - - friend class cell_locker; - public: - cell_entry(partition_entry& parent, position_in_partition position, column_id id) - : _parent(parent) - , _address { std::move(position), id } - { } - - // Upgrades cell_entry to another schema. - // Changes the value of cell_address, so cell_entry has to be - // temporarily removed from its parent partition_entry. - // Returns true if the cell_entry still exist in the new schema and - // should be reinserted. - bool upgrade(const schema& from, const schema& to, column_kind kind) noexcept { - auto& old_column_mapping = from.get_column_mapping(); - auto& column = old_column_mapping.column_at(kind, _address.id); - auto cdef = to.get_column_definition(column.name()); - if (!cdef) { - return false; - } - _address.id = cdef->id; - return true; - } - - const position_in_partition& position() const { - return _address.position; - } - - future<> lock(timeout_clock::time_point _timeout) { - return _semaphore.wait(_timeout); - } - void unlock() { - _semaphore.signal(); - } - - ~cell_entry() { - if (!is_linked()) { - return; - } - unlink(); - if (!--_parent._cell_count) { - delete &_parent; - } - } - - class hasher { - const schema* _schema; // pointer instead of reference for default assignment - public: - explicit hasher(const schema& s) : _schema(&s) { } - - size_t operator()(const cell_address& ca) const { - fnv1a_hasher hasher; - ca.position.feed_hash(hasher, *_schema); - ::feed_hash(hasher, ca.id); - return hasher.finalize(); - } - size_t operator()(const cell_entry& ce) const { - return operator()(ce._address); - } - }; - - class equal_compare { - position_in_partition::equal_compare _cmp; - private: - bool do_compare(const cell_address& a, const cell_address& b) const { - return a.id == b.id && _cmp(a.position, b.position); - } - public: - explicit equal_compare(const schema& s) : _cmp(s) { } - bool operator()(const cell_address& ca, const cell_entry& ce) const { - return do_compare(ca, ce._address); - } - bool operator()(const cell_entry& ce, const cell_address& ca) const { - return do_compare(ca, ce._address); - } - bool operator()(const cell_entry& a, const cell_entry& b) const { - return do_compare(a._address, b._address); - } - }; - }; - - class partition_entry : public bi::unordered_set_base_hook> { - using cells_type = bi::unordered_set, - bi::hash, - bi::constant_time_size>; - - static constexpr size_t initial_bucket_count = 16; - using max_load_factor = std::ratio<3, 4>; - dht::decorated_key _key; - cell_locker& _parent; - size_t _rehash_at_size = compute_rehash_at_size(initial_bucket_count); - std::unique_ptr _buckets; // TODO: start with internal storage? 
- size_t _cell_count = 0; // cells_type::empty() is not O(1) if the hook is auto-unlink - cells_type::bucket_type _internal_buckets[initial_bucket_count]; - cells_type _cells; - schema_ptr _schema; - - friend class cell_entry; - private: - static constexpr size_t compute_rehash_at_size(size_t bucket_count) { - return bucket_count * max_load_factor::num / max_load_factor::den; - } - void maybe_rehash() { - if (_cell_count >= _rehash_at_size) { - auto new_bucket_count = std::min(_cells.bucket_count() * 2, _cells.bucket_count() + 1024); - auto buckets = std::make_unique(new_bucket_count); - - _cells.rehash(cells_type::bucket_traits(buckets.get(), new_bucket_count)); - _buckets = std::move(buckets); - - _rehash_at_size = compute_rehash_at_size(new_bucket_count); - } - } - public: - partition_entry(schema_ptr s, cell_locker& parent, const dht::decorated_key& dk) - : _key(dk) - , _parent(parent) - , _cells(cells_type::bucket_traits(_internal_buckets, initial_bucket_count), - cell_entry::hasher(*s), cell_entry::equal_compare(*s)) - , _schema(s) - { } - - ~partition_entry() { - if (is_linked()) { - _parent._partition_count--; - } - } - - // Upgrades partition entry to new schema. Returns false if all - // cell_entries has been removed during the upgrade. - bool upgrade(schema_ptr new_schema); - - void insert(lw_shared_ptr cell) { - _cells.insert(*cell); - _cell_count++; - maybe_rehash(); - } - - cells_type& cells() { - return _cells; - } - - struct hasher { - size_t operator()(const dht::decorated_key& dk) const { - return std::hash()(dk); - } - size_t operator()(const partition_entry& pe) const { - return operator()(pe._key); - } - }; - - class equal_compare { - dht::decorated_key_equals_comparator _cmp; - public: - explicit equal_compare(const schema& s) : _cmp(s) { } - bool operator()(const dht::decorated_key& dk, const partition_entry& pe) { - return _cmp(dk, pe._key); - } - bool operator()(const partition_entry& pe, const dht::decorated_key& dk) { - return _cmp(dk, pe._key); - } - bool operator()(const partition_entry& a, const partition_entry& b) { - return _cmp(a._key, b._key); - } - }; - }; - - using partitions_type = bi::unordered_set, - bi::hash, - bi::constant_time_size>; - - static constexpr size_t initial_bucket_count = 4 * 1024; - using max_load_factor = std::ratio<3, 4>; - - std::unique_ptr _buckets; - partitions_type _partitions; - size_t _partition_count = 0; - size_t _rehash_at_size = compute_rehash_at_size(initial_bucket_count); - schema_ptr _schema; - - // partitions_type uses equality comparator which keeps a reference to the - // original schema, we must ensure that it doesn't die. 
- schema_ptr _original_schema; - cell_locker_stats& _stats; - - friend class locked_cell; -private: - struct locker; - - static constexpr size_t compute_rehash_at_size(size_t bucket_count) { - return bucket_count * max_load_factor::num / max_load_factor::den; - } - void maybe_rehash() { - if (_partition_count >= _rehash_at_size) { - auto new_bucket_count = std::min(_partitions.bucket_count() * 2, _partitions.bucket_count() + 64 * 1024); - auto buckets = std::make_unique(new_bucket_count); - - _partitions.rehash(partitions_type::bucket_traits(buckets.get(), new_bucket_count)); - _buckets = std::move(buckets); - - _rehash_at_size = compute_rehash_at_size(new_bucket_count); - } - } -public: - explicit cell_locker(schema_ptr s, cell_locker_stats& stats) - : _buckets(std::make_unique(initial_bucket_count)) - , _partitions(partitions_type::bucket_traits(_buckets.get(), initial_bucket_count), - partition_entry::hasher(), partition_entry::equal_compare(*s)) - , _schema(s) - , _original_schema(std::move(s)) - , _stats(stats) - { } - - ~cell_locker() { - assert(_partitions.empty()); - } - - void set_schema(schema_ptr s) { - _schema = s; - } - schema_ptr schema() const { - return _schema; - } - - // partition_cells_range is required to be in cell_locker::schema() - future> lock_cells(const dht::decorated_key& dk, partition_cells_range&& range, - timeout_clock::time_point timeout); -}; - - -class locked_cell { - lw_shared_ptr _entry; -public: - explicit locked_cell(lw_shared_ptr entry) - : _entry(std::move(entry)) { } - - locked_cell(const locked_cell&) = delete; - locked_cell(locked_cell&&) = default; - - ~locked_cell() { - if (_entry) { - _entry->unlock(); - } - } -}; - -struct cell_locker::locker { - cell_entry::hasher _hasher; - cell_entry::equal_compare _eq_cmp; - partition_entry& _partition_entry; - - partition_cells_range _range; - partition_cells_range::iterator _current_ck; - cells_range::const_iterator _current_cell; - - timeout_clock::time_point _timeout; - std::vector _locks; - cell_locker_stats& _stats; -private: - void update_ck() { - if (!is_done()) { - _current_cell = _current_ck->begin(); - } - } - - future<> lock_next(); - - bool is_done() const { return _current_ck == _range.end(); } -public: - explicit locker(const ::schema& s, cell_locker_stats& st, partition_entry& pe, partition_cells_range&& range, timeout_clock::time_point timeout) - : _hasher(s) - , _eq_cmp(s) - , _partition_entry(pe) - , _range(std::move(range)) - , _current_ck(_range.begin()) - , _timeout(timeout) - , _stats(st) - { - update_ck(); - } - - locker(const locker&) = delete; - locker(locker&&) = delete; - - future<> lock_all() { - // Cannot defer before first call to lock_next(). 
- return lock_next().then([this] { - return do_until([this] { return is_done(); }, [this] { - return lock_next(); - }); - }); - } - - std::vector get() && { return std::move(_locks); } -}; - -inline -future> cell_locker::lock_cells(const dht::decorated_key& dk, partition_cells_range&& range, timeout_clock::time_point timeout) { - partition_entry::hasher pe_hash; - partition_entry::equal_compare pe_eq(*_schema); - - auto it = _partitions.find(dk, pe_hash, pe_eq); - std::unique_ptr partition; - if (it == _partitions.end()) { - partition = std::make_unique(_schema, *this, dk); - } else if (!it->upgrade(_schema)) { - partition = std::unique_ptr(&*it); - _partition_count--; - _partitions.erase(it); - } - - if (partition) { - std::vector locks; - for (auto&& r : range) { - if (r.empty()) { - continue; - } - for (auto&& c : r) { - auto cell = make_lw_shared(*partition, position_in_partition(r.position()), c); - _stats.lock_acquisitions++; - partition->insert(cell); - locks.emplace_back(std::move(cell)); - } - } - - if (!locks.empty()) { - _partitions.insert(*partition.release()); - _partition_count++; - maybe_rehash(); - } - return make_ready_future>(std::move(locks)); - } - - auto l = std::make_unique(*_schema, _stats, *it, std::move(range), timeout); - auto f = l->lock_all(); - return f.then([l = std::move(l)] { - return std::move(*l).get(); - }); -} - -inline -future<> cell_locker::locker::lock_next() { - while (!is_done()) { - if (_current_cell == _current_ck->end()) { - ++_current_ck; - update_ck(); - continue; - } - - auto cid = *_current_cell++; - - cell_address ca { position_in_partition(_current_ck->position()), cid }; - auto it = _partition_entry.cells().find(ca, _hasher, _eq_cmp); - if (it != _partition_entry.cells().end()) { - _stats.operations_waiting_for_lock++; - return it->lock(_timeout).then([this, ce = it->shared_from_this()] () mutable { - _stats.operations_waiting_for_lock--; - _stats.lock_acquisitions++; - _locks.emplace_back(std::move(ce)); - }); - } - - auto cell = make_lw_shared(_partition_entry, position_in_partition(_current_ck->position()), cid); - _stats.lock_acquisitions++; - _partition_entry.insert(cell); - _locks.emplace_back(std::move(cell)); - } - return make_ready_future<>(); -} - -inline -bool cell_locker::partition_entry::upgrade(schema_ptr new_schema) { - if (_schema == new_schema) { - return true; - } - - auto buckets = std::make_unique(_cells.bucket_count()); - auto cells = cells_type(cells_type::bucket_traits(buckets.get(), _cells.bucket_count()), - cell_entry::hasher(*new_schema), cell_entry::equal_compare(*new_schema)); - - _cells.clear_and_dispose([&] (cell_entry* cell_ptr) noexcept { - auto& cell = *cell_ptr; - auto kind = cell.position().is_static_row() ? column_kind::static_column - : column_kind::regular_column; - auto reinsert = cell.upgrade(*_schema, *new_schema, kind); - if (reinsert) { - cells.insert(cell); - } else { - _cell_count--; - } - }); - - // bi::unordered_set move assignment is actually a swap. - // Original _buckets cannot be destroyed before the container using them is - // so we need to explicitly make sure that the original _cells is no more. 
- _cells = std::move(cells); - auto destroy = [] (auto) { }; - destroy(std::move(cells)); - - _buckets = std::move(buckets); - _schema = new_schema; - return _cell_count; -} diff --git a/scylla/checked-file-impl.hh b/scylla/checked-file-impl.hh deleted file mode 100644 index c4607bc..0000000 --- a/scylla/checked-file-impl.hh +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "seastar/core/file.hh" -#include "disk-error-handler.hh" - -class checked_file_impl : public file_impl { -public: - - checked_file_impl(const io_error_handler& error_handler, file f) - : _error_handler(error_handler), _file(f) { - _memory_dma_alignment = f.memory_dma_alignment(); - _disk_read_dma_alignment = f.disk_read_dma_alignment(); - _disk_write_dma_alignment = f.disk_write_dma_alignment(); - } - - virtual future write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc) override { - return do_io_check(_error_handler, [&] { - return get_file_impl(_file)->write_dma(pos, buffer, len, pc); - }); - } - - virtual future write_dma(uint64_t pos, std::vector iov, const io_priority_class& pc) override { - return do_io_check(_error_handler, [&] { - return get_file_impl(_file)->write_dma(pos, iov, pc); - }); - } - - virtual future read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& pc) override { - return do_io_check(_error_handler, [&] { - return get_file_impl(_file)->read_dma(pos, buffer, len, pc); - }); - } - - virtual future read_dma(uint64_t pos, std::vector iov, const io_priority_class& pc) override { - return do_io_check(_error_handler, [&] { - return get_file_impl(_file)->read_dma(pos, iov, pc); - }); - } - - virtual future<> flush(void) override { - return do_io_check(_error_handler, [&] { - return get_file_impl(_file)->flush(); - }); - } - - virtual future stat(void) override { - return do_io_check(_error_handler, [&] { - return get_file_impl(_file)->stat(); - }); - } - - virtual future<> truncate(uint64_t length) override { - return do_io_check(_error_handler, [&] { - return get_file_impl(_file)->truncate(length); - }); - } - - virtual future<> discard(uint64_t offset, uint64_t length) override { - return do_io_check(_error_handler, [&] { - return get_file_impl(_file)->discard(offset, length); - }); - } - - virtual future<> allocate(uint64_t position, uint64_t length) override { - return do_io_check(_error_handler, [&] { - return get_file_impl(_file)->allocate(position, length); - }); - } - - virtual future size(void) override { - return do_io_check(_error_handler, [&] { - return get_file_impl(_file)->size(); - }); - } - - virtual future<> close() override { - return do_io_check(_error_handler, [&] { - return get_file_impl(_file)->close(); - }); - } - - // returns a handle for plain file, so make_checked_file() should be called - // on file returned by handle. 
- virtual std::unique_ptr dup() override { - return get_file_impl(_file)->dup(); - } - - virtual subscription list_directory(std::function (directory_entry de)> next) override { - return do_io_check(_error_handler, [&] { - return get_file_impl(_file)->list_directory(next); - }); - } - - virtual future> dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc) override { - return do_io_check(_error_handler, [&] { - return get_file_impl(_file)->dma_read_bulk(offset, range_size, pc); - }); - } -private: - const io_error_handler& _error_handler; - file _file; -}; - -inline file make_checked_file(const io_error_handler& error_handler, file f) -{ - return file(::make_shared(error_handler, f)); -} - -future -inline open_checked_file_dma(const io_error_handler& error_handler, - sstring name, open_flags flags, - file_open_options options) -{ - return do_io_check(error_handler, [&] { - return open_file_dma(name, flags, options).then([&] (file f) { - return make_ready_future(make_checked_file(error_handler, f)); - }); - }); -} - -future -inline open_checked_file_dma(const io_error_handler& error_handler, - sstring name, open_flags flags) -{ - return do_io_check(error_handler, [&] { - return open_file_dma(name, flags).then([&] (file f) { - return make_ready_future(make_checked_file(error_handler, f)); - }); - }); -} - -future -inline open_checked_directory(const io_error_handler& error_handler, - sstring name) -{ - return do_io_check(error_handler, [&] { - return engine().open_directory(name).then([&] (file f) { - return make_ready_future(make_checked_file(error_handler, f)); - }); - }); -} diff --git a/scylla/clocks-impl.cc b/scylla/clocks-impl.cc deleted file mode 100644 index 5aead53..0000000 --- a/scylla/clocks-impl.cc +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "clocks-impl.hh" - -std::atomic clocks_offset; diff --git a/scylla/clocks-impl.hh b/scylla/clocks-impl.hh deleted file mode 100644 index 192b118..0000000 --- a/scylla/clocks-impl.hh +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
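checked_file_impl above is a classic decorator: every file operation is funneled through an error-handler hook before being forwarded to the wrapped implementation. A reduced standalone sketch of the same shape; the file_like interface and handler signature here are invented for illustration and are not seastar's API:

#include <functional>
#include <iostream>
#include <memory>
#include <stdexcept>

// Hypothetical minimal file interface standing in for seastar's file_impl.
struct file_like {
    virtual ~file_like() = default;
    virtual size_t read(void* buf, size_t len) = 0;
    virtual void close() = 0;
};

using io_error_handler = std::function<void(const std::exception&)>;

// Decorator: forwards every call, routing failures through the handler.
class checked_file : public file_like {
    io_error_handler _handler;
    std::unique_ptr<file_like> _inner;

    template <typename Func>
    auto do_io_check(Func&& f) {
        try {
            return f();
        } catch (const std::exception& e) {
            _handler(e); // e.g. count the error, mark the disk bad...
            throw;       // ...then rethrow so the caller still sees it
        }
    }
public:
    checked_file(io_error_handler h, std::unique_ptr<file_like> inner)
        : _handler(std::move(h)), _inner(std::move(inner)) {}

    size_t read(void* buf, size_t len) override {
        return do_io_check([&] { return _inner->read(buf, len); });
    }
    void close() override {
        do_io_check([&] { _inner->close(); });
    }
};

struct null_file : file_like {
    size_t read(void*, size_t len) override { return len; }
    void close() override {}
};

int main() {
    checked_file f([](const std::exception& e) { std::cerr << e.what() << "\n"; },
                   std::make_unique<null_file>());
    char buf[8];
    std::cout << f.read(buf, sizeof buf) << "\n"; // 8
    f.close();
}
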
- */ - -#pragma once - -#include -#include -#include -#include - -extern std::atomic clocks_offset; - -template -static inline void forward_jump_clocks(Duration delta) -{ - auto d = std::chrono::duration_cast(delta).count(); - clocks_offset.fetch_add(d, std::memory_order_relaxed); -} - -static inline std::chrono::seconds get_clocks_offset() -{ - auto off = clocks_offset.load(std::memory_order_relaxed); - return std::chrono::seconds(off); -} - -// Returns a time point which is earlier from t by d, or minimum time point if it cannot be represented. -template -inline -auto saturating_subtract(std::chrono::time_point t, std::chrono::duration d) -> decltype(t) { - return std::max(t, decltype(t)::min() + d) - d; -} diff --git a/scylla/clustering_bounds_comparator.hh b/scylla/clustering_bounds_comparator.hh deleted file mode 100644 index 61445e9..0000000 --- a/scylla/clustering_bounds_comparator.hh +++ /dev/null @@ -1,167 +0,0 @@ - -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "keys.hh" -#include "schema.hh" -#include "range.hh" - -/** - * Represents the kind of bound in a range tombstone. - */ -enum class bound_kind : uint8_t { - excl_end = 0, - incl_start = 1, - // values 2 to 5 are reserved for forward Origin compatibility - incl_end = 6, - excl_start = 7, -}; - -std::ostream& operator<<(std::ostream& out, const bound_kind k); - -bound_kind invert_kind(bound_kind k); -int32_t weight(bound_kind k); - -static inline bound_kind flip_bound_kind(bound_kind bk) -{ - switch (bk) { - case bound_kind::excl_end: return bound_kind::excl_start; - case bound_kind::incl_end: return bound_kind::incl_start; - case bound_kind::excl_start: return bound_kind::excl_end; - case bound_kind::incl_start: return bound_kind::incl_end; - } - abort(); -} - -class bound_view { -public: - const static thread_local clustering_key empty_prefix; - const clustering_key_prefix& prefix; - bound_kind kind; - bound_view(const clustering_key_prefix& prefix, bound_kind kind) - : prefix(prefix) - , kind(kind) - { } - bound_view(const bound_view& other) noexcept = default; - bound_view& operator=(const bound_view& other) noexcept { - if (this != &other) { - this->~bound_view(); - new (this) bound_view(other); - } - return *this; - } - struct tri_compare { - // To make it assignable and to avoid taking a schema_ptr, we - // wrap the schema reference. 
- std::reference_wrapper _s; - tri_compare(const schema& s) : _s(s) - { } - int operator()(const clustering_key_prefix& p1, int32_t w1, const clustering_key_prefix& p2, int32_t w2) const { - auto type = _s.get().clustering_key_prefix_type(); - auto res = prefix_equality_tri_compare(type->types().begin(), - type->begin(p1), type->end(p1), - type->begin(p2), type->end(p2), - ::tri_compare); - if (res) { - return res; - } - auto d1 = p1.size(_s); - auto d2 = p2.size(_s); - if (d1 == d2) { - return w1 - w2; - } - return d1 < d2 ? w1 - (w1 <= 0) : -(w2 - (w2 <= 0)); - } - int operator()(const bound_view b, const clustering_key_prefix& p) const { - return operator()(b.prefix, weight(b.kind), p, 0); - } - int operator()(const clustering_key_prefix& p, const bound_view b) const { - return operator()(p, 0, b.prefix, weight(b.kind)); - } - int operator()(const bound_view b1, const bound_view b2) const { - return operator()(b1.prefix, weight(b1.kind), b2.prefix, weight(b2.kind)); - } - }; - struct compare { - // To make it assignable and to avoid taking a schema_ptr, we - // wrap the schema reference. - tri_compare _cmp; - compare(const schema& s) : _cmp(s) - { } - bool operator()(const clustering_key_prefix& p1, int32_t w1, const clustering_key_prefix& p2, int32_t w2) const { - return _cmp(p1, w1, p2, w2) < 0; - } - bool operator()(const bound_view b, const clustering_key_prefix& p) const { - return operator()(b.prefix, weight(b.kind), p, 0); - } - bool operator()(const clustering_key_prefix& p, const bound_view b) const { - return operator()(p, 0, b.prefix, weight(b.kind)); - } - bool operator()(const bound_view b1, const bound_view b2) const { - return operator()(b1.prefix, weight(b1.kind), b2.prefix, weight(b2.kind)); - } - }; - bool equal(const schema& s, const bound_view other) const { - return kind == other.kind && prefix.equal(s, other.prefix); - } - bool adjacent(const schema& s, const bound_view other) const { - return invert_kind(other.kind) == kind && prefix.equal(s, other.prefix); - } - static bound_view bottom() { - return {empty_prefix, bound_kind::incl_start}; - } - static bound_view top() { - return {empty_prefix, bound_kind::incl_end}; - } - template typename R> - GCC6_CONCEPT( requires Range ) - static bound_view from_range_start(const R& range) { - return range.start() - ? bound_view(range.start()->value(), range.start()->is_inclusive() ? bound_kind::incl_start : bound_kind::excl_start) - : bottom(); - } - template typename R> - GCC6_CONCEPT( requires Range ) - static bound_view from_range_end(const R& range) { - return range.end() - ? bound_view(range.end()->value(), range.end()->is_inclusive() ? 
bound_kind::incl_end : bound_kind::excl_end) - : top(); - } - template typename R> - GCC6_CONCEPT( requires Range ) - static std::pair from_range(const R& range) { - return {from_range_start(range), from_range_end(range)}; - } - template typename R> - GCC6_CONCEPT( requires Range ) - static stdx::optional::bound> to_range_bound(const bound_view& bv) { - if (&bv.prefix == &empty_prefix) { - return {}; - } - bool inclusive = bv.kind != bound_kind::excl_end && bv.kind != bound_kind::excl_start; - return {typename R::bound(bv.prefix.view(), inclusive)}; - } - friend std::ostream& operator<<(std::ostream& out, const bound_view& b) { - return out << "{bound: prefix=" << b.prefix << ", kind=" << b.kind << "}"; - } -}; diff --git a/scylla/clustering_key_filter.hh b/scylla/clustering_key_filter.hh deleted file mode 100644 index 229f674..0000000 --- a/scylla/clustering_key_filter.hh +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "schema.hh" -#include "query-request.hh" - -namespace query { - -class clustering_key_filter_ranges { - clustering_row_ranges _storage; - const clustering_row_ranges& _ref; -public: - clustering_key_filter_ranges(const clustering_row_ranges& ranges) : _ref(ranges) { } - struct reversed { }; - clustering_key_filter_ranges(reversed, const clustering_row_ranges& ranges) - : _storage(ranges.rbegin(), ranges.rend()), _ref(_storage) { } - - clustering_key_filter_ranges(clustering_key_filter_ranges&& other) noexcept - : _storage(std::move(other._storage)) - , _ref(&other._ref == &other._storage ? _storage : other._ref) - { } - - clustering_key_filter_ranges& operator=(clustering_key_filter_ranges&& other) noexcept { - if (this != &other) { - this->~clustering_key_filter_ranges(); - new (this) clustering_key_filter_ranges(std::move(other)); - } - return *this; - } - - auto begin() const { return _ref.begin(); } - auto end() const { return _ref.end(); } - bool empty() const { return _ref.empty(); } - size_t size() const { return _ref.size(); } - const clustering_row_ranges& ranges() const { return _ref; } - - static clustering_key_filter_ranges get_ranges(const schema& schema, const query::partition_slice& slice, const partition_key& key) { - const query::clustering_row_ranges& ranges = slice.row_ranges(schema, key); - if (slice.options.contains(query::partition_slice::option::reversed)) { - return clustering_key_filter_ranges(clustering_key_filter_ranges::reversed{}, ranges); - } - return clustering_key_filter_ranges(ranges); - } -}; - -} diff --git a/scylla/clustering_ranges_walker.hh b/scylla/clustering_ranges_walker.hh deleted file mode 100644 index 6772a27..0000000 --- a/scylla/clustering_ranges_walker.hh +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. 
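clustering_key_filter_ranges above either borrows the caller's range vector or, for reversed slices, owns a reversed copy; its move constructor re-points the reference only when it referred to the object's own storage. A small standalone illustration of that borrow-or-own idiom (types and names invented for the sketch):

#include <cassert>
#include <vector>

// Holds a reference that aims either at external data (borrow) or at
// an internal, possibly transformed copy (own).
class ranges_view {
    std::vector<int> _storage;      // used only in the owning case
    const std::vector<int>& _ref;
public:
    // Borrowing: reference the caller's vector, no copy.
    explicit ranges_view(const std::vector<int>& v) : _ref(v) {}

    // Owning: keep a reversed copy and reference it.
    struct reversed {};
    ranges_view(reversed, const std::vector<int>& v)
        : _storage(v.rbegin(), v.rend()), _ref(_storage) {}

    // Move: if the source referenced its own storage, re-point the
    // reference at our freshly moved storage; otherwise keep borrowing.
    ranges_view(ranges_view&& o) noexcept
        : _storage(std::move(o._storage))
        , _ref(&o._ref == &o._storage ? _storage : o._ref) {}

    const std::vector<int>& get() const { return _ref; }
};

int main() {
    std::vector<int> v{1, 2, 3};
    ranges_view borrowed(v);
    ranges_view owned(ranges_view::reversed{}, v);
    ranges_view moved(std::move(owned));
    assert(borrowed.get() == (std::vector<int>{1, 2, 3}));
    assert(moved.get() == (std::vector<int>{3, 2, 1}));
}
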
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "schema.hh" -#include "query-request.hh" -#include "streamed_mutation.hh" - -// Utility for in-order checking of overlap with position ranges. -class clustering_ranges_walker { - const schema& _schema; - const query::clustering_row_ranges& _ranges; - query::clustering_row_ranges::const_iterator _current; - query::clustering_row_ranges::const_iterator _end; - bool _in_current; // next position is known to be >= _current_start - bool _with_static_row; - position_in_partition_view _current_start; - position_in_partition_view _current_end; - stdx::optional _trim; - size_t _change_counter = 1; -private: - bool advance_to_next_range() { - _in_current = false; - if (!_current_start.is_static_row()) { - ++_current; - } - ++_change_counter; - if (_current == _end) { - return false; - } - _current_start = position_in_partition_view::for_range_start(*_current); - _current_end = position_in_partition_view::for_range_end(*_current); - return true; - } -public: - clustering_ranges_walker(const schema& s, const query::clustering_row_ranges& ranges, bool with_static_row = true) - : _schema(s) - , _ranges(ranges) - , _current(ranges.begin()) - , _end(ranges.end()) - , _in_current(with_static_row) - , _with_static_row(with_static_row) - , _current_start(with_static_row ? position_in_partition_view::for_static_row() - : position_in_partition_view::for_range_start(*_current)) - , _current_end(with_static_row ? position_in_partition_view::before_all_clustered_rows() - : position_in_partition_view::for_range_end(*_current)) - { } - clustering_ranges_walker(clustering_ranges_walker&& o) noexcept - : _schema(o._schema) - , _ranges(o._ranges) - , _current(o._current) - , _end(o._end) - , _in_current(o._in_current) - , _with_static_row(o._with_static_row) - , _current_start(o._current_start) - , _current_end(o._current_end) - , _trim(std::move(o._trim)) - , _change_counter(o._change_counter) - { } - clustering_ranges_walker& operator=(clustering_ranges_walker&& o) { - if (this != &o) { - this->~clustering_ranges_walker(); - new (this) clustering_ranges_walker(std::move(o)); - } - return *this; - } - - // Excludes positions smaller than pos from the ranges. - // pos should be monotonic. - // No constraints between pos and positions passed to advance_to(). - // - // After the invocation, when !out_of_range(), lower_bound() returns the smallest position still contained. - void trim_front(position_in_partition pos) { - position_in_partition::less_compare less(_schema); - - if (_current == _end) { - return; - } - - do { - if (!less(_current_start, pos)) { - break; - } - if (less(pos, _current_end)) { - _trim = std::move(pos); - _current_start = *_trim; - _in_current = false; - ++_change_counter; - break; - } - } while (advance_to_next_range()); - } - - // Returns true if given position is contained. - // Must be called with monotonic positions. - // Idempotent. 
- bool advance_to(position_in_partition_view pos) { - position_in_partition::less_compare less(_schema); - - if (_current == _end) { - return false; - } - - do { - if (!_in_current && less(pos, _current_start)) { - break; - } - // All subsequent clustering keys are larger than the start of this - // range so there is no need to check that again. - _in_current = true; - - if (less(pos, _current_end)) { - return true; - } - } while (advance_to_next_range()); - - return false; - } - - // Returns true if the range expressed by start and end (as in position_range) overlaps - // with clustering ranges. - // Must be called with monotonic start position. That position must also be greater than - // the last position passed to the other advance_to() overload. - // Idempotent. - bool advance_to(position_in_partition_view start, position_in_partition_view end) { - position_in_partition::less_compare less(_schema); - - if (_current == _end) { - return false; - } - - do { - if (less(end, _current_start)) { - break; - } - if (less(start, _current_end)) { - return true; - } - } while (advance_to_next_range()); - - return false; - } - - // Returns true if the range tombstone expressed by start and end (as in position_range) overlaps - // with clustering ranges. - // No monotonicity restrictions on argument values across calls. - // Does not affect lower_bound(). - // Idempotent. - bool contains_tombstone(position_in_partition_view start, position_in_partition_view end) const { - position_in_partition::less_compare less(_schema); - - if (_trim && less(end, *_trim)) { - return false; - } - - auto i = _current; - while (i != _end) { - auto range_start = position_in_partition_view::for_range_start(*i); - if (less(end, range_start)) { - return false; - } - auto range_end = position_in_partition_view::for_range_end(*i); - if (less(start, range_end)) { - return true; - } - ++i; - } - - return false; - } - - // Returns true if advanced past all contained positions. Any later advance_to() until reset() will return false. - bool out_of_range() const { - return _current == _end; - } - - // Resets the state of the walker so that advance_to() can be now called for new sequence of positions. - // Any range trimmings still hold after this. - void reset() { - auto trim = std::move(_trim); - auto ctr = _change_counter; - *this = clustering_ranges_walker(_schema, _ranges, _with_static_row); - _change_counter = ctr + 1; - if (trim) { - trim_front(std::move(*trim)); - } - } - - // Can be called only when !out_of_range() - position_in_partition_view lower_bound() const { - return _current_start; - } - - // When lower_bound() changes, this also does - // Always > 0. - size_t lower_bound_change_counter() const { - return _change_counter; - } -}; diff --git a/scylla/combine.hh b/scylla/combine.hh deleted file mode 100644 index 2b08f48..0000000 --- a/scylla/combine.hh +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
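The walker above answers monotonic membership queries against sorted, disjoint clustering ranges in O(n + m) overall: because successive query positions never go backwards, the current-range iterator only ever moves forward. A simplified integer-keyed sketch of the advance_to() idea under that same monotonicity assumption:

#include <iostream>
#include <utility>
#include <vector>

// Sorted, disjoint, half-open ranges [first, second).
using ranges_t = std::vector<std::pair<int, int>>;

class ranges_walker {
    ranges_t::const_iterator _cur, _end;
public:
    explicit ranges_walker(const ranges_t& rs) : _cur(rs.begin()), _end(rs.end()) {}

    // Positions must be passed in non-decreasing order.
    bool advance_to(int pos) {
        while (_cur != _end) {
            if (pos < _cur->first) {
                return false;   // before the current range; contained nowhere
            }
            if (pos < _cur->second) {
                return true;    // inside the current range
            }
            ++_cur;             // past it; never revisited
        }
        return false;           // out of range for good
    }
    bool out_of_range() const { return _cur == _end; }
};

int main() {
    ranges_t rs{{0, 5}, {10, 15}};
    ranges_walker w(rs);
    for (int pos : {1, 7, 12, 20}) {
        std::cout << pos << " -> " << w.advance_to(pos) << "\n"; // 1, 0, 1, 0
    }
}
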
- * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -// combine two sorted uniqued sequences into a single sorted sequence -// unique elements are copied, duplicate elements are merged with a -// binary function. -template -OutputIterator -combine(InputIterator1 begin1, InputIterator1 end1, - InputIterator2 begin2, InputIterator2 end2, - OutputIterator out, - Compare compare, - Merge merge) { - while (begin1 != end1 && begin2 != end2) { - auto& e1 = *begin1; - auto& e2 = *begin2; - if (compare(e1, e2)) { - *out++ = e1; - ++begin1; - } else if (compare(e2, e1)) { - *out++ = e2; - ++begin2; - } else { - *out++ = merge(e1, e2); - ++begin1; - ++begin2; - } - } - out = std::copy(begin1, end1, out); - out = std::copy(begin2, end2, out); - return out; -} - - diff --git a/scylla/compaction_strategy.hh b/scylla/compaction_strategy.hh deleted file mode 100644 index 8f2aef4..0000000 --- a/scylla/compaction_strategy.hh +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -class column_family; -class schema; -using schema_ptr = lw_shared_ptr; - -namespace sstables { - -enum class compaction_strategy_type { - null, - major, - size_tiered, - leveled, - date_tiered, -}; - -class compaction_strategy_impl; -class sstable; -class sstable_set; -struct compaction_descriptor; -struct resharding_descriptor; - -class compaction_strategy { - ::shared_ptr _compaction_strategy_impl; -public: - compaction_strategy(::shared_ptr impl); - - compaction_strategy(); - ~compaction_strategy(); - compaction_strategy(const compaction_strategy&); - compaction_strategy(compaction_strategy&&); - compaction_strategy& operator=(compaction_strategy&&); - - // Return a list of sstables to be compacted after applying the strategy. - compaction_descriptor get_sstables_for_compaction(column_family& cfs, std::vector> candidates); - - std::vector get_resharding_jobs(column_family& cf, std::vector> candidates); - - // Some strategies may look at the compacted and resulting sstables to - // get some useful information for subsequent compactions. - void notify_completion(const std::vector>& removed, const std::vector>& added); - - // Return if parallel compaction is allowed by strategy. - bool parallel_compaction() const; - - // Return if optimization to rule out sstables based on clustering key filter should be applied. - bool use_clustering_key_filter() const; - - // An estimation of number of compaction for strategy to be satisfied. 
- int64_t estimated_pending_compactions(column_family& cf) const; - - static sstring name(compaction_strategy_type type) { - switch (type) { - case compaction_strategy_type::null: - return "NullCompactionStrategy"; - case compaction_strategy_type::major: - return "MajorCompactionStrategy"; - case compaction_strategy_type::size_tiered: - return "SizeTieredCompactionStrategy"; - case compaction_strategy_type::leveled: - return "LeveledCompactionStrategy"; - case compaction_strategy_type::date_tiered: - return "DateTieredCompactionStrategy"; - default: - throw std::runtime_error("Invalid Compaction Strategy"); - } - } - - static compaction_strategy_type type(const sstring& name) { - auto pos = name.find("org.apache.cassandra.db.compaction."); - sstring short_name = (pos == sstring::npos) ? name : name.substr(pos + 35); - if (short_name == "NullCompactionStrategy") { - return compaction_strategy_type::null; - } else if (short_name == "MajorCompactionStrategy") { - return compaction_strategy_type::major; - } else if (short_name == "SizeTieredCompactionStrategy") { - return compaction_strategy_type::size_tiered; - } else if (short_name == "LeveledCompactionStrategy") { - return compaction_strategy_type::leveled; - } else if (short_name == "DateTieredCompactionStrategy") { - return compaction_strategy_type::date_tiered; - } else { - throw exceptions::configuration_exception(sprint("Unable to find compaction strategy class '%s'", name)); - } - } - - compaction_strategy_type type() const; - - sstring name() const { - return name(type()); - } - - sstable_set make_sstable_set(schema_ptr schema) const; -}; - -// Creates a compaction_strategy object from one of the strategies available. -compaction_strategy make_compaction_strategy(compaction_strategy_type strategy, const std::map& options); - -} diff --git a/scylla/compatible_ring_position.hh b/scylla/compatible_ring_position.hh deleted file mode 100644 index 3e92a0f..0000000 --- a/scylla/compatible_ring_position.hh +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
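The name()/type() pair above strips the Origin package prefix "org.apache.cassandra.db.compaction." (35 characters, hence the substr(pos + 35)) before matching on the short class name. A standalone sketch of that normalization, covering only the strategy names listed above:

#include <iostream>
#include <stdexcept>
#include <string>

enum class strategy { null, major, size_tiered, leveled, date_tiered };

strategy strategy_from_name(const std::string& name) {
    // Accept both "SizeTieredCompactionStrategy" and the fully qualified
    // "org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy".
    static const std::string prefix = "org.apache.cassandra.db.compaction.";
    auto pos = name.find(prefix);
    std::string short_name =
        (pos == std::string::npos) ? name : name.substr(pos + prefix.size());
    if (short_name == "NullCompactionStrategy")       return strategy::null;
    if (short_name == "MajorCompactionStrategy")      return strategy::major;
    if (short_name == "SizeTieredCompactionStrategy") return strategy::size_tiered;
    if (short_name == "LeveledCompactionStrategy")    return strategy::leveled;
    if (short_name == "DateTieredCompactionStrategy") return strategy::date_tiered;
    throw std::runtime_error("Unable to find compaction strategy class '" + name + "'");
}

int main() {
    auto s = strategy_from_name("org.apache.cassandra.db.compaction.LeveledCompactionStrategy");
    std::cout << (s == strategy::leveled) << "\n"; // 1
}
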
- */ - - -#pragma once - -#include "query-request.hh" -#include - -// Wraps ring_position so it is compatible with old-style C++: default constructor, -// stateless comparators, yada yada -class compatible_ring_position { - const schema* _schema = nullptr; - // optional to supply a default constructor, no more - std::experimental::optional _rp; -public: - compatible_ring_position() noexcept = default; - compatible_ring_position(const schema& s, const dht::ring_position& rp) - : _schema(&s), _rp(rp) { - } - compatible_ring_position(const schema& s, dht::ring_position&& rp) - : _schema(&s), _rp(std::move(rp)) { - } - const dht::token& token() const { - return _rp->token(); - } - friend int tri_compare(const compatible_ring_position& x, const compatible_ring_position& y) { - return x._rp->tri_compare(*x._schema, *y._rp); - } - friend bool operator<(const compatible_ring_position& x, const compatible_ring_position& y) { - return tri_compare(x, y) < 0; - } - friend bool operator<=(const compatible_ring_position& x, const compatible_ring_position& y) { - return tri_compare(x, y) <= 0; - } - friend bool operator>(const compatible_ring_position& x, const compatible_ring_position& y) { - return tri_compare(x, y) > 0; - } - friend bool operator>=(const compatible_ring_position& x, const compatible_ring_position& y) { - return tri_compare(x, y) >= 0; - } - friend bool operator==(const compatible_ring_position& x, const compatible_ring_position& y) { - return tri_compare(x, y) == 0; - } - friend bool operator!=(const compatible_ring_position& x, const compatible_ring_position& y) { - return tri_compare(x, y) != 0; - } -}; - diff --git a/scylla/compound.hh b/scylla/compound.hh deleted file mode 100644 index 2491ef3..0000000 --- a/scylla/compound.hh +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
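compatible_ring_position above derives all six relational operators from a single three-way comparison, the usual pre-C++20 pattern (today operator<=> would generate them). A tiny standalone version of the idiom with an invented two-field key:

#include <cassert>

// Pre-C++20 idiom: define one tri-compare, derive every operator from it.
struct version {
    int major, minor;

    friend int tri_compare(const version& a, const version& b) {
        if (a.major != b.major) return a.major < b.major ? -1 : 1;
        if (a.minor != b.minor) return a.minor < b.minor ? -1 : 1;
        return 0;
    }
    friend bool operator<(const version& a, const version& b)  { return tri_compare(a, b) < 0; }
    friend bool operator<=(const version& a, const version& b) { return tri_compare(a, b) <= 0; }
    friend bool operator>(const version& a, const version& b)  { return tri_compare(a, b) > 0; }
    friend bool operator>=(const version& a, const version& b) { return tri_compare(a, b) >= 0; }
    friend bool operator==(const version& a, const version& b) { return tri_compare(a, b) == 0; }
    friend bool operator!=(const version& a, const version& b) { return tri_compare(a, b) != 0; }
};

int main() {
    assert((version{1, 2} < version{1, 3}));
    assert((version{2, 0} >= version{1, 9}));
}
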
- */ - -#pragma once - -#include "types.hh" -#include -#include -#include -#include -#include -#include "utils/serialization.hh" -#include "unimplemented.hh" - -enum class allow_prefixes { no, yes }; - -template -class compound_type final { -private: - const std::vector _types; - const bool _byte_order_equal; - const bool _byte_order_comparable; - const bool _is_reversed; -public: - static constexpr bool is_prefixable = AllowPrefixes == allow_prefixes::yes; - using prefix_type = compound_type; - using value_type = std::vector; - using size_type = uint16_t; - - compound_type(std::vector types) - : _types(std::move(types)) - , _byte_order_equal(std::all_of(_types.begin(), _types.end(), [] (auto t) { - return t->is_byte_order_equal(); - })) - , _byte_order_comparable(false) - , _is_reversed(_types.size() == 1 && _types[0]->is_reversed()) - { } - - compound_type(compound_type&&) = default; - - auto const& types() const { - return _types; - } - - bool is_singular() const { - return _types.size() == 1; - } - - prefix_type as_prefix() { - return prefix_type(_types); - } -private: - /* - * Format: - * ... - * - */ - template - static void serialize_value(RangeOfSerializedComponents&& values, bytes::iterator& out) { - for (auto&& val : values) { - assert(val.size() <= std::numeric_limits::max()); - write(out, size_type(val.size())); - out = std::copy(val.begin(), val.end(), out); - } - } - template - static size_t serialized_size(RangeOfSerializedComponents&& values) { - size_t len = 0; - for (auto&& val : values) { - len += sizeof(size_type) + val.size(); - } - return len; - } -public: - bytes serialize_single(bytes&& v) { - return serialize_value({std::move(v)}); - } - template - static bytes serialize_value(RangeOfSerializedComponents&& values) { - auto size = serialized_size(values); - if (size > std::numeric_limits::max()) { - throw std::runtime_error(sprint("Key size too large: %d > %d", size, std::numeric_limits::max())); - } - bytes b(bytes::initialized_later(), size); - auto i = b.begin(); - serialize_value(values, i); - return b; - } - template - static bytes serialize_value(std::initializer_list values) { - return serialize_value(boost::make_iterator_range(values.begin(), values.end())); - } - bytes serialize_optionals(const std::vector& values) { - return serialize_value(values | boost::adaptors::transformed([] (const bytes_opt& bo) -> bytes_view { - if (!bo) { - throw std::logic_error("attempted to create key component from empty optional"); - } - return *bo; - })); - } - bytes serialize_value_deep(const std::vector& values) { - // TODO: Optimize - std::vector partial; - partial.reserve(values.size()); - auto i = _types.begin(); - for (auto&& component : values) { - assert(i != _types.end()); - partial.push_back((*i++)->decompose(component)); - } - return serialize_value(partial); - } - bytes decompose_value(const value_type& values) { - return serialize_value(values); - } - class iterator : public std::iterator { - private: - bytes_view _v; - bytes_view _current; - private: - void read_current() { - size_type len; - { - if (_v.empty()) { - _v = bytes_view(nullptr, 0); - return; - } - len = read_simple(_v); - if (_v.size() < len) { - throw marshal_exception(); - } - } - _current = bytes_view(_v.begin(), len); - _v.remove_prefix(len); - } - public: - struct end_iterator_tag {}; - iterator(const bytes_view& v) : _v(v) { - read_current(); - } - iterator(end_iterator_tag, const bytes_view& v) : _v(nullptr, 0) {} - iterator& operator++() { - read_current(); - return *this; - } - iterator 
operator++(int) { - iterator i(*this); - ++(*this); - return i; - } - const value_type& operator*() const { return _current; } - const value_type* operator->() const { return &_current; } - bool operator!=(const iterator& i) const { return _v.begin() != i._v.begin(); } - bool operator==(const iterator& i) const { return _v.begin() == i._v.begin(); } - }; - static iterator begin(const bytes_view& v) { - return iterator(v); - } - static iterator end(const bytes_view& v) { - return iterator(typename iterator::end_iterator_tag(), v); - } - static boost::iterator_range components(const bytes_view& v) { - return { begin(v), end(v) }; - } - value_type deserialize_value(bytes_view v) { - std::vector result; - result.reserve(_types.size()); - std::transform(begin(v), end(v), std::back_inserter(result), [] (auto&& v) { - return bytes(v.begin(), v.end()); - }); - return result; - } - bool less(bytes_view b1, bytes_view b2) { - return compare(b1, b2) < 0; - } - size_t hash(bytes_view v) { - if (_byte_order_equal) { - return std::hash()(v); - } - auto t = _types.begin(); - size_t h = 0; - for (auto&& value : components(v)) { - h ^= (*t)->hash(value); - ++t; - } - return h; - } - int compare(bytes_view b1, bytes_view b2) { - if (_byte_order_comparable) { - if (_is_reversed) { - return compare_unsigned(b2, b1); - } else { - return compare_unsigned(b1, b2); - } - } - return lexicographical_tri_compare(_types.begin(), _types.end(), - begin(b1), end(b1), begin(b2), end(b2), [] (auto&& type, auto&& v1, auto&& v2) { - return type->compare(v1, v2); - }); - } - // Retruns true iff given prefix has no missing components - bool is_full(bytes_view v) const { - assert(AllowPrefixes == allow_prefixes::yes); - return std::distance(begin(v), end(v)) == (ssize_t)_types.size(); - } - bool is_empty(bytes_view v) const { - return begin(v) == end(v); - } - void validate(bytes_view v) { - // FIXME: implement - warn(unimplemented::cause::VALIDATION); - } - bool equal(bytes_view v1, bytes_view v2) { - if (_byte_order_equal) { - return compare_unsigned(v1, v2) == 0; - } - // FIXME: call equal() on each component - return compare(v1, v2) == 0; - } -}; - -using compound_prefix = compound_type; diff --git a/scylla/compound_compat.hh b/scylla/compound_compat.hh deleted file mode 100644 index caadba5..0000000 --- a/scylla/compound_compat.hh +++ /dev/null @@ -1,605 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include -#include "compound.hh" -#include "schema.hh" - -// -// This header provides adaptors between the representation used by our compound_type<> -// and representation used by Origin. -// -// For single-component keys the legacy representation is equivalent -// to the only component's serialized form. 
-// -// This header provides adaptors between the representation used by our compound_type<> -// and representation used by Origin. -// -// For single-component keys the legacy representation is equivalent -// to the only component's serialized form. For composite keys it is the following -// (See org.apache.cassandra.db.marshal.CompositeType): -// -// <representation> ::= ( <component> )+ -// <component> ::= <length> <value> <EOC> -// <length> ::= <uint16_t> -// <EOC> ::= <uint8_t> -// -// <value> is component's value in serialized form. <EOC> is always 0 for partition key. -// - -// Given a representation serialized using @CompoundType, provides a view on the -// representation of the same components as they would be serialized by Origin. -// -// The view is exposed in a form of a byte range. For example of use see to_legacy() function. -template <typename CompoundType> -class legacy_compound_view { - static_assert(!CompoundType::is_prefixable, "Legacy view not defined for prefixes"); - CompoundType& _type; - bytes_view _packed; -public: - legacy_compound_view(CompoundType& c, bytes_view packed) - : _type(c) - , _packed(packed) - { } - - class iterator : public std::iterator<std::input_iterator_tag, bytes::value_type> { - bool _singular; - // Offset within virtual output space of a component. - // - // Offset: -2 -1 0 ... LEN-1 LEN - // Field: [ length MSB ] [ length LSB ] [ VALUE ] [ EOC ] - // - int32_t _offset; - typename CompoundType::iterator _i; - public: - struct end_tag {}; - - iterator(const legacy_compound_view& v) - : _singular(v._type.is_singular()) - , _offset(_singular ? 0 : -2) - , _i(v._type.begin(v._packed)) - { } - - iterator(const legacy_compound_view& v, end_tag) - : _offset(-2) - , _i(v._type.end(v._packed)) - { } - - value_type operator*() const { - int32_t component_size = _i->size(); - if (_offset == -2) { - return (component_size >> 8) & 0xff; - } else if (_offset == -1) { - return component_size & 0xff; - } else if (_offset < component_size) { - return (*_i)[_offset]; - } else { // _offset == component_size - return 0; // EOC field - } - } - - iterator& operator++() { - auto component_size = (int32_t) _i->size(); - if (_offset < component_size - // When _singular, we skip the EOC byte. - && (!_singular || _offset != (component_size - 1))) - { - ++_offset; - } else { - ++_i; - _offset = -2; - } - return *this; - } - - bool operator==(const iterator& other) const { - return _offset == other._offset && other._i == _i; - } - - bool operator!=(const iterator& other) const { - return !(*this == other); - } - }; - - // A trichotomic comparator defined on @CompoundType representations which - // orders them according to lexicographical ordering of their corresponding - // legacy representations. - // - // tri_comparator(t)(k1, k2) - // - // ...is equivalent to: - // - // compare_unsigned(to_legacy(t, k1), to_legacy(t, k2)) - // - // ...but more efficient. - // - struct tri_comparator { - const CompoundType& _type; - - tri_comparator(const CompoundType& type) - : _type(type) - { } - - // @k1 and @k2 must be serialized using @type, which was passed to the constructor. - int operator()(bytes_view k1, bytes_view k2) const { - if (_type.is_singular()) { - return compare_unsigned(*_type.begin(k1), *_type.begin(k2)); - } - return lexicographical_tri_compare( - _type.begin(k1), _type.end(k1), - _type.begin(k2), _type.end(k2), - [] (const bytes_view& c1, const bytes_view& c2) -> int { - if (c1.size() != c2.size()) { - return c1.size() < c2.size() ?
-1 : 1; - } - return memcmp(c1.begin(), c2.begin(), c1.size()); - }); - } - }; - - // Equivalent to std::distance(begin(), end()), but computes faster - size_t size() const { - if (_type.is_singular()) { - return _type.begin(_packed)->size(); - } - size_t s = 0; - for (auto&& component : _type.components(_packed)) { - s += 2 /* length field */ + component.size() + 1 /* EOC */; - } - return s; - } - - iterator begin() const { - return iterator(*this); - } - - iterator end() const { - return iterator(*this, typename iterator::end_tag()); - } -}; - -// Converts compound_type<> representation to legacy representation -// @packed is assumed to be serialized using supplied @type. -template -static inline -bytes to_legacy(CompoundType& type, bytes_view packed) { - legacy_compound_view lv(type, packed); - bytes legacy_form(bytes::initialized_later(), lv.size()); - std::copy(lv.begin(), lv.end(), legacy_form.begin()); - return legacy_form; -} - -class composite_view; - -// Represents a value serialized according to Origin's CompositeType. -// If is_compound is true, then the value is one or more components encoded as: -// -// ::= ( )+ -// ::= -// ::= -// ::= -// -// If false, then it encodes a single value, without a prefix length or a suffix EOC. -class composite final { - bytes _bytes; - bool _is_compound; -public: - composite(bytes&& b, bool is_compound) - : _bytes(std::move(b)) - , _is_compound(is_compound) - { } - - explicit composite(bytes&& b) - : _bytes(std::move(b)) - , _is_compound(true) - { } - - composite() - : _bytes() - , _is_compound(true) - { } - - using size_type = uint16_t; - using eoc_type = int8_t; - - /* - * The 'end-of-component' byte should always be 0 for actual column name. - * However, it can set to 1 for query bounds. This allows to query for the - * equivalent of 'give me the full range'. That is, if a slice query is: - * start = <3><"foo".getBytes()><0> - * end = <3><"foo".getBytes()><1> - * then we'll return *all* the columns whose first component is "foo". - * If for a component, the 'end-of-component' is != 0, there should not be any - * following component. The end-of-component can also be -1 to allow - * non-inclusive query. For instance: - * end = <3><"foo".getBytes()><-1> - * allows to query everything that is smaller than <3><"foo".getBytes()>, but - * not <3><"foo".getBytes()> itself. - */ - enum class eoc : eoc_type { - start = -1, - none = 0, - end = 1 - }; - - using component = std::pair; - using component_view = std::pair; -private: - template>::value>> - static size_t size(Value& val) { - return val.size(); - } - static size_t size(const data_value& val) { - return val.serialized_size(); - } - template>::value>> - static void write_value(Value&& val, bytes::iterator& out) { - out = std::copy(val.begin(), val.end(), out); - } - static void write_value(const data_value& val, bytes::iterator& out) { - val.serialize(out); - } - template - static void serialize_value(RangeOfSerializedComponents&& values, bytes::iterator& out, bool is_compound) { - if (!is_compound) { - auto it = values.begin(); - write_value(std::forward(*it), out); - return; - } - - for (auto&& val : values) { - write(out, static_cast(size(val))); - write_value(std::forward(val), out); - // Range tombstones are not keys. For collections, only frozen - // values can be keys. Therefore, for as long as it is safe to - // assume that this code will be used to create keys, it is safe - // to assume the trailing byte is always zero. 
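
// (Editor's sketch, not part of the original patch.) The 'end-of-component' byte
// documented above turns a serialized composite into a range bound: appending
// eoc::end (1) to a prefix selects everything that starts with it, while
// eoc::start (-1) excludes the prefix itself. A hypothetical helper building such
// a bound from one already-serialized component (std::string stands in for bytes):

#include <cstdint>
#include <string>

enum class eoc_marker : int8_t { start = -1, none = 0, end = 1 };

inline std::string make_bound(const std::string& component, eoc_marker m) {
    std::string out;
    uint16_t len = static_cast<uint16_t>(component.size());
    out.push_back(static_cast<char>(len >> 8));   // <u16 length>, big-endian
    out.push_back(static_cast<char>(len & 0xff));
    out += component;                             // <value>
    out.push_back(static_cast<char>(m));          // <eoc>: 0 for real names, +/-1 for bounds
    return out;
}

// Note the EOC byte is compared as a *signed* value (see tri_compare further down),
// so for equal values start(-1) < none(0) < end(1); the pair
// [make_bound("foo", eoc_marker::start), make_bound("foo", eoc_marker::end)]
// therefore brackets every composite whose first component is "foo".
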
- write(out, eoc_type(eoc::none)); - } - } - template - static size_t serialized_size(RangeOfSerializedComponents&& values, bool is_compound) { - size_t len = 0; - auto it = values.begin(); - if (it != values.end()) { - // CQL3 uses a specific prefix (0xFFFF) to encode "static columns" - // (CASSANDRA-6561). This does mean the maximum size of the first component of a - // composite is 65534, not 65535 (or we wouldn't be able to detect if the first 2 - // bytes is the static prefix or not). - auto value_size = size(*it); - if (value_size > static_cast(std::numeric_limits::max() - uint8_t(is_compound))) { - throw std::runtime_error(sprint("First component size too large: %d > %d", value_size, std::numeric_limits::max() - is_compound)); - } - if (!is_compound) { - return value_size; - } - len += sizeof(size_type) + value_size + sizeof(eoc_type); - ++it; - } - for ( ; it != values.end(); ++it) { - auto value_size = size(*it); - if (value_size > std::numeric_limits::max()) { - throw std::runtime_error(sprint("Component size too large: %d > %d", value_size, std::numeric_limits::max())); - } - len += sizeof(size_type) + value_size + sizeof(eoc_type); - } - return len; - } -public: - template - auto describe_type(Describer f) const { - return f(const_cast(_bytes)); - } - - // marker is ignored if !is_compound - template - static composite serialize_value(RangeOfSerializedComponents&& values, bool is_compound = true, eoc marker = eoc::none) { - auto size = serialized_size(values, is_compound); - bytes b(bytes::initialized_later(), size); - auto i = b.begin(); - serialize_value(std::forward(values), i, is_compound); - if (is_compound && !b.empty()) { - b.back() = eoc_type(marker); - } - return composite(std::move(b), is_compound); - } - - template - static composite serialize_static(const schema& s, RangeOfSerializedComponents&& values) { - // FIXME: Optimize - auto b = bytes(size_t(2), bytes::value_type(0xff)); - std::vector sv(s.clustering_key_size()); - b += composite::serialize_value(boost::range::join(sv, std::forward(values)), true).release_bytes(); - return composite(std::move(b)); - } - - static eoc to_eoc(int8_t eoc_byte) { - return eoc_byte == 0 ? eoc::none : (eoc_byte < 0 ? 
eoc::start : eoc::end); - } - - class iterator : public std::iterator { - bytes_view _v; - component_view _current; - private: - void read_current() { - size_type len; - { - if (_v.empty()) { - _v = bytes_view(nullptr, 0); - return; - } - len = read_simple(_v); - if (_v.size() < len) { - throw marshal_exception(); - } - } - auto value = bytes_view(_v.begin(), len); - _v.remove_prefix(len); - _current = component_view(std::move(value), to_eoc(read_simple(_v))); - } - public: - struct end_iterator_tag {}; - - iterator(const bytes_view& v, bool is_compound, bool is_static) - : _v(v) { - if (is_static) { - _v.remove_prefix(2); - } - if (is_compound) { - read_current(); - } else { - _current = component_view(_v, eoc::none); - _v.remove_prefix(_v.size()); - } - } - - iterator(end_iterator_tag) : _v(nullptr, 0) {} - - iterator& operator++() { - read_current(); - return *this; - } - - iterator operator++(int) { - iterator i(*this); - ++(*this); - return i; - } - - const value_type& operator*() const { return _current; } - const value_type* operator->() const { return &_current; } - bool operator!=(const iterator& i) const { return _v.begin() != i._v.begin(); } - bool operator==(const iterator& i) const { return _v.begin() == i._v.begin(); } - }; - - iterator begin() const { - return iterator(_bytes, _is_compound, is_static()); - } - - iterator end() const { - return iterator(iterator::end_iterator_tag()); - } - - boost::iterator_range components() const & { - return { begin(), end() }; - } - - auto values() const & { - return components() | boost::adaptors::transformed([](auto&& c) { return c.first; }); - } - - std::vector components() const && { - std::vector result; - std::transform(begin(), end(), std::back_inserter(result), [](auto&& p) { - return component(bytes(p.first.begin(), p.first.end()), p.second); - }); - return result; - } - - std::vector values() const && { - std::vector result; - boost::copy(components() | boost::adaptors::transformed([](auto&& c) { return to_bytes(c.first); }), std::back_inserter(result)); - return result; - } - - const bytes& get_bytes() const { - return _bytes; - } - - bytes release_bytes() && { - return std::move(_bytes); - } - - size_t size() const { - return _bytes.size(); - } - - bool empty() const { - return _bytes.empty(); - } - - static bool is_static(bytes_view bytes, bool is_compound) { - return is_compound && bytes.size() > 2 && (bytes[0] & bytes[1] & 0xff) == 0xff; - } - - bool is_static() const { - return is_static(_bytes, _is_compound); - } - - bool is_compound() const { - return _is_compound; - } - - // The following factory functions assume this composite is a compound value. 
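
// (Editor's sketch, not part of the original patch.) Per the CASSANDRA-6561 note
// above, a compound value whose first two bytes are both 0xFF marks a "static"
// composite, which is also why the first component is limited to 65534 bytes.
// The detection reduces to:

#include <cstdint>
#include <string>

inline bool is_static_composite(const std::string& b, bool is_compound) {
    // Needs the 2-byte prefix plus at least one more byte, as in composite::is_static() above.
    return is_compound && b.size() > 2
        && (static_cast<uint8_t>(b[0]) & static_cast<uint8_t>(b[1])) == 0xff;
}
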
- template - static composite from_clustering_element(const schema& s, const ClusteringElement& ce) { - return serialize_value(ce.components(s)); - } - - static composite from_exploded(const std::vector& v, eoc marker = eoc::none) { - if (v.size() == 0) { - return composite(bytes(size_t(1), bytes::value_type(marker))); - } - return serialize_value(v, true, marker); - } - - static composite static_prefix(const schema& s) { - return serialize_static(s, std::vector()); - } - - explicit operator bytes_view() const { - return _bytes; - } - - template - friend inline std::ostream& operator<<(std::ostream& os, const std::pair& c) { - return os << "{value=" << c.first << "; eoc=" << sprint("0x%02x", eoc_type(c.second) & 0xff) << "}"; - } - - friend std::ostream& operator<<(std::ostream& os, const composite& v); - - struct tri_compare { - const std::vector& _types; - tri_compare(const std::vector& types) : _types(types) {} - int operator()(const composite&, const composite&) const; - int operator()(composite_view, composite_view) const; - }; -}; - -class composite_view final { - bytes_view _bytes; - bool _is_compound; -public: - composite_view(bytes_view b, bool is_compound = true) - : _bytes(b) - , _is_compound(is_compound) - { } - - composite_view(const composite& c) - : composite_view(static_cast(c), c.is_compound()) - { } - - composite_view() - : _bytes(nullptr, 0) - , _is_compound(true) - { } - - std::vector explode() const { - if (!_is_compound) { - return { _bytes }; - } - - std::vector ret; - ret.reserve(8); - for (auto it = begin(), e = end(); it != e; ) { - ret.push_back(it->first); - auto marker = it->second; - ++it; - if (it != e && marker != composite::eoc::none) { - throw runtime_exception(sprint("non-zero component divider found (%d) mid", sprint("0x%02x", composite::eoc_type(marker) & 0xff))); - } - } - return ret; - } - - composite::iterator begin() const { - return composite::iterator(_bytes, _is_compound, is_static()); - } - - composite::iterator end() const { - return composite::iterator(composite::iterator::end_iterator_tag()); - } - - boost::iterator_range components() const { - return { begin(), end() }; - } - - composite::eoc last_eoc() const { - if (!_is_compound || _bytes.empty()) { - return composite::eoc::none; - } - bytes_view v(_bytes); - v.remove_prefix(v.size() - 1); - return composite::to_eoc(read_simple(v)); - } - - auto values() const { - return components() | boost::adaptors::transformed([](auto&& c) { return c.first; }); - } - - size_t size() const { - return _bytes.size(); - } - - bool empty() const { - return _bytes.empty(); - } - - bool is_static() const { - return composite::is_static(_bytes, _is_compound); - } - - explicit operator bytes_view() const { - return _bytes; - } - - bool operator==(const composite_view& k) const { return k._bytes == _bytes && k._is_compound == _is_compound; } - bool operator!=(const composite_view& k) const { return !(k == *this); } - - friend inline std::ostream& operator<<(std::ostream& os, composite_view v) { - return os << "{" << ::join(", ", v.components()) << ", compound=" << v._is_compound << ", static=" << v.is_static() << "}"; - } -}; - -inline -std::ostream& operator<<(std::ostream& os, const composite& v) { - return os << composite_view(v); -} - -inline -int composite::tri_compare::operator()(const composite& v1, const composite& v2) const { - return (*this)(composite_view(v1), composite_view(v2)); -} - -inline -int composite::tri_compare::operator()(composite_view v1, composite_view v2) const { - // See 
org.apache.cassandra.db.composites.AbstractCType#compare - if (v1.empty()) { - return v2.empty() ? 0 : -1; - } - if (v2.empty()) { - return 1; - } - if (v1.is_static() != v2.is_static()) { - return v1.is_static() ? -1 : 1; - } - auto a_values = v1.components(); - auto b_values = v2.components(); - auto cmp = [&](const data_type& t, component_view c1, component_view c2) { - // First by value, then by EOC - auto r = t->compare(c1.first, c2.first); - if (r) { - return r; - } - return static_cast<int>(c1.second) - static_cast<int>(c2.second); - }; - return lexicographical_tri_compare(_types.begin(), _types.end(), - a_values.begin(), a_values.end(), - b_values.begin(), b_values.end(), - cmp); -} diff --git a/scylla/compress.hh b/scylla/compress.hh deleted file mode 100644 index b521320..0000000 --- a/scylla/compress.hh +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include "exceptions/exceptions.hh" - -enum class compressor { - none, - lz4, - snappy, - deflate, -}; - -class compression_parameters { -public: - static constexpr int32_t DEFAULT_CHUNK_LENGTH = 4 * 1024; - static constexpr double DEFAULT_CRC_CHECK_CHANCE = 1.0; - - static constexpr auto SSTABLE_COMPRESSION = "sstable_compression"; - static constexpr auto CHUNK_LENGTH_KB = "chunk_length_kb"; - static constexpr auto CRC_CHECK_CHANCE = "crc_check_chance"; -private: - compressor _compressor; - std::experimental::optional<int32_t> _chunk_length; - std::experimental::optional<double> _crc_check_chance; -public: - compression_parameters(compressor c = compressor::lz4) : _compressor(c) { } - compression_parameters(const std::map<sstring, sstring>& options) { - validate_options(options); - - auto it = options.find(SSTABLE_COMPRESSION); - if (it == options.end() || it->second.empty()) { - _compressor = compressor::none; - return; - } - const auto& compressor_class = it->second; - if (is_compressor_class(compressor_class, "LZ4Compressor")) { - _compressor = compressor::lz4; - } else if (is_compressor_class(compressor_class, "SnappyCompressor")) { - _compressor = compressor::snappy; - } else if (is_compressor_class(compressor_class, "DeflateCompressor")) { - _compressor = compressor::deflate; - } else { - throw exceptions::configuration_exception(sstring("Unsupported compression class '") + compressor_class + "'."); - } - auto chunk_length = options.find(CHUNK_LENGTH_KB); - if (chunk_length != options.end()) { - try { - _chunk_length = std::stoi(chunk_length->second) * 1024; - } catch (const std::exception& e) { - throw exceptions::syntax_exception(sstring("Invalid integer value ") + chunk_length->second + " for " + CHUNK_LENGTH_KB); - } - } - auto crc_chance = options.find(CRC_CHECK_CHANCE); - if (crc_chance != options.end()) { - try { - _crc_check_chance = std::stod(crc_chance->second); - } catch (const std::exception& e) { - throw exceptions::syntax_exception(sstring("Invalid double value ") + crc_chance->second + " for " + CRC_CHECK_CHANCE); - } - } - } - - compressor get_compressor() const { return _compressor; } - int32_t chunk_length() const { return _chunk_length.value_or(int(DEFAULT_CHUNK_LENGTH)); } - double crc_check_chance() const { return _crc_check_chance.value_or(double(DEFAULT_CRC_CHECK_CHANCE)); } - - void validate() { - if (_chunk_length) { - auto chunk_length = _chunk_length.value(); - if (chunk_length <= 0) { - throw exceptions::configuration_exception(sstring("Invalid negative or null ") + CHUNK_LENGTH_KB); - } - // _chunk_length must be a power of two - if (chunk_length & (chunk_length - 1)) { - throw exceptions::configuration_exception(sstring(CHUNK_LENGTH_KB) + " must be a power of 2."); - } - } - if (_crc_check_chance && (_crc_check_chance.value() < 0.0 || _crc_check_chance.value() > 1.0)) { - throw exceptions::configuration_exception(sstring(CRC_CHECK_CHANCE) + " must be between 0.0 and 1.0."); - } - } - - std::map<sstring, sstring> get_options() const { - if (_compressor == compressor::none) { - return std::map<sstring, sstring>(); - } - std::map<sstring, sstring> opts; - opts.emplace(sstring(SSTABLE_COMPRESSION), compressor_name()); - if (_chunk_length) { - opts.emplace(sstring(CHUNK_LENGTH_KB), std::to_string(_chunk_length.value() / 1024)); - } - if (_crc_check_chance) { - opts.emplace(sstring(CRC_CHECK_CHANCE), std::to_string(_crc_check_chance.value())); - } - return opts; - } - bool operator==(const compression_parameters& other) const { - return _compressor == other._compressor - && _chunk_length == other._chunk_length - && _crc_check_chance == other._crc_check_chance; - } - bool operator!=(const compression_parameters& other) const { - return !(*this == other); - } -private: - void validate_options(const std::map<sstring, sstring>& options) { - // currently, there are no options specific to a particular compressor - static std::set<sstring> keywords({ - sstring(SSTABLE_COMPRESSION), - sstring(CHUNK_LENGTH_KB), - sstring(CRC_CHECK_CHANCE), - }); - for (auto&& opt : options) { - if (!keywords.count(opt.first)) { - throw exceptions::configuration_exception(sprint("Unknown compression option '%s'.", opt.first)); - } - } - } - bool is_compressor_class(const sstring& value, const sstring& class_name) { - static const sstring namespace_prefix = "org.apache.cassandra.io.compress."; - return value == class_name || value == namespace_prefix + class_name; - } - sstring compressor_name() const { - switch (_compressor) { - case compressor::lz4: - return "org.apache.cassandra.io.compress.LZ4Compressor"; - case compressor::snappy: - return "org.apache.cassandra.io.compress.SnappyCompressor"; - case compressor::deflate: - return "org.apache.cassandra.io.compress.DeflateCompressor"; - default: - abort(); - } - } -}; diff --git a/scylla/converting_mutation_partition_applier.hh b/scylla/converting_mutation_partition_applier.hh deleted file mode 100644 index 5879988..0000000 --- a/scylla/converting_mutation_partition_applier.hh +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details.
- * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "mutation_partition_view.hh" -#include "mutation_partition.hh" -#include "schema.hh" - -// Mutation partition visitor which applies visited data into -// existing mutation_partition. The visited data may be of a different schema. -// Data which is not representable in the new schema is dropped. -// Weak exception guarantees. -class converting_mutation_partition_applier : public mutation_partition_visitor { - const schema& _p_schema; - mutation_partition& _p; - const column_mapping& _visited_column_mapping; - deletable_row* _current_row; -private: - static bool is_compatible(const column_definition& new_def, const data_type& old_type, column_kind kind) { - return ::is_compatible(new_def.kind, kind) && new_def.type->is_value_compatible_with(*old_type); - } - static void accept_cell(row& dst, column_kind kind, const column_definition& new_def, const data_type& old_type, atomic_cell_view cell) { - if (is_compatible(new_def, old_type, kind) && cell.timestamp() > new_def.dropped_at()) { - dst.apply(new_def, atomic_cell_or_collection(cell)); - } - } - static void accept_cell(row& dst, column_kind kind, const column_definition& new_def, const data_type& old_type, collection_mutation_view cell) { - if (!is_compatible(new_def, old_type, kind)) { - return; - } - auto&& ctype = static_pointer_cast(old_type); - auto old_view = ctype->deserialize_mutation_form(cell); - - collection_type_impl::mutation_view new_view; - if (old_view.tomb.timestamp > new_def.dropped_at()) { - new_view.tomb = old_view.tomb; - } - for (auto& c : old_view.cells) { - if (c.second.timestamp() > new_def.dropped_at()) { - new_view.cells.emplace_back(std::move(c)); - } - } - dst.apply(new_def, ctype->serialize_mutation_form(std::move(new_view))); - } -public: - converting_mutation_partition_applier( - const column_mapping& visited_column_mapping, - const schema& target_schema, - mutation_partition& target) - : _p_schema(target_schema) - , _p(target) - , _visited_column_mapping(visited_column_mapping) - { } - - virtual void accept_partition_tombstone(tombstone t) override { - _p.apply(t); - } - - virtual void accept_static_cell(column_id id, atomic_cell_view cell) override { - const column_mapping_entry& col = _visited_column_mapping.static_column_at(id); - const column_definition* def = _p_schema.get_column_definition(col.name()); - if (def) { - accept_cell(_p._static_row, column_kind::static_column, *def, col.type(), cell); - } - } - - virtual void accept_static_cell(column_id id, collection_mutation_view collection) override { - const column_mapping_entry& col = _visited_column_mapping.static_column_at(id); - const column_definition* def = _p_schema.get_column_definition(col.name()); - if (def) { - accept_cell(_p._static_row, column_kind::static_column, *def, col.type(), collection); - } - } - - virtual void accept_row_tombstone(const range_tombstone& rt) override { - _p.apply_row_tombstone(_p_schema, rt); - } - - virtual void accept_row(position_in_partition_view key, const row_tombstone& deleted_at, const row_marker& rm, is_dummy dummy, is_continuous continuous) override { - deletable_row& r = _p.clustered_row(_p_schema, key, dummy, continuous); - r.apply(rm); - r.apply(deleted_at); - _current_row = &r; - } - - virtual void accept_row_cell(column_id id, atomic_cell_view cell) override { - const column_mapping_entry& col = _visited_column_mapping.regular_column_at(id); - const 
column_definition* def = _p_schema.get_column_definition(col.name()); - if (def) { - accept_cell(_current_row->cells(), column_kind::regular_column, *def, col.type(), cell); - } - } - - virtual void accept_row_cell(column_id id, collection_mutation_view collection) override { - const column_mapping_entry& col = _visited_column_mapping.regular_column_at(id); - const column_definition* def = _p_schema.get_column_definition(col.name()); - if (def) { - accept_cell(_current_row->cells(), column_kind::regular_column, *def, col.type(), collection); - } - } - - // Appends the cell to dst upgrading it to the new schema. - // Cells must have monotonic names. - static void append_cell(row& dst, column_kind kind, const column_definition& new_def, const data_type& old_type, const atomic_cell_or_collection& cell) { - if (new_def.is_atomic()) { - accept_cell(dst, kind, new_def, old_type, cell.as_atomic_cell()); - } else { - accept_cell(dst, kind, new_def, old_type, cell.as_collection_mutation()); - } - } -}; diff --git a/scylla/counters.cc b/scylla/counters.cc deleted file mode 100644 index badefae..0000000 --- a/scylla/counters.cc +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "service/storage_service.hh" -#include "counters.hh" -#include "mutation.hh" -#include "combine.hh" - -counter_id counter_id::local() -{ - return counter_id(service::get_local_storage_service().get_local_id()); -} - -std::ostream& operator<<(std::ostream& os, const counter_id& id) { - return os << id.to_uuid(); -} - -std::ostream& operator<<(std::ostream& os, counter_shard_view csv) { - return os << "{global_shard id: " << csv.id() << " value: " << csv.value() - << " clock: " << csv.logical_clock() << "}"; -} - -std::ostream& operator<<(std::ostream& os, counter_cell_view ccv) { - return os << "{counter_cell timestamp: " << ccv.timestamp() << " shards: {" << ::join(", ", ccv.shards()) << "}}"; -} - -static bool apply_in_place(atomic_cell_or_collection& dst, atomic_cell_or_collection& src) -{ - auto dst_ccmv = counter_cell_mutable_view(dst.as_mutable_atomic_cell()); - auto src_ccmv = counter_cell_mutable_view(src.as_mutable_atomic_cell()); - auto dst_shards = dst_ccmv.shards(); - auto src_shards = src_ccmv.shards(); - - auto dst_it = dst_shards.begin(); - auto src_it = src_shards.begin(); - - while (src_it != src_shards.end()) { - while (dst_it != dst_shards.end() && dst_it->id() < src_it->id()) { - ++dst_it; - } - if (dst_it == dst_shards.end() || dst_it->id() != src_it->id()) { - // Fast-path failed. Revert and fall back to the slow path. 
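
// (Editor's sketch, not part of the original patch.) Both the slow path below and
// apply_reversibly() merge two lists of counter shards sorted by id; when an id
// appears on both sides, the shard with the newer logical clock wins. Standalone
// version of that merge with a simplified, hypothetical shard struct:

#include <cstdint>
#include <vector>

struct toy_shard { int64_t id, value, clock; };

inline std::vector<toy_shard> merge_shards(const std::vector<toy_shard>& a,
                                           const std::vector<toy_shard>& b) {
    std::vector<toy_shard> out;
    auto ai = a.begin(), bi = b.begin();
    while (ai != a.end() && bi != b.end()) {
        if (ai->id < bi->id) {
            out.push_back(*ai++);
        } else if (bi->id < ai->id) {
            out.push_back(*bi++);
        } else {
            out.push_back(ai->clock < bi->clock ? *bi : *ai);  // newer logical clock wins
            ++ai;
            ++bi;
        }
    }
    out.insert(out.end(), ai, a.end());
    out.insert(out.end(), bi, b.end());
    return out;
}
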
- if (dst_it == dst_shards.end()) { - --dst_it; - } - while (src_it != src_shards.begin()) { - --src_it; - while (dst_it->id() != src_it->id()) { - --dst_it; - } - src_it->swap_value_and_clock(*dst_it); - } - return false; - } - if (dst_it->logical_clock() < src_it->logical_clock()) { - dst_it->swap_value_and_clock(*src_it); - } else { - src_it->set_value_and_clock(*dst_it); - } - ++src_it; - } - - auto dst_ts = dst_ccmv.timestamp(); - auto src_ts = src_ccmv.timestamp(); - dst_ccmv.set_timestamp(std::max(dst_ts, src_ts)); - src_ccmv.set_timestamp(dst_ts); - src.as_mutable_atomic_cell().set_counter_in_place_revert(true); - return true; -} - -static void revert_in_place_apply(atomic_cell_or_collection& dst, atomic_cell_or_collection& src) -{ - assert(dst.can_use_mutable_view() && src.can_use_mutable_view()); - auto dst_ccmv = counter_cell_mutable_view(dst.as_mutable_atomic_cell()); - auto src_ccmv = counter_cell_mutable_view(src.as_mutable_atomic_cell()); - auto dst_shards = dst_ccmv.shards(); - auto src_shards = src_ccmv.shards(); - - auto dst_it = dst_shards.begin(); - auto src_it = src_shards.begin(); - - while (src_it != src_shards.end()) { - while (dst_it != dst_shards.end() && dst_it->id() < src_it->id()) { - ++dst_it; - } - assert(dst_it != dst_shards.end() && dst_it->id() == src_it->id()); - dst_it->swap_value_and_clock(*src_it); - ++src_it; - } - - auto dst_ts = dst_ccmv.timestamp(); - auto src_ts = src_ccmv.timestamp(); - dst_ccmv.set_timestamp(src_ts); - src_ccmv.set_timestamp(dst_ts); - src.as_mutable_atomic_cell().set_counter_in_place_revert(false); -} - -bool counter_cell_view::apply_reversibly(atomic_cell_or_collection& dst, atomic_cell_or_collection& src) -{ - auto dst_ac = dst.as_atomic_cell(); - auto src_ac = src.as_atomic_cell(); - - if (!dst_ac.is_live() || !src_ac.is_live()) { - if (dst_ac.is_live() || (!src_ac.is_live() && compare_atomic_cell_for_merge(dst_ac, src_ac) < 0)) { - std::swap(dst, src); - return true; - } - return false; - } - - if (dst_ac.is_counter_update() && src_ac.is_counter_update()) { - auto src_v = src_ac.counter_update_value(); - auto dst_v = dst_ac.counter_update_value(); - dst = atomic_cell::make_live_counter_update(std::max(dst_ac.timestamp(), src_ac.timestamp()), - src_v + dst_v); - return true; - } - - assert(!dst_ac.is_counter_update()); - assert(!src_ac.is_counter_update()); - - if (counter_cell_view(dst_ac).shard_count() >= counter_cell_view(src_ac).shard_count() - && dst.can_use_mutable_view() && src.can_use_mutable_view()) { - if (apply_in_place(dst, src)) { - return true; - } - } - - src.as_mutable_atomic_cell().set_counter_in_place_revert(false); - auto dst_shards = counter_cell_view(dst_ac).shards(); - auto src_shards = counter_cell_view(src_ac).shards(); - - counter_cell_builder result; - combine(dst_shards.begin(), dst_shards.end(), src_shards.begin(), src_shards.end(), - result.inserter(), counter_shard_view::less_compare_by_id(), [] (auto& x, auto& y) { - return x.logical_clock() < y.logical_clock() ? 
y : x; - }); - - auto cell = result.build(std::max(dst_ac.timestamp(), src_ac.timestamp())); - src = std::exchange(dst, atomic_cell_or_collection(cell)); - return true; -} - -void counter_cell_view::revert_apply(atomic_cell_or_collection& dst, atomic_cell_or_collection& src) -{ - if (dst.as_atomic_cell().is_counter_update()) { - auto src_v = src.as_atomic_cell().counter_update_value(); - auto dst_v = dst.as_atomic_cell().counter_update_value(); - dst = atomic_cell::make_live(dst.as_atomic_cell().timestamp(), - long_type->decompose(dst_v - src_v)); - } else if (src.as_atomic_cell().is_counter_in_place_revert_set()) { - revert_in_place_apply(dst, src); - } else { - std::swap(dst, src); - } -} - -stdx::optional counter_cell_view::difference(atomic_cell_view a, atomic_cell_view b) -{ - assert(!a.is_counter_update()); - assert(!b.is_counter_update()); - - if (!b.is_live() || !a.is_live()) { - if (b.is_live() || (!a.is_live() && compare_atomic_cell_for_merge(b, a) < 0)) { - return atomic_cell(a); - } - return { }; - } - - auto a_shards = counter_cell_view(a).shards(); - auto b_shards = counter_cell_view(b).shards(); - - auto a_it = a_shards.begin(); - auto a_end = a_shards.end(); - auto b_it = b_shards.begin(); - auto b_end = b_shards.end(); - - counter_cell_builder result; - while (a_it != a_end) { - while (b_it != b_end && (*b_it).id() < (*a_it).id()) { - ++b_it; - } - if (b_it == b_end || (*a_it).id() != (*b_it).id() || (*a_it).logical_clock() > (*b_it).logical_clock()) { - result.add_shard(counter_shard(*a_it)); - } - ++a_it; - } - - stdx::optional diff; - if (!result.empty()) { - diff = result.build(std::max(a.timestamp(), b.timestamp())); - } else if (a.timestamp() > b.timestamp()) { - diff = atomic_cell::make_live(a.timestamp(), bytes_view()); - } - return diff; -} - - -void transform_counter_updates_to_shards(mutation& m, const mutation* current_state, uint64_t clock_offset) { - // FIXME: allow current_state to be frozen_mutation - - auto transform_new_row_to_shards = [clock_offset] (auto& cells) { - cells.for_each_cell([clock_offset] (auto, atomic_cell_or_collection& ac_o_c) { - auto acv = ac_o_c.as_atomic_cell(); - if (!acv.is_live()) { - return; // continue -- we are in lambda - } - auto delta = acv.counter_update_value(); - auto cs = counter_shard(counter_id::local(), delta, clock_offset + 1); - ac_o_c = counter_cell_builder::from_single_shard(acv.timestamp(), cs); - }); - }; - - if (!current_state) { - transform_new_row_to_shards(m.partition().static_row()); - for (auto& cr : m.partition().clustered_rows()) { - transform_new_row_to_shards(cr.row().cells()); - } - return; - } - - clustering_key::less_compare cmp(*m.schema()); - - auto transform_row_to_shards = [clock_offset] (auto& transformee, auto& state) { - std::deque> shards; - state.for_each_cell([&] (column_id id, const atomic_cell_or_collection& ac_o_c) { - auto acv = ac_o_c.as_atomic_cell(); - if (!acv.is_live()) { - return; // continue -- we are in lambda - } - counter_cell_view ccv(acv); - auto cs = ccv.local_shard(); - if (!cs) { - return; // continue - } - shards.emplace_back(std::make_pair(id, counter_shard(*cs))); - }); - - transformee.for_each_cell([&] (column_id id, atomic_cell_or_collection& ac_o_c) { - auto acv = ac_o_c.as_atomic_cell(); - if (!acv.is_live()) { - return; // continue -- we are in lambda - } - while (!shards.empty() && shards.front().first < id) { - shards.pop_front(); - } - - auto delta = acv.counter_update_value(); - - if (shards.empty() || shards.front().first > id) { - auto cs = 
counter_shard(counter_id::local(), delta, clock_offset + 1); - ac_o_c = counter_cell_builder::from_single_shard(acv.timestamp(), cs); - } else { - auto& cs = shards.front().second; - cs.update(delta, clock_offset + 1); - ac_o_c = counter_cell_builder::from_single_shard(acv.timestamp(), cs); - shards.pop_front(); - } - }); - }; - - transform_row_to_shards(m.partition().static_row(), current_state->partition().static_row()); - - auto& cstate = current_state->partition(); - auto it = cstate.clustered_rows().begin(); - auto end = cstate.clustered_rows().end(); - for (auto& cr : m.partition().clustered_rows()) { - while (it != end && cmp(it->key(), cr.key())) { - ++it; - } - if (it == end || cmp(cr.key(), it->key())) { - transform_new_row_to_shards(cr.row().cells()); - continue; - } - - transform_row_to_shards(cr.row().cells(), it->row().cells()); - } -} diff --git a/scylla/counters.hh b/scylla/counters.hh deleted file mode 100644 index 030ac1d..0000000 --- a/scylla/counters.hh +++ /dev/null @@ -1,387 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -#include "atomic_cell_or_collection.hh" -#include "types.hh" - -#include "stdx.hh" - -class mutation; - -class mutation; - -class counter_id { - int64_t _least_significant; - int64_t _most_significant; -public: - counter_id() = default; - explicit counter_id(utils::UUID uuid) noexcept - : _least_significant(uuid.get_least_significant_bits()) - , _most_significant(uuid.get_most_significant_bits()) - { } - - utils::UUID to_uuid() const { - return utils::UUID(_most_significant, _least_significant); - } - - bool operator<(const counter_id& other) const { - return to_uuid() < other.to_uuid(); - } - bool operator==(const counter_id& other) const { - return to_uuid() == other.to_uuid(); - } - bool operator!=(const counter_id& other) const { - return !(*this == other); - } -public: - static counter_id local(); - - // For tests. 
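
// (Editor's sketch, not part of the original patch.) basic_counter_shard_view below
// reads each shard from a flat buffer at fixed offsets: the 16-byte id at 0, the
// value at 16, the logical clock at 24 (32 bytes per shard). It copies bytes out
// instead of casting pointers, so unaligned buffers are safe; the same technique:

#include <cstddef>
#include <cstdint>
#include <cstring>

template <typename T>
T read_at(const unsigned char* base, std::size_t offset) {
    T value;
    std::memcpy(&value, base + offset, sizeof(T));  // alignment-safe unaligned read
    return value;
}

// e.g., given a pointer to one serialized shard:
//   int64_t value = read_at<int64_t>(shard_base, 16);
//   int64_t clock = read_at<int64_t>(shard_base, 24);
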
- static counter_id generate_random() { - return counter_id(utils::make_random_uuid()); - } -}; -static_assert(std::is_pod::value, "counter_id should be a POD type"); - -std::ostream& operator<<(std::ostream& os, const counter_id& id); - -template -class basic_counter_shard_view { - enum class offset : unsigned { - id = 0u, - value = unsigned(id) + sizeof(counter_id), - logical_clock = unsigned(value) + sizeof(int64_t), - total_size = unsigned(logical_clock) + sizeof(int64_t), - }; -private: - typename View::pointer _base; -private: - template - T read(offset off) const { - T value; - std::copy_n(_base + static_cast(off), sizeof(T), reinterpret_cast(&value)); - return value; - } -public: - static constexpr auto size = size_t(offset::total_size); -public: - basic_counter_shard_view() = default; - explicit basic_counter_shard_view(typename View::pointer ptr) noexcept - : _base(ptr) { } - - counter_id id() const { return read(offset::id); } - int64_t value() const { return read(offset::value); } - int64_t logical_clock() const { return read(offset::logical_clock); } - - void swap_value_and_clock(basic_counter_shard_view& other) noexcept { - static constexpr size_t off = size_t(offset::value); - static constexpr size_t size = size_t(offset::total_size) - off; - - typename View::value_type tmp[size]; - std::copy_n(_base + off, size, tmp); - std::copy_n(other._base + off, size, _base + off); - std::copy_n(tmp, size, other._base + off); - } - - void set_value_and_clock(const basic_counter_shard_view& other) noexcept { - static constexpr size_t off = size_t(offset::value); - static constexpr size_t size = size_t(offset::total_size) - off; - std::copy_n(other._base + off, size, _base + off); - } - - bool operator==(const basic_counter_shard_view& other) const { - return id() == other.id() && value() == other.value() - && logical_clock() == other.logical_clock(); - } - bool operator!=(const basic_counter_shard_view& other) const { - return !(*this == other); - } - - struct less_compare_by_id { - bool operator()(const basic_counter_shard_view& x, const basic_counter_shard_view& y) const { - return x.id() < y.id(); - } - }; -}; - -using counter_shard_view = basic_counter_shard_view; - -std::ostream& operator<<(std::ostream& os, counter_shard_view csv); - -class counter_shard { - counter_id _id; - int64_t _value; - int64_t _logical_clock; -private: - template - static void write(const T& value, bytes::iterator& out) { - out = std::copy_n(reinterpret_cast(&value), sizeof(T), out); - } -public: - counter_shard(counter_id id, int64_t value, int64_t logical_clock) noexcept - : _id(id) - , _value(value) - , _logical_clock(logical_clock) - { } - - explicit counter_shard(counter_shard_view csv) noexcept - : _id(csv.id()) - , _value(csv.value()) - , _logical_clock(csv.logical_clock()) - { } - - counter_id id() const { return _id; } - int64_t value() const { return _value; } - int64_t logical_clock() const { return _logical_clock; } - - counter_shard& update(int64_t value_delta, int64_t clock_increment) noexcept { - _value += value_delta; - _logical_clock += clock_increment; - return *this; - } - - counter_shard& apply(counter_shard_view other) noexcept { - auto other_clock = other.logical_clock(); - if (_logical_clock < other_clock) { - _logical_clock = other_clock; - _value = other.value(); - } - return *this; - } - - static size_t serialized_size() { - return counter_shard_view::size; - } - void serialize(bytes::iterator& out) const { - write(_id, out); - write(_value, out); - write(_logical_clock, out); - } 
-}; - -class counter_cell_builder { - std::vector _shards; -public: - counter_cell_builder() = default; - counter_cell_builder(size_t shard_count) { - _shards.reserve(shard_count); - } - - void add_shard(const counter_shard& cs) { - _shards.emplace_back(cs); - } - - size_t serialized_size() const { - return _shards.size() * counter_shard::serialized_size(); - } - void serialize(bytes::iterator& out) const { - for (auto&& cs : _shards) { - cs.serialize(out); - } - } - - bool empty() const { - return _shards.empty(); - } - - atomic_cell build(api::timestamp_type timestamp) const { - return atomic_cell::make_live_from_serializer(timestamp, serialized_size(), [this] (bytes::iterator out) { - serialize(out); - }); - } - - static atomic_cell from_single_shard(api::timestamp_type timestamp, const counter_shard& cs) { - return atomic_cell::make_live_from_serializer(timestamp, counter_shard::serialized_size(), [&cs] (bytes::iterator out) { - cs.serialize(out); - }); - } - - class inserter_iterator : public std::iterator { - counter_cell_builder* _builder; - public: - explicit inserter_iterator(counter_cell_builder& b) : _builder(&b) { } - inserter_iterator& operator=(const counter_shard& cs) { - _builder->add_shard(cs); - return *this; - } - inserter_iterator& operator=(const counter_shard_view& csv) { - return operator=(counter_shard(csv)); - } - inserter_iterator& operator++() { return *this; } - inserter_iterator& operator++(int) { return *this; } - inserter_iterator& operator*() { return *this; }; - }; - - inserter_iterator inserter() { - return inserter_iterator(*this); - } -}; - -// := -// := -// := * -template -class basic_counter_cell_view { -protected: - atomic_cell_base _cell; -private: - class shard_iterator : public std::iterator> { - typename View::pointer _current; - basic_counter_shard_view _current_view; - public: - shard_iterator() = default; - shard_iterator(typename View::pointer ptr) noexcept - : _current(ptr), _current_view(ptr) { } - - basic_counter_shard_view& operator*() noexcept { - return _current_view; - } - basic_counter_shard_view* operator->() noexcept { - return &_current_view; - } - shard_iterator& operator++() noexcept { - _current += counter_shard_view::size; - _current_view = basic_counter_shard_view(_current); - return *this; - } - shard_iterator operator++(int) noexcept { - auto it = *this; - operator++(); - return it; - } - shard_iterator& operator--() noexcept { - _current -= counter_shard_view::size; - _current_view = basic_counter_shard_view(_current); - return *this; - } - shard_iterator operator--(int) noexcept { - auto it = *this; - operator--(); - return it; - } - bool operator==(const shard_iterator& other) const noexcept { - return _current == other._current; - } - bool operator!=(const shard_iterator& other) const noexcept { - return !(*this == other); - } - }; -public: - boost::iterator_range shards() const { - auto bv = _cell.value(); - auto begin = shard_iterator(bv.data()); - auto end = shard_iterator(bv.data() + bv.size()); - return boost::make_iterator_range(begin, end); - } - - size_t shard_count() const { - return _cell.value().size() / counter_shard_view::size; - } -public: - // ac must be a live counter cell - explicit basic_counter_cell_view(atomic_cell_base ac) noexcept : _cell(ac) { - assert(_cell.is_live()); - assert(!_cell.is_counter_update()); - } - - api::timestamp_type timestamp() const { return _cell.timestamp(); } - - static data_type total_value_type() { return long_type; } - - int64_t total_value() const { - return 
boost::accumulate(shards(), int64_t(0), [] (int64_t v, counter_shard_view cs) { - return v + cs.value(); - }); - } - - stdx::optional<counter_shard_view> get_shard(const counter_id& id) const { - auto it = boost::range::find_if(shards(), [&id] (counter_shard_view csv) { - return csv.id() == id; - }); - if (it == shards().end()) { - return { }; - } - return *it; - } - - stdx::optional<counter_shard_view> local_shard() const { - // TODO: consider caching local shard position - return get_shard(counter_id::local()); - } - - bool operator==(const basic_counter_cell_view& other) const { - return timestamp() == other.timestamp() && boost::equal(shards(), other.shards()); - } -}; - -struct counter_cell_view : basic_counter_cell_view { - using basic_counter_cell_view::basic_counter_cell_view; - - // Reversibly applies two counter cells, at least one of them must be live. - // Returns true iff dst was modified. - static bool apply_reversibly(atomic_cell_or_collection& dst, atomic_cell_or_collection& src); - - // Reverts apply performed by apply_reversibly(). - static void revert_apply(atomic_cell_or_collection& dst, atomic_cell_or_collection& src); - - // Computes a counter cell containing minimal amount of data which, when - // applied to 'b' returns the same cell as 'a' and 'b' applied together. - static stdx::optional<atomic_cell> difference(atomic_cell_view a, atomic_cell_view b); - - friend std::ostream& operator<<(std::ostream& os, counter_cell_view ccv); -}; - -struct counter_cell_mutable_view : basic_counter_cell_view { - using basic_counter_cell_view::basic_counter_cell_view; - - void set_timestamp(api::timestamp_type ts) { _cell.set_timestamp(ts); } -}; - -// Transforms mutation dst from counter updates to counter shards using state -// stored in current_state. -// If current_state is present it has to be in the same schema as dst. -void transform_counter_updates_to_shards(mutation& dst, const mutation* current_state, uint64_t clock_offset); - -template<> -struct appending_hash<counter_shard_view> { - template<typename Hasher> - void operator()(Hasher& h, const counter_shard_view& cshard) const { - ::feed_hash(h, cshard.id().to_uuid()); - ::feed_hash(h, cshard.value()); - ::feed_hash(h, cshard.logical_clock()); - } -}; - -template<> -struct appending_hash<counter_cell_view> { - template<typename Hasher> - void operator()(Hasher& h, const counter_cell_view& cell) const { - ::feed_hash(h, true); // is_live - ::feed_hash(h, cell.timestamp()); - for (auto&& csv : cell.shards()) { - ::feed_hash(h, csv); - } - } -}; diff --git a/scylla/cpu_controller.hh b/scylla/cpu_controller.hh deleted file mode 100644 index 62bbf96..0000000 --- a/scylla/cpu_controller.hh +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once -#include -#include -#include -
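
// (Editor's sketch, not part of the original patch.) The controller comment just
// below describes a piecewise-linear map from the virtual-dirty fraction to a CPU
// quota. One plausible standalone reading of those rules; the actual slopes live
// in the deleted adjust() implementation, so treat this as an assumption:

#include <algorithm>

inline float flush_quota(float dirty, float soft_limit) {
    constexpr float hard_dirty_limit = 0.50f, q1 = 0.01f, q2 = 0.2f, qmax = 1.0f;
    const float mid = (soft_limit + hard_dirty_limit) / 2;  // halfway point, threshold 2
    if (dirty < soft_limit) {
        return dirty * q1;                                  // no hurry below the soft limit
    }
    if (dirty < mid) {
        // sluggish ramp from q1 * soft_limit up to q2
        float t = (dirty - soft_limit) / (mid - soft_limit);
        return q1 * soft_limit + t * (q2 - q1 * soft_limit);
    }
    // aggressive ramp from q2 towards qmax as we approach the hard limit
    float t = (dirty - mid) / (hard_dirty_limit - mid);
    return std::min(qmax, q2 + t * (qmax - q2));
}
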
-// Simple proportional controller to adjust shares of memtable/streaming flushes. -// -// Goal is to flush as fast as we can, but not so fast that we steal all the CPU from incoming -// requests, and at the same time minimize user-visible fluctuations in the flush quota. -// -// What that translates to is we'll try to keep virtual dirty's first derivative at 0 (IOW, we keep -// virtual dirty constant), which means that the rate of incoming writes is equal to the rate of -// flushed bytes. -// -// The exact point at which the controller stops determines the desired flush CPU usage. As we -// approach the hard dirty limit, we need to be more aggressive. We will therefore define two -// thresholds, and increase the constant as we cross them. -// -// 1) the soft limit line -// 2) halfway between soft limit and dirty limit -// -// The constants q1 and q2 are used to determine the proportional factor at each stage. -// -// Below the soft limit, we are in no particular hurry to flush, since it means we're set to -// complete flushing before a new memtable is ready. The quota is dirty * q1, and q1 is set to a -// low number. -// -// The first half of the virtual dirty region is where we expect to be usually, so we have a low -// slope corresponding to a sluggish response between q1 * soft_limit and q2. -// -// In the second half, we're getting close to the hard dirty limit so we increase the slope and -// become more responsive, up to a maximum quota of qmax. -// -// For now we'll just set them in the structure not to complicate the constructor. But q1, q2 and -// qmax can easily become parameters if we find another user. -class flush_cpu_controller { - static constexpr float hard_dirty_limit = 0.50; - static constexpr float q1 = 0.01; - static constexpr float q2 = 0.2; - static constexpr float qmax = 1; - - float _current_quota = 0.0f; - float _goal; - std::function<float()> _current_dirty; - std::chrono::milliseconds _interval; - timer<> _update_timer; - - seastar::thread_scheduling_group _scheduling_group; - seastar::thread_scheduling_group *_current_scheduling_group = nullptr; - - void adjust(); -public: - seastar::thread_scheduling_group* scheduling_group() { - return _current_scheduling_group; - } - float current_quota() const { - return _current_quota; - } - - struct disabled { - seastar::thread_scheduling_group *backup; - }; - flush_cpu_controller(disabled d) : _scheduling_group(std::chrono::nanoseconds(0), 0), _current_scheduling_group(d.backup) {} - flush_cpu_controller(std::chrono::milliseconds interval, float soft_limit, std::function<float()> current_dirty); - flush_cpu_controller(flush_cpu_controller&&) = default; -}; - - diff --git a/scylla/cql3/Cql.g b/scylla/cql3/Cql.g deleted file mode 100644 index 1694893..0000000 --- a/scylla/cql3/Cql.g +++ /dev/null @@ -1,1831 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License.
- */ - -grammar Cql; - -options { - language = Cpp; -} - -@parser::namespace{cql3_parser} - -@lexer::includes { -#include "cql3/error_collector.hh" -#include "cql3/error_listener.hh" -} - -@parser::includes { -#include "cql3/selection/writetime_or_ttl.hh" -#include "cql3/statements/raw/parsed_statement.hh" -#include "cql3/statements/raw/select_statement.hh" -#include "cql3/statements/alter_keyspace_statement.hh" -#include "cql3/statements/alter_table_statement.hh" -#include "cql3/statements/alter_view_statement.hh" -#include "cql3/statements/create_keyspace_statement.hh" -#include "cql3/statements/drop_keyspace_statement.hh" -#include "cql3/statements/create_index_statement.hh" -#include "cql3/statements/create_table_statement.hh" -#include "cql3/statements/create_view_statement.hh" -#include "cql3/statements/create_type_statement.hh" -#include "cql3/statements/drop_type_statement.hh" -#include "cql3/statements/alter_type_statement.hh" -#include "cql3/statements/property_definitions.hh" -#include "cql3/statements/drop_index_statement.hh" -#include "cql3/statements/drop_table_statement.hh" -#include "cql3/statements/drop_view_statement.hh" -#include "cql3/statements/truncate_statement.hh" -#include "cql3/statements/raw/update_statement.hh" -#include "cql3/statements/raw/insert_statement.hh" -#include "cql3/statements/raw/delete_statement.hh" -#include "cql3/statements/index_prop_defs.hh" -#include "cql3/statements/raw/use_statement.hh" -#include "cql3/statements/raw/batch_statement.hh" -#include "cql3/statements/create_user_statement.hh" -#include "cql3/statements/alter_user_statement.hh" -#include "cql3/statements/drop_user_statement.hh" -#include "cql3/statements/list_users_statement.hh" -#include "cql3/statements/grant_statement.hh" -#include "cql3/statements/revoke_statement.hh" -#include "cql3/statements/list_permissions_statement.hh" -#include "cql3/statements/index_target.hh" -#include "cql3/statements/ks_prop_defs.hh" -#include "cql3/selection/raw_selector.hh" -#include "cql3/keyspace_element_name.hh" -#include "cql3/selection/selectable_with_field_selection.hh" -#include "cql3/constants.hh" -#include "cql3/operation_impl.hh" -#include "cql3/error_listener.hh" -#include "cql3/multi_column_relation.hh" -#include "cql3/single_column_relation.hh" -#include "cql3/token_relation.hh" -#include "cql3/index_name.hh" -#include "cql3/cql3_type.hh" -#include "cql3/cf_name.hh" -#include "cql3/maps.hh" -#include "cql3/sets.hh" -#include "cql3/lists.hh" -#include "cql3/type_cast.hh" -#include "cql3/tuples.hh" -#include "cql3/user_types.hh" -#include "cql3/ut_name.hh" -#include "cql3/functions/function_name.hh" -#include "cql3/functions/function_call.hh" -#include "core/sstring.hh" -#include "CqlLexer.hpp" - -#include -#include -} - -@parser::traits { -using namespace cql3::statements; -using namespace cql3::selection; -using cql3::cql3_type; -using conditions_type = std::vector,::shared_ptr>>; -using operations_type = std::vector,::shared_ptr>>; - -// ANTLR forces us to define a default-initialized return value -// for every rule (e.g. [returns ut_name name]), but not every type -// can be naturally zero-initialized. -// -// The uninitialized wrapper can be zero-initialized, and is convertible -// to T (after checking that it was assigned to) implicitly, eliminating the -// problem. It is up to the user to ensure it is actually assigned to. 
-template <typename T> -struct uninitialized { - std::experimental::optional<T> _val; - uninitialized() = default; - uninitialized(const uninitialized&) = default; - uninitialized(uninitialized&&) = default; - uninitialized(const T& val) : _val(val) {} - uninitialized(T&& val) : _val(std::move(val)) {} - uninitialized& operator=(const uninitialized&) = default; - uninitialized& operator=(uninitialized&&) = default; - operator const T&() const & { return check(), *_val; } - operator T&&() && { return check(), std::move(*_val); } - void check() const { if (!_val) { throw std::runtime_error("not initialized"); } } -}; - -} - -@context { - using collector_type = cql3::error_collector; - using listener_type = cql3::error_listener; - - listener_type* listener; - - std::vector<::shared_ptr> _bind_variables; - std::vector> _missing_tokens; - - // Can't use static variable, since it needs to be defined out-of-line - static const std::unordered_set<sstring>& _reserved_type_names() { - static std::unordered_set<sstring> s = { - "byte", - "smallint", - "complex", - "enum", - "date", - "interval", - "macaddr", - "bitstring", - }; - return s; - } - - shared_ptr new_bind_variables(shared_ptr name) - { - auto marker = make_shared(_bind_variables.size()); - _bind_variables.push_back(name); - return marker; - } - - shared_ptr new_in_bind_variables(shared_ptr name) { - auto marker = make_shared(_bind_variables.size()); - _bind_variables.push_back(std::move(name)); - return marker; - } - - shared_ptr new_tuple_bind_variables(shared_ptr name) - { - auto marker = make_shared(_bind_variables.size()); - _bind_variables.push_back(std::move(name)); - return marker; - } - - shared_ptr new_tuple_in_bind_variables(shared_ptr name) - { - auto marker = make_shared(_bind_variables.size()); - _bind_variables.push_back(std::move(name)); - return marker; - } - - void set_error_listener(listener_type& listener) { - this->listener = &listener; - } - - void displayRecognitionError(ANTLR_UINT8** token_names, ExceptionBaseType* ex) - { - listener->syntax_error(*this, token_names, ex); - } - - void add_recognition_error(const sstring& msg) { - listener->syntax_error(*this, msg); - } - - bool is_eof_token(CommonTokenType token) const - { - return token == CommonTokenType::TOKEN_EOF; - } - - std::string token_text(const TokenType* token) - { - if (!token) { - return ""; - } - return token->getText(); - } - - std::map<sstring, sstring> convert_property_map(shared_ptr map) { - if (!map || map->entries.empty()) { - return std::map<sstring, sstring>{}; - } - std::map<sstring, sstring> res; - for (auto&& entry : map->entries) { - // Because the parser tries to be smart and recover on error (to - // allow displaying more than one error I suppose), we have null - // entries in there. Just skip those, a proper error will be thrown in the end.
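
// (Editor's sketch, not part of the original patch.) Illustrative use of the
// uninitialized<T> wrapper defined above: ANTLR zero-initializes every rule's
// return value, so a type with no sensible default state gets wrapped, and the
// conversion back to T calls check() and throws if the rule never assigned it.
// toy_type stands in for a real rule result such as a parsed name:

#include <string>
#include <utility>

struct toy_type { std::string text; };

inline toy_type parse_rule(bool matched) {
    uninitialized<toy_type> ret;    // what the generated rule code starts with
    if (matched) {
        ret = toy_type{"parsed"};   // the rule body assigns on success
    }
    return std::move(ret);          // conversion checks it was assigned, else throws
}
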
- if (!entry.first || !entry.second) { - break; - } - auto left = dynamic_pointer_cast(entry.first); - if (!left) { - sstring msg = "Invalid property name: " + entry.first->to_string(); - if (dynamic_pointer_cast(entry.first)) { - msg += " (bind variables are not supported in DDL queries)"; - } - add_recognition_error(msg); - break; - } - auto right = dynamic_pointer_cast(entry.second); - if (!right) { - sstring msg = "Invalid property value: " + entry.first->to_string() + " for property: " + entry.second->to_string(); - if (dynamic_pointer_cast(entry.second)) { - msg += " (bind variables are not supported in DDL queries)"; - } - add_recognition_error(msg); - break; - } - res.emplace(left->get_raw_text(), right->get_raw_text()); - } - return res; - } - - void add_raw_update(std::vector,::shared_ptr>>& operations, - ::shared_ptr key, ::shared_ptr update) - { - for (auto&& p : operations) { - if (*p.first == *key && !p.second->is_compatible_with(update)) { - // \%s is escaped for antlr - add_recognition_error(sprint("Multiple incompatible setting of column \%s", *key)); - } - } - operations.emplace_back(std::move(key), std::move(update)); - } - - TokenType* getMissingSymbol(IntStreamType* istream, ExceptionBaseType* e, - ANTLR_UINT32 expectedTokenType, BitsetListType* follow) { - auto token = BaseType::getMissingSymbol(istream, e, expectedTokenType, follow); - _missing_tokens.emplace_back(token); - return token; - } -} - -@lexer::namespace{cql3_parser} - -@lexer::traits { - class CqlLexer; - class CqlParser; - typedef antlr3::Traits CqlLexerTraits; - typedef CqlLexerTraits CqlParserTraits; -} - -@lexer::header { -#pragma GCC diagnostic ignored "-Wunused-variable" -#pragma GCC diagnostic ignored "-Wunused-function" -} - -@lexer::context { - using collector_type = cql3::error_collector; - using listener_type = cql3::error_listener; - - listener_type* listener; - - void set_error_listener(listener_type& listener) { - this->listener = &listener; - } - - void displayRecognitionError(ANTLR_UINT8** token_names, ExceptionBaseType* ex) - { - listener->syntax_error(*this, token_names, ex); - } - - bool is_eof_token(CommonTokenType token) const - { - return token == CommonTokenType::TOKEN_EOF; - } - - std::string token_text(const TokenType* token) const - { - if (!token) { - return ""; - } - return std::to_string(int(*token)); - } -} - -/** STATEMENTS **/ - -query returns [shared_ptr stmnt] - : st=cqlStatement (';')* EOF { $stmnt = st; } - ; - -cqlStatement returns [shared_ptr stmt] - @after{ if (stmt) { stmt->set_bound_variables(_bind_variables); } } - : st1= selectStatement { $stmt = st1; } - | st2= insertStatement { $stmt = st2; } - | st3= updateStatement { $stmt = st3; } - | st4= batchStatement { $stmt = st4; } - | st5= deleteStatement { $stmt = st5; } - | st6= useStatement { $stmt = st6; } - | st7= truncateStatement { $stmt = st7; } - | st8= createKeyspaceStatement { $stmt = st8; } - | st9= createTableStatement { $stmt = st9; } - | st10=createIndexStatement { $stmt = st10; } - | st11=dropKeyspaceStatement { $stmt = st11; } - | st12=dropTableStatement { $stmt = st12; } - | st13=dropIndexStatement { $stmt = st13; } - | st14=alterTableStatement { $stmt = st14; } - | st15=alterKeyspaceStatement { $stmt = st15; } - | st16=grantStatement { $stmt = st16; } - | st17=revokeStatement { $stmt = st17; } - | st18=listPermissionsStatement { $stmt = st18; } - | st19=createUserStatement { $stmt = st19; } - | st20=alterUserStatement { $stmt = st20; } - | st21=dropUserStatement { $stmt = st21; } - | 
st22=listUsersStatement { $stmt = st22; } -#if 0 - | st23=createTriggerStatement { $stmt = st23; } - | st24=dropTriggerStatement { $stmt = st24; } -#endif - | st25=createTypeStatement { $stmt = st25; } - | st26=alterTypeStatement { $stmt = st26; } - | st27=dropTypeStatement { $stmt = st27; } -#if 0 - | st28=createFunctionStatement { $stmt = st28; } - | st29=dropFunctionStatement { $stmt = st29; } - | st30=createAggregateStatement { $stmt = st30; } - | st31=dropAggregateStatement { $stmt = st31; } -#endif - | st32=createViewStatement { $stmt = st32; } - | st33=alterViewStatement { $stmt = st33; } - | st34=dropViewStatement { $stmt = st34; } - ; - -/* - * USE ; - */ -useStatement returns [::shared_ptr stmt] - : K_USE ks=keyspaceName { $stmt = ::make_shared(ks); } - ; - -/** - * SELECT - * FROM - * WHERE KEY = "key1" AND COL > 1 AND COL < 100 - * LIMIT ; - */ -selectStatement returns [shared_ptr expr] - @init { - bool is_distinct = false; - ::shared_ptr limit; - raw::select_statement::parameters::orderings_type orderings; - bool allow_filtering = false; - } - : K_SELECT ( ( K_DISTINCT { is_distinct = true; } )? - sclause=selectClause - | sclause=selectCountClause - ) - K_FROM cf=columnFamilyName - ( K_WHERE wclause=whereClause )? - ( K_ORDER K_BY orderByClause[orderings] ( ',' orderByClause[orderings] )* )? - ( K_LIMIT rows=intValue { limit = rows; } )? - ( K_ALLOW K_FILTERING { allow_filtering = true; } )? - { - auto params = ::make_shared(std::move(orderings), is_distinct, allow_filtering); - $expr = ::make_shared(std::move(cf), std::move(params), - std::move(sclause), std::move(wclause), std::move(limit)); - } - ; - -selectClause returns [std::vector> expr] - : t1=selector { $expr.push_back(t1); } (',' tN=selector { $expr.push_back(tN); })* - | '*' { } - ; - -selector returns [shared_ptr s] - @init{ shared_ptr alias; } - : us=unaliasedSelector (K_AS c=ident { alias = c; })? { $s = make_shared(us, alias); } - ; - -unaliasedSelector returns [shared_ptr s] - @init { shared_ptr tmp; } - : ( c=cident { tmp = c; } - | K_WRITETIME '(' c=cident ')' { tmp = make_shared(c, true); } - | K_TTL '(' c=cident ')' { tmp = make_shared(c, false); } - | f=functionName args=selectionFunctionArgs { tmp = ::make_shared(std::move(f), std::move(args)); } - ) - ( '.' fi=cident { tmp = make_shared(std::move(tmp), std::move(fi)); } )* - { $s = tmp; } - ; - -selectionFunctionArgs returns [std::vector> a] - : '(' ')' - | '(' s1=unaliasedSelector { a.push_back(std::move(s1)); } - ( ',' sn=unaliasedSelector { a.push_back(std::move(sn)); } )* - ')' - ; - -selectCountClause returns [std::vector> expr] - @init{ auto alias = make_shared("count", false); } - : K_COUNT '(' countArgument ')' (K_AS c=ident { alias = c; })? { - auto&& with_fn = ::make_shared( - cql3::functions::function_name::native_function("countRows"), - std::vector>()); - $expr.push_back(make_shared(with_fn, alias)); - } - ; - -countArgument - : '*' - | i=INTEGER { if (i->getText() != "1") { - add_recognition_error("Only COUNT(1) is supported, got COUNT(" + i->getText() + ")"); - } } - ; - -whereClause returns [std::vector clause] - : relation[$clause] (K_AND relation[$clause])* - ; - -orderByClause[raw::select_statement::parameters::orderings_type& orderings] - @init{ - bool reversed = false; - } - : c=cident (K_ASC | K_DESC { reversed = true; })? { orderings.emplace_back(c, reversed); } - ; - -/** - * INSERT INTO (, , , ...) - * VALUES (, , , ...) 
- * USING TIMESTAMP ; - * - */ -insertStatement returns [::shared_ptr expr] - @init { - auto attrs = ::make_shared(); - std::vector<::shared_ptr> column_names; - std::vector<::shared_ptr> values; - bool if_not_exists = false; - } - : K_INSERT K_INTO cf=columnFamilyName - '(' c1=cident { column_names.push_back(c1); } ( ',' cn=cident { column_names.push_back(cn); } )* ')' - K_VALUES - '(' v1=term { values.push_back(v1); } ( ',' vn=term { values.push_back(vn); } )* ')' - - ( K_IF K_NOT K_EXISTS { if_not_exists = true; } )? - ( usingClause[attrs] )? - { - $expr = ::make_shared(std::move(cf), - std::move(attrs), - std::move(column_names), - std::move(values), - if_not_exists); - } - ; - -usingClause[::shared_ptr attrs] - : K_USING usingClauseObjective[attrs] ( K_AND usingClauseObjective[attrs] )* - ; - -usingClauseObjective[::shared_ptr attrs] - : K_TIMESTAMP ts=intValue { attrs->timestamp = ts; } - | K_TTL t=intValue { attrs->time_to_live = t; } - ; - -/** - * UPDATE - * USING TIMESTAMP - * SET name1 = value1, name2 = value2 - * WHERE key = value; - */ -updateStatement returns [::shared_ptr expr] - @init { - auto attrs = ::make_shared(); - std::vector, ::shared_ptr>> operations; - } - : K_UPDATE cf=columnFamilyName - ( usingClause[attrs] )? - K_SET columnOperation[operations] (',' columnOperation[operations])* - K_WHERE wclause=whereClause - ( K_IF conditions=updateConditions )? - { - return ::make_shared(std::move(cf), - std::move(attrs), - std::move(operations), - std::move(wclause), - std::move(conditions)); - } - ; - -updateConditions returns [conditions_type conditions] - : columnCondition[conditions] ( K_AND columnCondition[conditions] )* - ; - -/** - * DELETE name1, name2 - * FROM - * USING TIMESTAMP - * WHERE KEY = keyname - [IF (EXISTS | name = value, ...)]; - */ -deleteStatement returns [::shared_ptr expr] - @init { - auto attrs = ::make_shared(); - std::vector<::shared_ptr> column_deletions; - bool if_exists = false; - } - : K_DELETE ( dels=deleteSelection { column_deletions = std::move(dels); } )? - K_FROM cf=columnFamilyName - ( usingClauseDelete[attrs] )? - K_WHERE wclause=whereClause - ( K_IF ( K_EXISTS { if_exists = true; } | conditions=updateConditions ))? - { - return ::make_shared(cf, - std::move(attrs), - std::move(column_deletions), - std::move(wclause), - std::move(conditions), - if_exists); - } - ; - -deleteSelection returns [std::vector<::shared_ptr> operations] - : t1=deleteOp { $operations.emplace_back(std::move(t1)); } - (',' tN=deleteOp { $operations.emplace_back(std::move(tN)); })* - ; - -deleteOp returns [::shared_ptr op] - : c=cident { $op = ::make_shared(std::move(c)); } - | c=cident '[' t=term ']' { $op = ::make_shared(std::move(c), std::move(t)); } - ; - -usingClauseDelete[::shared_ptr attrs] - : K_USING K_TIMESTAMP ts=intValue { attrs->timestamp = ts; } - ; - -/** - * BEGIN BATCH - * UPDATE SET name1 = value1 WHERE KEY = keyname1; - * UPDATE SET name2 = value2 WHERE KEY = keyname2; - * UPDATE SET name3 = value3 WHERE KEY = keyname3; - * ... - * APPLY BATCH - * - * OR - * - * BEGIN BATCH - * INSERT INTO (KEY, ) VALUES ('', ''); - * INSERT INTO (KEY, ) VALUES ('', ''); - * ... - * APPLY BATCH - * - * OR - * - * BEGIN BATCH - * DELETE name1, name2 FROM WHERE key = - * DELETE name3, name4 FROM WHERE key = - * ... 
- * APPLY BATCH - */ -batchStatement returns [shared_ptr expr] - @init { - using btype = cql3::statements::raw::batch_statement::type; - btype type = btype::LOGGED; - std::vector> statements; - auto attrs = make_shared(); - } - : K_BEGIN - ( K_UNLOGGED { type = btype::UNLOGGED; } | K_COUNTER { type = btype::COUNTER; } )? - K_BATCH ( usingClause[attrs] )? - ( s=batchStatementObjective ';'? { statements.push_back(std::move(s)); } )* - K_APPLY K_BATCH - { - $expr = ::make_shared(type, std::move(attrs), std::move(statements)); - } - ; - -batchStatementObjective returns [shared_ptr statement] - : i=insertStatement { $statement = i; } - | u=updateStatement { $statement = u; } - | d=deleteStatement { $statement = d; } - ; - -#if 0 -createAggregateStatement returns [CreateAggregateStatement expr] - @init { - boolean orReplace = false; - boolean ifNotExists = false; - - List argsTypes = new ArrayList<>(); - } - : K_CREATE (K_OR K_REPLACE { orReplace = true; })? - K_AGGREGATE - (K_IF K_NOT K_EXISTS { ifNotExists = true; })? - fn=functionName - '(' - ( - v=comparatorType { argsTypes.add(v); } - ( ',' v=comparatorType { argsTypes.add(v); } )* - )? - ')' - K_SFUNC sfunc = allowedFunctionName - K_STYPE stype = comparatorType - ( - K_FINALFUNC ffunc = allowedFunctionName - )? - ( - K_INITCOND ival = term - )? - { $expr = new CreateAggregateStatement(fn, argsTypes, sfunc, stype, ffunc, ival, orReplace, ifNotExists); } - ; - -dropAggregateStatement returns [DropAggregateStatement expr] - @init { - boolean ifExists = false; - List argsTypes = new ArrayList<>(); - boolean argsPresent = false; - } - : K_DROP K_AGGREGATE - (K_IF K_EXISTS { ifExists = true; } )? - fn=functionName - ( - '(' - ( - v=comparatorType { argsTypes.add(v); } - ( ',' v=comparatorType { argsTypes.add(v); } )* - )? - ')' - { argsPresent = true; } - )? - { $expr = new DropAggregateStatement(fn, argsTypes, argsPresent, ifExists); } - ; - -createFunctionStatement returns [CreateFunctionStatement expr] - @init { - boolean orReplace = false; - boolean ifNotExists = false; - - boolean deterministic = true; - List argsNames = new ArrayList<>(); - List argsTypes = new ArrayList<>(); - } - : K_CREATE (K_OR K_REPLACE { orReplace = true; })? - ((K_NON { deterministic = false; })? K_DETERMINISTIC)? - K_FUNCTION - (K_IF K_NOT K_EXISTS { ifNotExists = true; })? - fn=functionName - '(' - ( - k=ident v=comparatorType { argsNames.add(k); argsTypes.add(v); } - ( ',' k=ident v=comparatorType { argsNames.add(k); argsTypes.add(v); } )* - )? - ')' - K_RETURNS rt = comparatorType - K_LANGUAGE language = IDENT - K_AS body = STRING_LITERAL - { $expr = new CreateFunctionStatement(fn, $language.text.toLowerCase(), $body.text, deterministic, argsNames, argsTypes, rt, orReplace, ifNotExists); } - ; - -dropFunctionStatement returns [DropFunctionStatement expr] - @init { - boolean ifExists = false; - List argsTypes = new ArrayList<>(); - boolean argsPresent = false; - } - : K_DROP K_FUNCTION - (K_IF K_EXISTS { ifExists = true; } )? - fn=functionName - ( - '(' - ( - v=comparatorType { argsTypes.add(v); } - ( ',' v=comparatorType { argsTypes.add(v); } )* - )? - ')' - { argsPresent = true; } - )? - { $expr = new DropFunctionStatement(fn, argsTypes, argsPresent, ifExists); } - ; -#endif - -/** - * CREATE KEYSPACE [IF NOT EXISTS] WITH attr1 = value1 AND attr2 = value2; - */ -createKeyspaceStatement returns [shared_ptr expr] - @init { - auto attrs = make_shared(); - bool if_not_exists = false; - } - : K_CREATE K_KEYSPACE (K_IF K_NOT K_EXISTS { if_not_exists = true; } )? 
ks=keyspaceName - K_WITH properties[attrs] { $expr = make_shared(ks, attrs, if_not_exists); } - ; - -/** - * CREATE COLUMNFAMILY [IF NOT EXISTS] ( - * , - * , - * - * ) WITH = AND ...; - */ -createTableStatement returns [shared_ptr expr] - @init { bool if_not_exists = false; } - : K_CREATE K_COLUMNFAMILY (K_IF K_NOT K_EXISTS { if_not_exists = true; } )? - cf=columnFamilyName { $expr = make_shared(cf, if_not_exists); } - cfamDefinition[expr] - ; - -cfamDefinition[shared_ptr expr] - : '(' cfamColumns[expr] ( ',' cfamColumns[expr]? )* ')' - ( K_WITH cfamProperty[$expr->properties()] ( K_AND cfamProperty[$expr->properties()] )*)? - ; - -cfamColumns[shared_ptr expr] - @init { bool is_static=false; } - : k=ident v=comparatorType (K_STATIC {is_static = true;})? { $expr->add_definition(k, v, is_static); } - (K_PRIMARY K_KEY { $expr->add_key_aliases(std::vector>{k}); })? - | K_PRIMARY K_KEY '(' pkDef[expr] (',' c=ident { $expr->add_column_alias(c); } )* ')' - ; - -pkDef[shared_ptr expr] - @init { std::vector> l; } - : k=ident { $expr->add_key_aliases(std::vector>{k}); } - | '(' k1=ident { l.push_back(k1); } ( ',' kn=ident { l.push_back(kn); } )* ')' { $expr->add_key_aliases(l); } - ; - -cfamProperty[cql3::statements::cf_properties& expr] - : property[$expr.properties()] - | K_COMPACT K_STORAGE { $expr.set_compact_storage(); } - | K_CLUSTERING K_ORDER K_BY '(' cfamOrdering[expr] (',' cfamOrdering[expr])* ')' - ; - -cfamOrdering[cql3::statements::cf_properties& expr] - @init{ bool reversed=false; } - : k=ident (K_ASC | K_DESC { reversed=true;} ) { $expr.set_ordering(k, reversed); } - ; - - -/** - * CREATE TYPE foo ( - * , - * , - * .... - * ) - */ -createTypeStatement returns [::shared_ptr expr] - @init { bool if_not_exists = false; } - : K_CREATE K_TYPE (K_IF K_NOT K_EXISTS { if_not_exists = true; } )? - tn=userTypeName { $expr = ::make_shared(tn, if_not_exists); } - '(' typeColumns[expr] ( ',' typeColumns[expr]? )* ')' - ; - -typeColumns[::shared_ptr expr] - : k=ident v=comparatorType { $expr->add_definition(k, v); } - ; - - -/** - * CREATE INDEX [IF NOT EXISTS] [indexName] ON (); - * CREATE CUSTOM INDEX [IF NOT EXISTS] [indexName] ON () USING ; - */ -createIndexStatement returns [::shared_ptr expr] - @init { - auto props = make_shared(); - bool if_not_exists = false; - auto name = ::make_shared(); - std::vector<::shared_ptr> targets; - } - : K_CREATE (K_CUSTOM { props->is_custom = true; })? K_INDEX (K_IF K_NOT K_EXISTS { if_not_exists = true; } )? - (idxName[name])? K_ON cf=columnFamilyName '(' (target1=indexIdent { targets.emplace_back(target1); } (',' target2=indexIdent { targets.emplace_back(target2); } )*)? ')' - (K_USING cls=STRING_LITERAL { props->custom_class = sstring{$cls.text}; })? - (K_WITH properties[props])? 
- { $expr = ::make_shared(cf, name, targets, props, if_not_exists); } - ; - -indexIdent returns [::shared_ptr id] - : c=cident { $id = index_target::raw::values_of(c); } - | K_KEYS '(' c=cident ')' { $id = index_target::raw::keys_of(c); } - | K_ENTRIES '(' c=cident ')' { $id = index_target::raw::keys_and_values_of(c); } - | K_FULL '(' c=cident ')' { $id = index_target::raw::full_collection(c); } - ; - -/** - * CREATE MATERIALIZED VIEW AS - * SELECT - * FROM - * WHERE IS NOT NULL - * PRIMARY KEY () - * WITH = AND ...; - */ -createViewStatement returns [::shared_ptr expr] - @init { - bool if_not_exists = false; - std::vector<::shared_ptr> partition_keys; - std::vector<::shared_ptr> composite_keys; - } - : K_CREATE K_MATERIALIZED K_VIEW (K_IF K_NOT K_EXISTS { if_not_exists = true; })? cf=columnFamilyName K_AS - K_SELECT sclause=selectClause K_FROM basecf=columnFamilyName - (K_WHERE wclause=whereClause)? - K_PRIMARY K_KEY ( - '(' '(' k1=cident { partition_keys.push_back(k1); } ( ',' kn=cident { partition_keys.push_back(kn); } )* ')' ( ',' c1=cident { composite_keys.push_back(c1); } )* ')' - | '(' k1=cident { partition_keys.push_back(k1); } ( ',' cn=cident { composite_keys.push_back(cn); } )* ')' - ) - { - $expr = ::make_shared( - std::move(cf), - std::move(basecf), - std::move(sclause), - std::move(wclause), - std::move(partition_keys), - std::move(composite_keys), - if_not_exists); - } - ( K_WITH cfamProperty[{ $expr->properties() }] ( K_AND cfamProperty[{ $expr->properties() }] )*)? - ; - -#if 0 -/** - * CREATE TRIGGER triggerName ON columnFamily USING 'triggerClass'; - */ -createTriggerStatement returns [CreateTriggerStatement expr] - @init { - boolean ifNotExists = false; - } - : K_CREATE K_TRIGGER (K_IF K_NOT K_EXISTS { ifNotExists = true; } )? (name=cident) - K_ON cf=columnFamilyName K_USING cls=STRING_LITERAL - { $expr = new CreateTriggerStatement(cf, name.toString(), $cls.text, ifNotExists); } - ; - -/** - * DROP TRIGGER [IF EXISTS] triggerName ON columnFamily; - */ -dropTriggerStatement returns [DropTriggerStatement expr] - @init { boolean ifExists = false; } - : K_DROP K_TRIGGER (K_IF K_EXISTS { ifExists = true; } )? (name=cident) K_ON cf=columnFamilyName - { $expr = new DropTriggerStatement(cf, name.toString(), ifExists); } - ; - -#endif - -/** - * ALTER KEYSPACE WITH = ; - */ -alterKeyspaceStatement returns [shared_ptr expr] - @init { - auto attrs = make_shared(); - } - : K_ALTER K_KEYSPACE ks=keyspaceName - K_WITH properties[attrs] { $expr = make_shared(ks, attrs); } - ; - -/** - * ALTER COLUMN FAMILY ALTER TYPE ; - * ALTER COLUMN FAMILY ADD ; - * ALTER COLUMN FAMILY DROP ; - * ALTER COLUMN FAMILY WITH = ; - * ALTER COLUMN FAMILY RENAME TO ; - */ -alterTableStatement returns [shared_ptr expr] - @init { - alter_table_statement::type type; - auto props = make_shared(); - std::vector, shared_ptr>> renames; - bool is_static = false; - } - : K_ALTER K_COLUMNFAMILY cf=columnFamilyName - ( K_ALTER id=cident K_TYPE v=comparatorType { type = alter_table_statement::type::alter; } - | K_ADD id=cident v=comparatorType ({ is_static=true; } K_STATIC)? 
{ type = alter_table_statement::type::add; } - | K_DROP id=cident { type = alter_table_statement::type::drop; } - | K_WITH properties[props] { type = alter_table_statement::type::opts; } - | K_RENAME { type = alter_table_statement::type::rename; } - id1=cident K_TO toId1=cident { renames.emplace_back(id1, toId1); } - ( K_AND idn=cident K_TO toIdn=cident { renames.emplace_back(idn, toIdn); } )* - ) - { - $expr = ::make_shared(std::move(cf), type, std::move(id), - std::move(v), std::move(props), std::move(renames), is_static); - } - ; - -/** - * ALTER TYPE ALTER TYPE ; - * ALTER TYPE ADD ; - * ALTER TYPE RENAME TO AND ...; - */ -alterTypeStatement returns [::shared_ptr expr] - : K_ALTER K_TYPE name=userTypeName - ( K_ALTER f=ident K_TYPE v=comparatorType { $expr = ::make_shared(name, false, f, v); } - | K_ADD f=ident v=comparatorType { $expr = ::make_shared(name, true, f, v); } - | K_RENAME - { $expr = ::make_shared(name); } - renames[{ static_pointer_cast($expr) }] - ) - ; - -/** - * ALTER MATERIALIZED VIEW WITH = ; - */ -alterViewStatement returns [::shared_ptr expr] - @init { - auto props = make_shared(); - } - : K_ALTER K_MATERIALIZED K_VIEW cf=columnFamilyName K_WITH properties[props] - { - $expr = ::make_shared(std::move(cf), std::move(props)); - } - ; - -renames[::shared_ptr expr] - : fromId=ident K_TO toId=ident { $expr->add_rename(fromId, toId); } - ( K_AND renames[$expr] )? - ; - -/** - * DROP KEYSPACE [IF EXISTS] ; - */ -dropKeyspaceStatement returns [::shared_ptr ksp] - @init { bool if_exists = false; } - : K_DROP K_KEYSPACE (K_IF K_EXISTS { if_exists = true; } )? ks=keyspaceName { $ksp = ::make_shared(ks, if_exists); } - ; - -/** - * DROP COLUMNFAMILY [IF EXISTS] ; - */ -dropTableStatement returns [::shared_ptr stmt] - @init { bool if_exists = false; } - : K_DROP K_COLUMNFAMILY (K_IF K_EXISTS { if_exists = true; } )? cf=columnFamilyName { $stmt = ::make_shared(cf, if_exists); } - ; - -/** - * DROP TYPE ; - */ -dropTypeStatement returns [::shared_ptr stmt] - @init { bool if_exists = false; } - : K_DROP K_TYPE (K_IF K_EXISTS { if_exists = true; } )? name=userTypeName { $stmt = ::make_shared(name, if_exists); } - ; - -/** - * DROP MATERIALIZED VIEW [IF EXISTS] - */ -dropViewStatement returns [::shared_ptr stmt] - @init { bool if_exists = false; } - : K_DROP K_MATERIALIZED K_VIEW (K_IF K_EXISTS { if_exists = true; } )? cf=columnFamilyName - { $stmt = ::make_shared(cf, if_exists); } - ; - -/** - * DROP INDEX [IF EXISTS] - */ -dropIndexStatement returns [::shared_ptr expr] - @init { bool if_exists = false; } - : K_DROP K_INDEX (K_IF K_EXISTS { if_exists = true; } )? index=indexName - { $expr = ::make_shared(index, if_exists); } - ; - -/** - * TRUNCATE ; - */ -truncateStatement returns [::shared_ptr stmt] - : K_TRUNCATE (K_COLUMNFAMILY)? cf=columnFamilyName { $stmt = ::make_shared(cf); } - ; - -/** - * GRANT ON TO - */ -grantStatement returns [::shared_ptr stmt] - : K_GRANT - permissionOrAll - K_ON - resource - K_TO - username - { $stmt = ::make_shared($permissionOrAll.perms, $resource.res, $username.text); } - ; - -/** - * REVOKE ON FROM - */ -revokeStatement returns [::shared_ptr stmt] - : K_REVOKE - permissionOrAll - K_ON - resource - K_FROM - username - { $stmt = ::make_shared($permissionOrAll.perms, $resource.res, $username.text); } - ; - -listPermissionsStatement returns [::shared_ptr stmt] - @init { - std::experimental::optional r; - std::experimental::optional u; - bool recursive = true; - } - : K_LIST - permissionOrAll - ( K_ON resource { r = $resource.res; } )? 
- ( K_OF username { u = sstring($username.text); } )? - ( K_NORECURSIVE { recursive = false; } )? - { $stmt = ::make_shared($permissionOrAll.perms, std::move(r), std::move(u), recursive); } - ; - -permission returns [auth::permission perm] - : p=(K_CREATE | K_ALTER | K_DROP | K_SELECT | K_MODIFY | K_AUTHORIZE) - { $perm = auth::permissions::from_string($p.text); } - ; - -permissionOrAll returns [auth::permission_set perms] - : K_ALL ( K_PERMISSIONS )? { $perms = auth::permissions::ALL_DATA; } - | p=permission ( K_PERMISSION )? { $perms = auth::permission_set::from_mask(auth::permission_set::mask_for($p.perm)); } - ; - -resource returns [auth::data_resource res] - : r=dataResource { $res = $r.res; } - ; - -dataResource returns [auth::data_resource res] - : K_ALL K_KEYSPACES { $res = auth::data_resource(); } - | K_KEYSPACE ks = keyspaceName { $res = auth::data_resource($ks.id); } - | ( K_COLUMNFAMILY )? cf = columnFamilyName - { $res = auth::data_resource($cf.name->get_keyspace(), $cf.name->get_column_family()); } - ; - -/** - * CREATE USER [IF NOT EXISTS] [WITH PASSWORD ] [SUPERUSER|NOSUPERUSER] - */ -createUserStatement returns [::shared_ptr stmt] - @init { - auto opts = ::make_shared(); - bool superuser = false; - bool ifNotExists = false; - } - : K_CREATE K_USER (K_IF K_NOT K_EXISTS { ifNotExists = true; })? username - ( K_WITH userOptions[opts] )? - ( K_SUPERUSER { superuser = true; } | K_NOSUPERUSER { superuser = false; } )? - { $stmt = ::make_shared($username.text, std::move(opts), superuser, ifNotExists); } - ; - -/** - * ALTER USER [WITH PASSWORD ] [SUPERUSER|NOSUPERUSER] - */ -alterUserStatement returns [::shared_ptr stmt] - @init { - auto opts = ::make_shared(); - std::experimental::optional superuser; - } - : K_ALTER K_USER username - ( K_WITH userOptions[opts] )? - ( K_SUPERUSER { superuser = true; } | K_NOSUPERUSER { superuser = false; } )? - { $stmt = ::make_shared($username.text, std::move(opts), std::move(superuser)); } - ; - -/** - * DROP USER [IF EXISTS] - */ -dropUserStatement returns [::shared_ptr stmt] - @init { bool ifExists = false; } - : K_DROP K_USER (K_IF K_EXISTS { ifExists = true; })? username { $stmt = ::make_shared($username.text, ifExists); } - ; - -/** - * LIST USERS - */ -listUsersStatement returns [::shared_ptr stmt] - : K_LIST K_USERS { $stmt = ::make_shared(); } - ; - -userOptions[::shared_ptr opts] - : userOption[opts] - ; - -userOption[::shared_ptr opts] - : k=K_PASSWORD v=STRING_LITERAL { opts->put($k.text, $v.text); } - ; - -/** DEFINITIONS **/ - -// Column Identifiers. These need to be treated differently from other -// identifiers because the underlying comparator is not necessarily text. See -// CASSANDRA-8178 for details. -cident returns [shared_ptr id] - : t=IDENT { $id = make_shared(sstring{$t.text}, false); } - | t=QUOTED_NAME { $id = make_shared(sstring{$t.text}, true); } - | k=unreserved_keyword { $id = make_shared(k, false); } - ; - -// Identifiers that do not refer to columns or where the comparator is known to be text -ident returns [shared_ptr id] - : t=IDENT { $id = make_shared(sstring{$t.text}, false); } - | t=QUOTED_NAME { $id = make_shared(sstring{$t.text}, true); } - | k=unreserved_keyword { $id = make_shared(k, false); } - ; - -// Keyspace & Column family names -keyspaceName returns [sstring id] - @init { auto name = make_shared(); } - : ksName[name] { $id = name->get_keyspace(); } - ; - -indexName returns [::shared_ptr name] - @init { $name = ::make_shared(); } - : (ksName[name] '.')? 
idxName[name] - ; - -columnFamilyName returns [::shared_ptr name] - @init { $name = ::make_shared(); } - : (ksName[name] '.')? cfName[name] - ; - -userTypeName returns [uninitialized name] - : (ks=ident '.')? ut=non_type_ident { $name = cql3::ut_name(ks, ut); } - ; - -#if 0 -userOrRoleName returns [RoleName name] - @init { $name = new RoleName(); } - : roleName[name] {return $name;} - ; -#endif - -ksName[::shared_ptr name] - : t=IDENT { $name->set_keyspace($t.text, false);} - | t=QUOTED_NAME { $name->set_keyspace($t.text, true);} - | k=unreserved_keyword { $name->set_keyspace(k, false);} - | QMARK {add_recognition_error("Bind variables cannot be used for keyspace names");} - ; - -cfName[::shared_ptr name] - : t=IDENT { $name->set_column_family($t.text, false); } - | t=QUOTED_NAME { $name->set_column_family($t.text, true); } - | k=unreserved_keyword { $name->set_column_family(k, false); } - | QMARK {add_recognition_error("Bind variables cannot be used for table names");} - ; - -idxName[::shared_ptr name] - : t=IDENT { $name->set_index($t.text, false); } - | t=QUOTED_NAME { $name->set_index($t.text, true);} - | k=unreserved_keyword { $name->set_index(k, false); } - | QMARK {add_recognition_error("Bind variables cannot be used for index names");} - ; - -#if 0 -roleName[RoleName name] - : t=IDENT { $name.setName($t.text, false); } - | t=QUOTED_NAME { $name.setName($t.text, true); } - | k=unreserved_keyword { $name.setName(k, false); } - | QMARK {addRecognitionError("Bind variables cannot be used for role names");} - ; -#endif - -constant returns [shared_ptr constant] - @init{std::string sign;} - : t=STRING_LITERAL { $constant = cql3::constants::literal::string(sstring{$t.text}); } - | t=INTEGER { $constant = cql3::constants::literal::integer(sstring{$t.text}); } - | t=FLOAT { $constant = cql3::constants::literal::floating_point(sstring{$t.text}); } - | t=BOOLEAN { $constant = cql3::constants::literal::bool_(sstring{$t.text}); } - | t=UUID { $constant = cql3::constants::literal::uuid(sstring{$t.text}); } - | t=HEXNUMBER { $constant = cql3::constants::literal::hex(sstring{$t.text}); } - | { sign=""; } ('-' {sign = "-"; } )? t=(K_NAN | K_INFINITY) { $constant = cql3::constants::literal::floating_point(sstring{sign + $t.text}); } - ; - -mapLiteral returns [shared_ptr map] - @init{std::vector, ::shared_ptr>> m;} - : '{' { } - ( k1=term ':' v1=term { m.push_back(std::pair, shared_ptr>{k1, v1}); } ( ',' kn=term ':' vn=term { m.push_back(std::pair, shared_ptr>{kn, vn}); } )* )? - '}' { $map = ::make_shared(m); } - ; - -setOrMapLiteral[shared_ptr t] returns [shared_ptr value] - @init{ std::vector, shared_ptr>> m; - std::vector> s; - } - : ':' v=term { m.push_back({t, v}); } - ( ',' kn=term ':' vn=term { m.push_back({kn, vn}); } )* - { $value = ::make_shared(std::move(m)); } - | { s.push_back(t); } - ( ',' tn=term { s.push_back(tn); } )* - { $value = make_shared(cql3::sets::literal(std::move(s))); } - ; - -collectionLiteral returns [shared_ptr value] - @init{ std::vector> l; } - : '[' - ( t1=term { l.push_back(t1); } ( ',' tn=term { l.push_back(tn); } )* )? - ']' { $value = ::make_shared(std::move(l)); } - | '{' t=term v=setOrMapLiteral[t] { $value = v; } '}' - // Note that we have an ambiguity between maps and set for "{}". So we force it to a set literal, - // and deal with it later based on the type of the column (SetLiteral.java). 
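    // For instance (hypothetical statement): in "UPDATE t SET c = {} WHERE k = 0"
    // the empty braces take the alternative below and parse as a set literal;
    // if c is really a map column, the later type check reinterprets the empty
    // collection accordingly.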
- | '{' '}' { $value = make_shared(cql3::sets::literal({})); } - ; - -usertypeLiteral returns [shared_ptr ut] - @init{ cql3::user_types::literal::elements_map_type m; } - @after{ $ut = ::make_shared(std::move(m)); } - // We don't allow empty literals because that conflicts with sets/maps and is currently useless since we don't allow empty user types - : '{' k1=ident ':' v1=term { m.emplace(std::move(*k1), std::move(v1)); } ( ',' kn=ident ':' vn=term { m.emplace(std::move(*kn), std::move(vn)); } )* '}' - ; - -tupleLiteral returns [shared_ptr tt] - @init{ std::vector> l; } - @after{ $tt = ::make_shared(std::move(l)); } - : '(' t1=term { l.push_back(t1); } ( ',' tn=term { l.push_back(tn); } )* ')' - ; - -value returns [::shared_ptr value] - : c=constant { $value = c; } - | l=collectionLiteral { $value = l; } - | u=usertypeLiteral { $value = u; } - | t=tupleLiteral { $value = t; } - | K_NULL { $value = cql3::constants::NULL_LITERAL; } - | ':' id=ident { $value = new_bind_variables(id); } - | QMARK { $value = new_bind_variables(shared_ptr{}); } - ; - -intValue returns [::shared_ptr value] - : - | t=INTEGER { $value = cql3::constants::literal::integer(sstring{$t.text}); } - | ':' id=ident { $value = new_bind_variables(id); } - | QMARK { $value = new_bind_variables(shared_ptr{}); } - ; - -functionName returns [cql3::functions::function_name s] - : (ks=keyspaceName '.')? f=allowedFunctionName { $s.keyspace = std::move(ks); $s.name = std::move(f); } - ; - -allowedFunctionName returns [sstring s] - : f=IDENT { $s = $f.text; std::transform(s.begin(), s.end(), s.begin(), ::tolower); } - | f=QUOTED_NAME { $s = $f.text; } - | u=unreserved_function_keyword { $s = u; } - | K_TOKEN { $s = "token"; } - | K_COUNT { $s = "count"; } - ; - -functionArgs returns [std::vector> a] - : '(' ')' - | '(' t1=term { a.push_back(std::move(t1)); } - ( ',' tn=term { a.push_back(std::move(tn)); } )* - ')' - ; - -term returns [::shared_ptr term1] - : v=value { $term1 = v; } - | f=functionName args=functionArgs { $term1 = ::make_shared(std::move(f), std::move(args)); } - | '(' c=comparatorType ')' t=term { $term1 = make_shared(c, t); } - ; - -columnOperation[operations_type& operations] - : key=cident columnOperationDifferentiator[operations, key] - ; - -columnOperationDifferentiator[operations_type& operations, ::shared_ptr key] - : '=' normalColumnOperation[operations, key] - | '[' k=term ']' specializedColumnOperation[operations, key, k, false] - | '[' K_SCYLLA_TIMEUUID_LIST_INDEX '(' k=term ')' ']' specializedColumnOperation[operations, key, k, true] - ; - -normalColumnOperation[operations_type& operations, ::shared_ptr key] - : t=term ('+' c=cident )? - { - if (!c) { - add_raw_update(operations, key, ::make_shared(t)); - } else { - if (*key != *c) { - add_recognition_error("Only expressions of the form X = + X are supported."); - } - add_raw_update(operations, key, ::make_shared(t)); - } - } - | c=cident sig=('+' | '-') t=term - { - if (*key != *c) { - add_recognition_error("Only expressions of the form X = X " + $sig.text + " are supported."); - } - shared_ptr op; - if ($sig.text == "+") { - op = make_shared(t); - } else { - op = make_shared(t); - } - add_raw_update(operations, key, std::move(op)); - } - | c=cident i=INTEGER - { - // Note that this production *is* necessary because X = X - 3 will in fact be lexed as [ X, '=', X, INTEGER]. 
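            // That is, the lexer folds the sign into the INTEGER token, so
            // "c = c - 3" arrives as [ c, '=', c, INTEGER("-3") ], and the sign
            // is recovered below from the token text ($i.text[0]).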
- if (*key != *c) { - // We don't yet allow a '+' in front of an integer, but we could in the future really, so let's be future-proof in our error message - add_recognition_error("Only expressions of the form X = X " + sstring($i.text[0] == '-' ? "-" : "+") + " are supported."); - } - add_raw_update(operations, key, make_shared(cql3::constants::literal::integer($i.text))); - } - | K_SCYLLA_COUNTER_SHARD_LIST '(' t=term ')' - { - add_raw_update(operations, key, ::make_shared(t)); - } - ; - -specializedColumnOperation[std::vector, - shared_ptr>>& operations, - shared_ptr key, - shared_ptr k, - bool by_uuid] - - : '=' t=term - { - add_raw_update(operations, key, make_shared(k, t, by_uuid)); - } - ; - -columnCondition[conditions_type& conditions] - // Note: we'll reject duplicates later - : key=cident - ( op=relationType t=term { conditions.emplace_back(key, cql3::column_condition::raw::simple_condition(t, *op)); } - | K_IN - ( values=singleColumnInValues { conditions.emplace_back(key, cql3::column_condition::raw::simple_in_condition(values)); } - | marker=inMarker { conditions.emplace_back(key, cql3::column_condition::raw::simple_in_condition(marker)); } - ) - | '[' element=term ']' - ( op=relationType t=term { conditions.emplace_back(key, cql3::column_condition::raw::collection_condition(t, element, *op)); } - | K_IN - ( values=singleColumnInValues { conditions.emplace_back(key, cql3::column_condition::raw::collection_in_condition(element, values)); } - | marker=inMarker { conditions.emplace_back(key, cql3::column_condition::raw::collection_in_condition(element, marker)); } - ) - ) - ) - ; - -properties[::shared_ptr props] - : property[props] (K_AND property[props])* - ; - -property[::shared_ptr props] - : k=ident '=' simple=propertyValue { try { $props->add_property(k->to_string(), simple); } catch (exceptions::syntax_exception e) { add_recognition_error(e.what()); } } - | k=ident '=' map=mapLiteral { try { $props->add_property(k->to_string(), convert_property_map(map)); } catch (exceptions::syntax_exception e) { add_recognition_error(e.what()); } } - ; - -propertyValue returns [sstring str] - : c=constant { $str = c->get_raw_text(); } - | u=unreserved_keyword { $str = u; } - ; - -relationType returns [const cql3::operator_type* op = nullptr] - : '=' { $op = &cql3::operator_type::EQ; } - | '<' { $op = &cql3::operator_type::LT; } - | '<=' { $op = &cql3::operator_type::LTE; } - | '>' { $op = &cql3::operator_type::GT; } - | '>=' { $op = &cql3::operator_type::GTE; } - | '!=' { $op = &cql3::operator_type::NEQ; } - ; - -relation[std::vector& clauses] - @init{ const cql3::operator_type* rt = nullptr; } - : name=cident type=relationType t=term { $clauses.emplace_back(::make_shared(std::move(name), *type, std::move(t))); } - - | K_TOKEN l=tupleOfIdentifiers type=relationType t=term - { $clauses.emplace_back(::make_shared(std::move(l), *type, std::move(t))); } - | name=cident K_IS K_NOT K_NULL { - $clauses.emplace_back(make_shared(std::move(name), cql3::operator_type::IS_NOT, cql3::constants::NULL_LITERAL)); } - | name=cident K_IN marker=inMarker - { $clauses.emplace_back(make_shared(std::move(name), cql3::operator_type::IN, std::move(marker))); } - | name=cident K_IN in_values=singleColumnInValues - { $clauses.emplace_back(cql3::single_column_relation::create_in_relation(std::move(name), std::move(in_values))); } - | name=cident K_CONTAINS { rt = &cql3::operator_type::CONTAINS; } (K_KEY { rt = &cql3::operator_type::CONTAINS_KEY; })? 
- t=term { $clauses.emplace_back(make_shared(std::move(name), *rt, std::move(t))); } - | name=cident '[' key=term ']' type=relationType t=term { $clauses.emplace_back(make_shared(std::move(name), std::move(key), *type, std::move(t))); } - | ids=tupleOfIdentifiers - ( K_IN - ( '(' ')' - { $clauses.emplace_back(cql3::multi_column_relation::create_in_relation(ids, std::vector>())); } - | tupleInMarker=inMarkerForTuple /* (a, b, c) IN ? */ - { $clauses.emplace_back(cql3::multi_column_relation::create_single_marker_in_relation(ids, tupleInMarker)); } - | literals=tupleOfTupleLiterals /* (a, b, c) IN ((1, 2, 3), (4, 5, 6), ...) */ - { - $clauses.emplace_back(cql3::multi_column_relation::create_in_relation(ids, literals)); - } - | markers=tupleOfMarkersForTuples /* (a, b, c) IN (?, ?, ...) */ - { $clauses.emplace_back(cql3::multi_column_relation::create_in_relation(ids, markers)); } - ) - | type=relationType literal=tupleLiteral /* (a, b, c) > (1, 2, 3) or (a, b, c) > (?, ?, ?) */ - { - $clauses.emplace_back(cql3::multi_column_relation::create_non_in_relation(ids, *type, literal)); - } - | type=relationType tupleMarker=markerForTuple /* (a, b, c) >= ? */ - { $clauses.emplace_back(cql3::multi_column_relation::create_non_in_relation(ids, *type, tupleMarker)); } - ) - | '(' relation[$clauses] ')' - ; - -inMarker returns [shared_ptr marker] - : QMARK { $marker = new_in_bind_variables(nullptr); } - | ':' name=ident { $marker = new_in_bind_variables(name); } - ; - -tupleOfIdentifiers returns [std::vector<::shared_ptr> ids] - : '(' n1=cident { $ids.push_back(n1); } (',' ni=cident { $ids.push_back(ni); })* ')' - ; - -singleColumnInValues returns [std::vector<::shared_ptr> terms] - : '(' ( t1 = term { $terms.push_back(t1); } (',' ti=term { $terms.push_back(ti); })* )? 
')' - ; - -tupleOfTupleLiterals returns [std::vector<::shared_ptr> literals] - : '(' t1=tupleLiteral { $literals.emplace_back(t1); } (',' ti=tupleLiteral { $literals.emplace_back(ti); })* ')' - ; - -markerForTuple returns [shared_ptr marker] - : QMARK { $marker = new_tuple_bind_variables(nullptr); } - | ':' name=ident { $marker = new_tuple_bind_variables(name); } - ; - -tupleOfMarkersForTuples returns [std::vector<::shared_ptr> markers] - : '(' m1=markerForTuple { $markers.emplace_back(m1); } (',' mi=markerForTuple { $markers.emplace_back(mi); })* ')' - ; - -inMarkerForTuple returns [shared_ptr marker] - : QMARK { $marker = new_tuple_in_bind_variables(nullptr); } - | ':' name=ident { $marker = new_tuple_in_bind_variables(name); } - ; - -comparatorType returns [shared_ptr t] - : n=native_type { $t = cql3_type::raw::from(n); } - | c=collection_type { $t = c; } - | tt=tuple_type { $t = tt; } - | id=userTypeName { $t = cql3::cql3_type::raw::user_type(id); } - | K_FROZEN '<' f=comparatorType '>' - { - try { - $t = cql3::cql3_type::raw::frozen(f); - } catch (exceptions::invalid_request_exception& e) { - add_recognition_error(e.what()); - } - } -#if 0 - | s=STRING_LITERAL - { - try { - $t = CQL3Type.Raw.from(new CQL3Type.Custom($s.text)); - } catch (SyntaxException e) { - addRecognitionError("Cannot parse type " + $s.text + ": " + e.getMessage()); - } catch (ConfigurationException e) { - addRecognitionError("Error setting type " + $s.text + ": " + e.getMessage()); - } - } -#endif - ; - -native_type returns [shared_ptr t] - : K_ASCII { $t = cql3_type::ascii; } - | K_BIGINT { $t = cql3_type::bigint; } - | K_BLOB { $t = cql3_type::blob; } - | K_BOOLEAN { $t = cql3_type::boolean; } - | K_COUNTER { $t = cql3_type::counter; } - | K_DECIMAL { $t = cql3_type::decimal; } - | K_DOUBLE { $t = cql3_type::double_; } - | K_FLOAT { $t = cql3_type::float_; } - | K_INET { $t = cql3_type::inet; } - | K_INT { $t = cql3_type::int_; } - | K_SMALLINT { $t = cql3_type::smallint; } - | K_TEXT { $t = cql3_type::text; } - | K_TIMESTAMP { $t = cql3_type::timestamp; } - | K_TINYINT { $t = cql3_type::tinyint; } - | K_UUID { $t = cql3_type::uuid; } - | K_VARCHAR { $t = cql3_type::varchar; } - | K_VARINT { $t = cql3_type::varint; } - | K_TIMEUUID { $t = cql3_type::timeuuid; } - | K_DATE { $t = cql3_type::date; } - | K_TIME { $t = cql3_type::time; } - ; - -collection_type returns [shared_ptr pt] - : K_MAP '<' t1=comparatorType ',' t2=comparatorType '>' - { - // if we can't parse either t1 or t2, antlr will "recover" and we may have t1 or t2 null. 
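        // e.g. (hypothetical input) a truncated "map<int," can leave t2 null
        // after recovery; guarding on both pointers below avoids dereferencing
        // a null type and lets the syntax error collected during recovery be
        // the one reported.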
- if (t1 && t2) { - $pt = cql3::cql3_type::raw::map(t1, t2); - } - } - | K_LIST '<' t=comparatorType '>' - { if (t) { $pt = cql3::cql3_type::raw::list(t); } } - | K_SET '<' t=comparatorType '>' - { if (t) { $pt = cql3::cql3_type::raw::set(t); } } - ; - -tuple_type returns [shared_ptr t] - @init{ std::vector> types; } - : K_TUPLE '<' - t1=comparatorType { types.push_back(t1); } (',' tn=comparatorType { types.push_back(tn); })* - '>' { $t = cql3::cql3_type::raw::tuple(std::move(types)); } - ; - -username - : IDENT - | STRING_LITERAL - ; - -// Basically the same as cident, but we need to exlude existing CQL3 types -// (which for some reason are not reserved otherwise) -non_type_ident returns [shared_ptr id] - : t=IDENT { if (_reserved_type_names().count($t.text)) { add_recognition_error("Invalid (reserved) user type name " + $t.text); } $id = ::make_shared($t.text, false); } - | t=QUOTED_NAME { $id = ::make_shared($t.text, true); } - | k=basic_unreserved_keyword { $id = ::make_shared(k, false); } - | kk=K_KEY { $id = ::make_shared($kk.text, false); } - ; - -unreserved_keyword returns [sstring str] - : u=unreserved_function_keyword { $str = u; } - | k=(K_TTL | K_COUNT | K_WRITETIME | K_KEY) { $str = $k.text; } - ; - -unreserved_function_keyword returns [sstring str] - : u=basic_unreserved_keyword { $str = u; } - | t=native_type { $str = t->to_string(); } - ; - -basic_unreserved_keyword returns [sstring str] - : k=( K_KEYS - | K_AS - | K_CLUSTERING - | K_COMPACT - | K_STORAGE - | K_TYPE - | K_VALUES - | K_MAP - | K_LIST - | K_FILTERING - | K_PERMISSION - | K_PERMISSIONS - | K_KEYSPACES - | K_ALL - | K_USER - | K_USERS - | K_SUPERUSER - | K_NOSUPERUSER - | K_PASSWORD - | K_EXISTS - | K_CUSTOM - | K_TRIGGER - | K_DISTINCT - | K_CONTAINS - | K_STATIC - | K_FROZEN - | K_TUPLE - | K_FUNCTION - | K_AGGREGATE - | K_SFUNC - | K_STYPE - | K_FINALFUNC - | K_INITCOND - | K_RETURNS - | K_LANGUAGE - | K_NON - | K_DETERMINISTIC - ) { $str = $k.text; } - ; - -// Case-insensitive keywords -K_SELECT: S E L E C T; -K_FROM: F R O M; -K_AS: A S; -K_WHERE: W H E R E; -K_AND: A N D; -K_KEY: K E Y; -K_KEYS: K E Y S; -K_ENTRIES: E N T R I E S; -K_FULL: F U L L; -K_INSERT: I N S E R T; -K_UPDATE: U P D A T E; -K_WITH: W I T H; -K_LIMIT: L I M I T; -K_USING: U S I N G; -K_USE: U S E; -K_DISTINCT: D I S T I N C T; -K_COUNT: C O U N T; -K_SET: S E T; -K_BEGIN: B E G I N; -K_UNLOGGED: U N L O G G E D; -K_BATCH: B A T C H; -K_APPLY: A P P L Y; -K_TRUNCATE: T R U N C A T E; -K_DELETE: D E L E T E; -K_IN: I N; -K_CREATE: C R E A T E; -K_KEYSPACE: ( K E Y S P A C E - | S C H E M A ); -K_KEYSPACES: K E Y S P A C E S; -K_COLUMNFAMILY:( C O L U M N F A M I L Y - | T A B L E ); -K_MATERIALIZED:M A T E R I A L I Z E D; -K_VIEW: V I E W; -K_INDEX: I N D E X; -K_CUSTOM: C U S T O M; -K_ON: O N; -K_TO: T O; -K_DROP: D R O P; -K_PRIMARY: P R I M A R Y; -K_INTO: I N T O; -K_VALUES: V A L U E S; -K_TIMESTAMP: T I M E S T A M P; -K_TTL: T T L; -K_ALTER: A L T E R; -K_RENAME: R E N A M E; -K_ADD: A D D; -K_TYPE: T Y P E; -K_COMPACT: C O M P A C T; -K_STORAGE: S T O R A G E; -K_ORDER: O R D E R; -K_BY: B Y; -K_ASC: A S C; -K_DESC: D E S C; -K_ALLOW: A L L O W; -K_FILTERING: F I L T E R I N G; -K_IF: I F; -K_IS: I S; -K_CONTAINS: C O N T A I N S; - -K_GRANT: G R A N T; -K_ALL: A L L; -K_PERMISSION: P E R M I S S I O N; -K_PERMISSIONS: P E R M I S S I O N S; -K_OF: O F; -K_REVOKE: R E V O K E; -K_MODIFY: M O D I F Y; -K_AUTHORIZE: A U T H O R I Z E; -K_NORECURSIVE: N O R E C U R S I V E; - -K_USER: U S E R; -K_USERS: U S E R S; -K_SUPERUSER: 
S U P E R U S E R; -K_NOSUPERUSER: N O S U P E R U S E R; -K_PASSWORD: P A S S W O R D; - -K_CLUSTERING: C L U S T E R I N G; -K_ASCII: A S C I I; -K_BIGINT: B I G I N T; -K_BLOB: B L O B; -K_BOOLEAN: B O O L E A N; -K_COUNTER: C O U N T E R; -K_DECIMAL: D E C I M A L; -K_DOUBLE: D O U B L E; -K_FLOAT: F L O A T; -K_INET: I N E T; -K_INT: I N T; -K_SMALLINT: S M A L L I N T; -K_TINYINT: T I N Y I N T; -K_TEXT: T E X T; -K_UUID: U U I D; -K_VARCHAR: V A R C H A R; -K_VARINT: V A R I N T; -K_TIMEUUID: T I M E U U I D; -K_TOKEN: T O K E N; -K_WRITETIME: W R I T E T I M E; -K_DATE: D A T E; -K_TIME: T I M E; - -K_NULL: N U L L; -K_NOT: N O T; -K_EXISTS: E X I S T S; - -K_MAP: M A P; -K_LIST: L I S T; -K_NAN: N A N; -K_INFINITY: I N F I N I T Y; -K_TUPLE: T U P L E; - -K_TRIGGER: T R I G G E R; -K_STATIC: S T A T I C; -K_FROZEN: F R O Z E N; - -K_FUNCTION: F U N C T I O N; -K_AGGREGATE: A G G R E G A T E; -K_SFUNC: S F U N C; -K_STYPE: S T Y P E; -K_FINALFUNC: F I N A L F U N C; -K_INITCOND: I N I T C O N D; -K_RETURNS: R E T U R N S; -K_LANGUAGE: L A N G U A G E; -K_NON: N O N; -K_OR: O R; -K_REPLACE: R E P L A C E; -K_DETERMINISTIC: D E T E R M I N I S T I C; - -K_SCYLLA_TIMEUUID_LIST_INDEX: S C Y L L A '_' T I M E U U I D '_' L I S T '_' I N D E X; -K_SCYLLA_COUNTER_SHARD_LIST: S C Y L L A '_' C O U N T E R '_' S H A R D '_' L I S T; - -// Case-insensitive alpha characters -fragment A: ('a'|'A'); -fragment B: ('b'|'B'); -fragment C: ('c'|'C'); -fragment D: ('d'|'D'); -fragment E: ('e'|'E'); -fragment F: ('f'|'F'); -fragment G: ('g'|'G'); -fragment H: ('h'|'H'); -fragment I: ('i'|'I'); -fragment J: ('j'|'J'); -fragment K: ('k'|'K'); -fragment L: ('l'|'L'); -fragment M: ('m'|'M'); -fragment N: ('n'|'N'); -fragment O: ('o'|'O'); -fragment P: ('p'|'P'); -fragment Q: ('q'|'Q'); -fragment R: ('r'|'R'); -fragment S: ('s'|'S'); -fragment T: ('t'|'T'); -fragment U: ('u'|'U'); -fragment V: ('v'|'V'); -fragment W: ('w'|'W'); -fragment X: ('x'|'X'); -fragment Y: ('y'|'Y'); -fragment Z: ('z'|'Z'); - -STRING_LITERAL - @init{ - std::string txt; // temporary to build pg-style-string - } - @after{ - // This is an ugly hack that allows returning empty string literals. - // If setText() was called with an empty string antlr3 would decide - // that setText() was never called and just return the unmodified - // token value. To prevent that we call setText() with non-empty string - // that is not valid utf8 which will be later changed to an empty - // string once it leaves antlr3 code. - if (txt.empty()) { - txt.push_back(-1); - } - setText(txt); - } - : - /* pg-style string literal */ - ( - '$' '$' - ( - (c=~('$') { txt.push_back(c); }) - | - ('$' (c=~('$') { txt.push_back('$'); txt.push_back(c); })) - )* - '$' '$' - ) - | - /* conventional quoted string literal */ - ( - '\'' (c=~('\'') { txt.push_back(c);} | '\'' '\'' { txt.push_back('\''); })* '\'' - ) - ; - -QUOTED_NAME - @init{ std::string b; } - @after{ setText(b); } - : '\"' (c=~('\"') { b.push_back(c); } | '\"' '\"' { b.push_back('\"'); })+ '\"' - ; - -fragment DIGIT - : '0'..'9' - ; - -fragment LETTER - : ('A'..'Z' | 'a'..'z') - ; - -fragment HEX - : ('A'..'F' | 'a'..'f' | '0'..'9') - ; - -fragment EXPONENT - : E ('+' | '-')? DIGIT+ - ; - -INTEGER - : '-'? DIGIT+ - ; - -QMARK - : '?' - ; - -/* - * Normally a lexer only emits one token at a time, but ours is tricked out - * to support multiple (see @lexer::members near the top of the grammar). - */ -FLOAT - : INTEGER EXPONENT - | INTEGER '.' DIGIT* EXPONENT? 
- ; - -/* - * This has to be before IDENT so it takes precendence over it. - */ -BOOLEAN - : T R U E | F A L S E - ; - -IDENT - : LETTER (LETTER | DIGIT | '_')* - ; - -HEXNUMBER - : '0' X HEX* - ; - -UUID - : HEX HEX HEX HEX HEX HEX HEX HEX '-' - HEX HEX HEX HEX '-' - HEX HEX HEX HEX '-' - HEX HEX HEX HEX '-' - HEX HEX HEX HEX HEX HEX HEX HEX HEX HEX HEX HEX - ; - -WS - : (' ' | '\t' | '\n' | '\r')+ { $channel = HIDDEN; } - ; - -COMMENT - : ('--' | '//') .* ('\n'|'\r') { $channel = HIDDEN; } - ; - -MULTILINE_COMMENT - : '/*' .* '*/' { $channel = HIDDEN; } - ; diff --git a/scylla/cql3/abstract_marker.cc b/scylla/cql3/abstract_marker.cc deleted file mode 100644 index a556a49..0000000 --- a/scylla/cql3/abstract_marker.cc +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "cql3/abstract_marker.hh" - -#include "cql3/constants.hh" -#include "cql3/lists.hh" -#include "cql3/maps.hh" -#include "cql3/sets.hh" - -namespace cql3 { - -abstract_marker::abstract_marker(int32_t bind_index, ::shared_ptr&& receiver) - : _bind_index{bind_index} - , _receiver{std::move(receiver)} -{ } - -void abstract_marker::collect_marker_specification(::shared_ptr bound_names) { - bound_names->add(_bind_index, _receiver); -} - -bool abstract_marker::contains_bind_marker() const { - return true; -} - -abstract_marker::raw::raw(int32_t bind_index) - : _bind_index{bind_index} -{ } - -::shared_ptr abstract_marker::raw::prepare(database& db, const sstring& keyspace, ::shared_ptr receiver) -{ - auto receiver_type = ::dynamic_pointer_cast(receiver->type); - if (receiver_type == nullptr) { - return ::make_shared(_bind_index, receiver); - } - if (&receiver_type->_kind == &collection_type_impl::kind::list) { - return ::make_shared(_bind_index, receiver); - } else if (&receiver_type->_kind == &collection_type_impl::kind::set) { - return ::make_shared(_bind_index, receiver); - } else if (&receiver_type->_kind == &collection_type_impl::kind::map) { - return ::make_shared(_bind_index, receiver); - } - assert(0); -} - -assignment_testable::test_result abstract_marker::raw::test_assignment(database& db, const sstring& keyspace, ::shared_ptr receiver) { - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; -} - -sstring abstract_marker::raw::to_string() const { - return "?"; -} - -abstract_marker::in_raw::in_raw(int32_t bind_index) - : raw{bind_index} -{ } - -::shared_ptr abstract_marker::in_raw::make_in_receiver(::shared_ptr receiver) { - auto in_name = ::make_shared(sstring("in(") + receiver->name->to_string() + sstring(")"), true); - return ::make_shared(receiver->ks_name, receiver->cf_name, in_name, list_type_impl::get_instance(receiver->type, false)); -} - -::shared_ptr abstract_marker::in_raw::prepare(database& db, const sstring& keyspace, ::shared_ptr receiver) { - return ::make_shared(_bind_index, make_in_receiver(receiver)); -} - -} diff --git a/scylla/cql3/abstract_marker.hh b/scylla/cql3/abstract_marker.hh deleted file mode 100644 index 070179c..0000000 --- a/scylla/cql3/abstract_marker.hh +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/variable_specifications.hh" -#include "cql3/column_specification.hh" -#include "cql3/term.hh" - -namespace cql3 { - -/** - * A single bind marker. - */ -class abstract_marker : public non_terminal { -protected: - const int32_t _bind_index; - const ::shared_ptr _receiver; -public: - abstract_marker(int32_t bind_index, ::shared_ptr&& receiver); - - virtual void collect_marker_specification(::shared_ptr bound_names) override; - - virtual bool contains_bind_marker() const override; - - /** - * A parsed, but non prepared, bind marker. - */ - class raw : public term::raw { - protected: - const int32_t _bind_index; - public: - raw(int32_t bind_index); - - virtual ::shared_ptr prepare(database& db, const sstring& keyspace, ::shared_ptr receiver) override; - - virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, ::shared_ptr receiver) override; - - virtual sstring to_string() const override; - }; - - /** - * A raw placeholder for multiple values of the same type for a single column. - * For example, "SELECT ... WHERE user_id IN ?'. - * - * Because a single type is used, a List is used to represent the values. - */ - class in_raw : public raw { - public: - in_raw(int32_t bind_index); - private: - static ::shared_ptr make_in_receiver(::shared_ptr receiver); - public: - virtual ::shared_ptr prepare(database& db, const sstring& keyspace, ::shared_ptr receiver) override; - }; -}; - -} diff --git a/scylla/cql3/assignment_testable.hh b/scylla/cql3/assignment_testable.hh deleted file mode 100644 index 79130be..0000000 --- a/scylla/cql3/assignment_testable.hh +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2014 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include "column_specification.hh" -#include <vector> -#include <ostream> - -class database; - -namespace cql3 { - -class assignment_testable { -public: - virtual ~assignment_testable() {} - - enum class test_result { - EXACT_MATCH, - WEAKLY_ASSIGNABLE, - NOT_ASSIGNABLE, - }; - - static bool is_assignable(test_result tr) { - return tr != test_result::NOT_ASSIGNABLE; - } - - static bool is_exact_match(test_result tr) { - return tr == test_result::EXACT_MATCH; - } - - // Test all elements of to_test for assignment. If all are exact match, return exact match. If any is not assignable, - // return not assignable. Otherwise, return weakly assignable. - template <typename AssignmentTestablePtrRange> - static test_result test_all(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver, - AssignmentTestablePtrRange&& to_test) { - test_result res = test_result::EXACT_MATCH; - for (auto&& rt : to_test) { - if (rt == nullptr) { - res = test_result::WEAKLY_ASSIGNABLE; - continue; - } - - test_result t = rt->test_assignment(db, keyspace, receiver); - if (t == test_result::NOT_ASSIGNABLE) { - return test_result::NOT_ASSIGNABLE; - } - if (t == test_result::WEAKLY_ASSIGNABLE) { - res = test_result::WEAKLY_ASSIGNABLE; - } - } - return res; - } - - /** - * @return whether this object can be assigned to the provided receiver. We distinguish - * between 3 values: - * - EXACT_MATCH if this object is exactly of the type expected by the receiver - * - WEAKLY_ASSIGNABLE if this object is not exactly the expected type but is assignable nonetheless - * - NOT_ASSIGNABLE if it's not assignable - * Most callers should just call the is_assignable() method on the result, though functions have a use for - * testing "strong" equality to decide the most precise overload to pick when multiple could match. - */ - virtual test_result test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) = 0; - - // for error reporting - virtual sstring assignment_testable_source_context() const = 0; -}; - -inline bool is_assignable(assignment_testable::test_result tr) { - return assignment_testable::is_assignable(tr); -} - -inline bool is_exact_match(assignment_testable::test_result tr) { - return assignment_testable::is_exact_match(tr); -} - -inline -std::ostream& -operator<<(std::ostream& os, const assignment_testable& at) { - return os << at.assignment_testable_source_context(); -} - -} diff --git a/scylla/cql3/attributes.cc b/scylla/cql3/attributes.cc deleted file mode 100644 index ceac4df..0000000 --- a/scylla/cql3/attributes.cc +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
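The folding rule in test_all() above is easy to state: the result starts at EXACT_MATCH, any NOT_ASSIGNABLE element short-circuits, and anything else (including a null element) degrades the result to WEAKLY_ASSIGNABLE. A self-contained sketch of just that rule, with std::optional standing in for nullable testables:

    #include <cassert>
    #include <optional>
    #include <vector>

    enum class test_result { EXACT_MATCH, WEAKLY_ASSIGNABLE, NOT_ASSIGNABLE };

    test_result test_all(const std::vector<std::optional<test_result>>& to_test) {
        test_result res = test_result::EXACT_MATCH;
        for (const auto& t : to_test) {
            if (!t) {                                // null testable: weaken
                res = test_result::WEAKLY_ASSIGNABLE;
                continue;
            }
            if (*t == test_result::NOT_ASSIGNABLE) { // short-circuit
                return test_result::NOT_ASSIGNABLE;
            }
            if (*t == test_result::WEAKLY_ASSIGNABLE) {
                res = test_result::WEAKLY_ASSIGNABLE;
            }
        }
        return res;
    }

    int main() {
        using tr = test_result;
        assert(test_all({tr::EXACT_MATCH, tr::EXACT_MATCH}) == tr::EXACT_MATCH);
        assert(test_all({tr::EXACT_MATCH, tr::WEAKLY_ASSIGNABLE}) == tr::WEAKLY_ASSIGNABLE);
        assert(test_all({tr::WEAKLY_ASSIGNABLE, tr::NOT_ASSIGNABLE}) == tr::NOT_ASSIGNABLE);
    }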
- */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#include "cql3/attributes.hh" - -namespace cql3 { - -std::unique_ptr<attributes> attributes::none() { - return std::unique_ptr<attributes>{new attributes{{}, {}}}; -} - -attributes::attributes(::shared_ptr<term>&& timestamp, ::shared_ptr<term>&& time_to_live) - : _timestamp{std::move(timestamp)} - , _time_to_live{std::move(time_to_live)} -{ } - -bool attributes::uses_function(const sstring& ks_name, const sstring& function_name) const { - return (_timestamp && _timestamp->uses_function(ks_name, function_name)) - || (_time_to_live && _time_to_live->uses_function(ks_name, function_name)); -} - -bool attributes::is_timestamp_set() const { - return bool(_timestamp); -} - -bool attributes::is_time_to_live_set() const { - return bool(_time_to_live); -} - -int64_t attributes::get_timestamp(int64_t now, const query_options& options) { - if (!_timestamp) { - return now; - } - - auto tval = _timestamp->bind_and_get(options); - if (tval.is_null()) { - throw exceptions::invalid_request_exception("Invalid null value of timestamp"); - } - if (tval.is_unset_value()) { - return now; - } - try { - data_type_for<int64_t>()->validate(*tval); - } catch (const marshal_exception& e) { - throw exceptions::invalid_request_exception("Invalid timestamp value"); - } - return value_cast<int64_t>(data_type_for<int64_t>()->deserialize(*tval)); -} - -int32_t attributes::get_time_to_live(const query_options& options) { - if (!_time_to_live) { - return 0; - } - - auto tval = _time_to_live->bind_and_get(options); - if (tval.is_null()) { - throw exceptions::invalid_request_exception("Invalid null value of TTL"); - } - if (tval.is_unset_value()) { - return 0; - } - try { - data_type_for<int32_t>()->validate(*tval); - } - catch (const marshal_exception& e) { - throw exceptions::invalid_request_exception("Invalid TTL value"); - } - - auto ttl = value_cast<int32_t>(data_type_for<int32_t>()->deserialize(*tval)); - if (ttl < 0) { - throw exceptions::invalid_request_exception("A TTL must be greater than or equal to 0"); - } - - if (ttl > max_ttl.count()) { - throw exceptions::invalid_request_exception("ttl is too large. requested (" + std::to_string(ttl) + - ") maximum (" + std::to_string(max_ttl.count()) + ")"); - } - - return ttl; -} - -void attributes::collect_marker_specification(::shared_ptr<variable_specifications> bound_names) { - if (_timestamp) { - _timestamp->collect_marker_specification(bound_names); - } - if (_time_to_live) { - _time_to_live->collect_marker_specification(bound_names); - } -} - -std::unique_ptr<attributes> attributes::raw::prepare(database& db, const sstring& ks_name, const sstring& cf_name) { - auto ts = !timestamp ? ::shared_ptr<term>{} : timestamp->prepare(db, ks_name, timestamp_receiver(ks_name, cf_name)); - auto ttl = !time_to_live ? ::shared_ptr<term>{} : time_to_live->prepare(db, ks_name, time_to_live_receiver(ks_name, cf_name)); - return std::unique_ptr<attributes>{new attributes{std::move(ts), std::move(ttl)}}; -} - -::shared_ptr<column_specification> attributes::raw::timestamp_receiver(const sstring& ks_name, const sstring& cf_name) { - return ::make_shared<column_specification>(ks_name, cf_name, ::make_shared<column_identifier>("[timestamp]", true), data_type_for<int64_t>()); -} - -::shared_ptr<column_specification> attributes::raw::time_to_live_receiver(const sstring& ks_name, const sstring& cf_name) { - return ::make_shared<column_specification>(ks_name, cf_name, ::make_shared<column_identifier>("[ttl]", true), data_type_for<int32_t>()); -} - -} diff --git a/scylla/cql3/attributes.hh b/scylla/cql3/attributes.hh deleted file mode 100644 index 6de492b..0000000 --- a/scylla/cql3/attributes.hh +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include "exceptions/exceptions.hh" -#include "cql3/term.hh" -#include <memory> - -namespace cql3 { -/** - * Utility class for the Parser to gather attributes for modification - * statements. - */ -class attributes final { -private: - const ::shared_ptr<term> _timestamp; - const ::shared_ptr<term> _time_to_live; -public: - static std::unique_ptr<attributes> none(); -private: - attributes(::shared_ptr<term>&& timestamp, ::shared_ptr<term>&& time_to_live); -public: - bool uses_function(const sstring& ks_name, const sstring& function_name) const; - - bool is_timestamp_set() const; - - bool is_time_to_live_set() const; - - int64_t get_timestamp(int64_t now, const query_options& options); - - int32_t get_time_to_live(const query_options& options); - - void collect_marker_specification(::shared_ptr<variable_specifications> bound_names); - - class raw { - public: - ::shared_ptr<term::raw> timestamp; - ::shared_ptr<term::raw> time_to_live; - - std::unique_ptr<attributes> prepare(database& db, const sstring& ks_name, const sstring& cf_name); - private: - ::shared_ptr<column_specification> timestamp_receiver(const sstring& ks_name, const sstring& cf_name); - - ::shared_ptr<column_specification> time_to_live_receiver(const sstring& ks_name, const sstring& cf_name); - }; -}; - -} diff --git a/scylla/cql3/cf_name.cc b/scylla/cql3/cf_name.cc deleted file mode 100644 index 22ecdf7..0000000 --- a/scylla/cql3/cf_name.cc +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership.
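The TTL handling in get_time_to_live() above distinguishes a bound null (an error), an unset marker (fall back to "no TTL"), and an explicit value that must lie in [0, max_ttl]. A rough standalone sketch of just that rule follows; the 20-year bound is an assumption of the sketch, since the patch does not show scylla's max_ttl definition:

    #include <chrono>
    #include <stdexcept>
    #include <string>

    enum class bind_state { null, unset, value };

    // Assumed bound for illustration only.
    constexpr std::chrono::seconds max_ttl{20 * 365 * 24 * 3600};

    int32_t get_time_to_live(bind_state state, int32_t ttl = 0) {
        if (state == bind_state::null) {
            throw std::invalid_argument("Invalid null value of TTL");
        }
        if (state == bind_state::unset) {
            return 0; // unset means "no TTL", not an error
        }
        if (ttl < 0) {
            throw std::invalid_argument("A TTL must be greater than or equal to 0");
        }
        if (ttl > max_ttl.count()) {
            throw std::invalid_argument("ttl is too large. requested (" + std::to_string(ttl) +
                                        ") maximum (" + std::to_string(max_ttl.count()) + ")");
        }
        return ttl;
    }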
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/keyspace_element_name.hh" - -namespace cql3 { - -class cf_name : public keyspace_element_name { - sstring _cf_name = ""; -public: - void set_column_family(const sstring& cf, bool keep_case); - - const sstring& get_column_family() const; - - virtual sstring to_string() const override; -}; - -inline -std::ostream& -operator<<(std::ostream& os, const cf_name& n) { - os << n.to_string(); - return os; -} - -} diff --git a/scylla/cql3/column_condition.cc b/scylla/cql3/column_condition.cc deleted file mode 100644 index bab3bdf..0000000 --- a/scylla/cql3/column_condition.cc +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
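The cf_name class shown above folds a column-family name to lower case unless it was quoted (keep_case), and to_string() prepends whatever keyspace prefix keyspace_element_name produces. A small standalone sketch of that behaviour; the "ks." prefix format is an assumption of the sketch, since keyspace_element_name itself is not part of this patch:

    #include <algorithm>
    #include <cassert>
    #include <cctype>
    #include <string>

    std::string to_internal_name(std::string name, bool keep_case) {
        if (!keep_case) {
            std::transform(name.begin(), name.end(), name.begin(),
                           [](unsigned char c) { return std::tolower(c); });
        }
        return name;
    }

    struct cf_name {
        std::string ks;   // handled by keyspace_element_name in the real code
        std::string cf;
        void set_column_family(const std::string& name, bool keep_case) {
            cf = to_internal_name(name, keep_case);
        }
        std::string to_string() const {
            return (ks.empty() ? "" : ks + ".") + cf;   // assumed prefix format
        }
    };

    int main() {
        cf_name n;
        n.ks = "ks";
        n.set_column_family("MyTable", false);   // unquoted: folded
        assert(n.to_string() == "ks.mytable");
        n.set_column_family("MyTable", true);    // quoted: case preserved
        assert(n.to_string() == "ks.MyTable");
    }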
- */ - -#include "cql3/column_condition.hh" -#include "unimplemented.hh" -#include "lists.hh" -#include "maps.hh" -#include - -namespace cql3 { - -bool -column_condition::uses_function(const sstring& ks_name, const sstring& function_name) { - if (bool(_collection_element) && _collection_element->uses_function(ks_name, function_name)) { - return true; - } - if (bool(_value) && _value->uses_function(ks_name, function_name)) { - return true; - } - if (!_in_values.empty()) { - for (auto&& value : _in_values) { - if (bool(value) && value->uses_function(ks_name, function_name)) { - return true; - } - } - } - return false; -} - -void column_condition::collect_marker_specificaton(::shared_ptr bound_names) { - if (_collection_element) { - _collection_element->collect_marker_specification(bound_names); - } - if (!_in_values.empty()) { - for (auto&& value : _in_values) { - value->collect_marker_specification(bound_names); - } - } - _value->collect_marker_specification(bound_names); -} - -::shared_ptr -column_condition::raw::prepare(database& db, const sstring& keyspace, const column_definition& receiver) { - if (receiver.type->is_counter()) { - throw exceptions::invalid_request_exception("Conditions on counters are not supported"); - } - - if (!_collection_element) { - if (_op == operator_type::IN) { - if (_in_values.empty()) { // ? - return column_condition::in_condition(receiver, _in_marker->prepare(db, keyspace, receiver.column_specification)); - } - - std::vector<::shared_ptr> terms; - for (auto&& value : _in_values) { - terms.push_back(value->prepare(db, keyspace, receiver.column_specification)); - } - return column_condition::in_condition(receiver, std::move(terms)); - } else { - return column_condition::condition(receiver, _value->prepare(db, keyspace, receiver.column_specification), _op); - } - } - - if (!receiver.type->is_collection()) { - throw exceptions::invalid_request_exception(sprint("Invalid element access syntax for non-collection column %s", receiver.name_as_text())); - } - - shared_ptr element_spec, value_spec; - auto ctype = static_cast(receiver.type.get()); - if (&ctype->_kind == &collection_type_impl::kind::list) { - element_spec = lists::index_spec_of(receiver.column_specification); - value_spec = lists::value_spec_of(receiver.column_specification); - } else if (&ctype->_kind == &collection_type_impl::kind::map) { - element_spec = maps::key_spec_of(*receiver.column_specification); - value_spec = maps::value_spec_of(*receiver.column_specification); - } else if (&ctype->_kind == &collection_type_impl::kind::set) { - throw exceptions::invalid_request_exception(sprint("Invalid element access syntax for set column %s", receiver.name())); - } else { - abort(); - } - - if (_op == operator_type::IN) { - if (_in_values.empty()) { - return column_condition::in_condition(receiver, - _collection_element->prepare(db, keyspace, element_spec), - _in_marker->prepare(db, keyspace, value_spec)); - } - std::vector> terms; - terms.reserve(_in_values.size()); - boost::push_back(terms, _in_values - | boost::adaptors::transformed(std::bind(&term::raw::prepare, std::placeholders::_1, std::ref(db), std::ref(keyspace), value_spec))); - return column_condition::in_condition(receiver, _collection_element->prepare(db, keyspace, element_spec), terms); - } else { - return column_condition::condition(receiver, - _collection_element->prepare(db, keyspace, element_spec), - _value->prepare(db, keyspace, value_spec), - _op); - } -} - -} diff --git a/scylla/cql3/column_condition.hh 
b/scylla/cql3/column_condition.hh deleted file mode 100644 index 34259fe..0000000 --- a/scylla/cql3/column_condition.hh +++ /dev/null @@ -1,761 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/term.hh" -#include "cql3/abstract_marker.hh" -#include "cql3/operator.hh" - -namespace cql3 { - -/** - * A CQL3 condition on the value of a column or collection element. For example, "UPDATE .. IF a = 0". - */ -class column_condition final { -public: - const column_definition& column; -private: - // For collection, when testing the equality of a specific element, nullptr otherwise. 
- ::shared_ptr _collection_element; - ::shared_ptr _value; - std::vector<::shared_ptr> _in_values; - const operator_type& _op; -public: - column_condition(const column_definition& column, ::shared_ptr collection_element, - ::shared_ptr value, std::vector<::shared_ptr> in_values, const operator_type& op) - : column(column) - , _collection_element(std::move(collection_element)) - , _value(std::move(value)) - , _in_values(std::move(in_values)) - , _op(op) - { - if (op != operator_type::IN) { - assert(_in_values.empty()); - } - } - - static ::shared_ptr condition(const column_definition& def, ::shared_ptr value, const operator_type& op) { - return ::make_shared(def, ::shared_ptr{}, std::move(value), std::vector<::shared_ptr>{}, op); - } - - static ::shared_ptr condition(const column_definition& def, ::shared_ptr collection_element, - ::shared_ptr value, const operator_type& op) { - return ::make_shared(def, std::move(collection_element), std::move(value), - std::vector<::shared_ptr>{}, op); - } - - static ::shared_ptr in_condition(const column_definition& def, std::vector<::shared_ptr> in_values) { - return ::make_shared(def, ::shared_ptr{}, ::shared_ptr{}, - std::move(in_values), operator_type::IN); - } - - static ::shared_ptr in_condition(const column_definition& def, ::shared_ptr collection_element, - std::vector<::shared_ptr> in_values) { - return ::make_shared(def, std::move(collection_element), ::shared_ptr{}, - std::move(in_values), operator_type::IN); - } - - static ::shared_ptr in_condition(const column_definition& def, ::shared_ptr in_marker) { - return ::make_shared(def, ::shared_ptr{}, std::move(in_marker), - std::vector<::shared_ptr>{}, operator_type::IN); - } - - static ::shared_ptr in_condition(const column_definition& def, ::shared_ptr collection_element, - ::shared_ptr in_marker) { - return ::make_shared(def, std::move(collection_element), std::move(in_marker), - std::vector<::shared_ptr>{}, operator_type::IN); - } - - bool uses_function(const sstring& ks_name, const sstring& function_name); -public: - /** - * Collects the column specification for the bind variables of this operation. - * - * @param boundNames the list of column specification where to collect the - * bind variables of this term in. - */ - void collect_marker_specificaton(::shared_ptr bound_names); - -#if 0 - public ColumnCondition.Bound bind(QueryOptions options) throws InvalidRequestException - { - boolean isInCondition = operator == Operator.IN; - if (column.type instanceof CollectionType) - { - if (collectionElement == null) - return isInCondition ? new CollectionInBound(this, options) : new CollectionBound(this, options); - else - return isInCondition ? new ElementAccessInBound(this, options) : new ElementAccessBound(this, options); - } - return isInCondition ? new SimpleInBound(this, options) : new SimpleBound(this, options); - } - - public static abstract class Bound - { - public final ColumnDefinition column; - public final Operator operator; - - protected Bound(ColumnDefinition column, Operator operator) - { - this.column = column; - this.operator = operator; - } - - /** - * Validates whether this condition applies to {@code current}. 
- */ - public abstract boolean appliesTo(Composite rowPrefix, ColumnFamily current, long now) throws InvalidRequestException; - - public ByteBuffer getCollectionElementValue() - { - return null; - } - - protected boolean isSatisfiedByValue(ByteBuffer value, Cell c, AbstractType type, Operator operator, long now) throws InvalidRequestException - { - ByteBuffer columnValue = (c == null || !c.isLive(now)) ? null : c.value(); - return compareWithOperator(operator, type, value, columnValue); - } - - /** Returns true if the operator is satisfied (i.e. "value operator otherValue == true"), false otherwise. */ - protected boolean compareWithOperator(Operator operator, AbstractType type, ByteBuffer value, ByteBuffer otherValue) throws InvalidRequestException - { - if (value == null) - { - switch (operator) - { - case EQ: - return otherValue == null; - case NEQ: - return otherValue != null; - default: - throw new InvalidRequestException(String.format("Invalid comparison with null for operator \"%s\"", operator)); - } - } - else if (otherValue == null) - { - // the condition value is not null, so only NEQ can return true - return operator == Operator.NEQ; - } - int comparison = type.compare(otherValue, value); - switch (operator) - { - case EQ: - return comparison == 0; - case LT: - return comparison < 0; - case LTE: - return comparison <= 0; - case GT: - return comparison > 0; - case GTE: - return comparison >= 0; - case NEQ: - return comparison != 0; - default: - // we shouldn't get IN, CONTAINS, or CONTAINS KEY here - throw new AssertionError(); - } - } - - protected Iterator collectionColumns(CellName collection, ColumnFamily cf, final long now) - { - // We are testing for collection equality, so we need to have the expected values *and* only those. - ColumnSlice[] collectionSlice = new ColumnSlice[]{ collection.slice() }; - // Filter live columns, this makes things simpler afterwards - return Iterators.filter(cf.iterator(collectionSlice), new Predicate() - { - public boolean apply(Cell c) - { - // we only care about live columns - return c.isLive(now); - } - }); - } - } - - /** - * A condition on a single non-collection column. This does not support IN operators (see SimpleInBound). - */ - static class SimpleBound extends Bound - { - public final ByteBuffer value; - - private SimpleBound(ColumnCondition condition, QueryOptions options) throws InvalidRequestException - { - super(condition.column, condition.operator); - assert !(column.type instanceof CollectionType) && condition.collectionElement == null; - assert condition.operator != Operator.IN; - this.value = condition.value.bindAndGet(options); - } - - public boolean appliesTo(Composite rowPrefix, ColumnFamily current, long now) throws InvalidRequestException - { - CellName name = current.metadata().comparator.create(rowPrefix, column); - return isSatisfiedByValue(value, current.getColumn(name), column.type, operator, now); - } - } - - /** - * An IN condition on a single non-collection column. 
- */ - static class SimpleInBound extends Bound - { - public final List inValues; - - private SimpleInBound(ColumnCondition condition, QueryOptions options) throws InvalidRequestException - { - super(condition.column, condition.operator); - assert !(column.type instanceof CollectionType) && condition.collectionElement == null; - assert condition.operator == Operator.IN; - if (condition.inValues == null) - this.inValues = ((Lists.Marker) condition.value).bind(options).getElements(); - else - { - this.inValues = new ArrayList<>(condition.inValues.size()); - for (Term value : condition.inValues) - this.inValues.add(value.bindAndGet(options)); - } - } - - public boolean appliesTo(Composite rowPrefix, ColumnFamily current, long now) throws InvalidRequestException - { - CellName name = current.metadata().comparator.create(rowPrefix, column); - for (ByteBuffer value : inValues) - { - if (isSatisfiedByValue(value, current.getColumn(name), column.type, Operator.EQ, now)) - return true; - } - return false; - } - } - - /** A condition on an element of a collection column. IN operators are not supported here, see ElementAccessInBound. */ - static class ElementAccessBound extends Bound - { - public final ByteBuffer collectionElement; - public final ByteBuffer value; - - private ElementAccessBound(ColumnCondition condition, QueryOptions options) throws InvalidRequestException - { - super(condition.column, condition.operator); - assert column.type instanceof CollectionType && condition.collectionElement != null; - assert condition.operator != Operator.IN; - this.collectionElement = condition.collectionElement.bindAndGet(options); - this.value = condition.value.bindAndGet(options); - } - - public boolean appliesTo(Composite rowPrefix, ColumnFamily current, final long now) throws InvalidRequestException - { - if (collectionElement == null) - throw new InvalidRequestException("Invalid null value for " + (column.type instanceof MapType ? "map" : "list") + " element access"); - - if (column.type instanceof MapType) - { - MapType mapType = (MapType) column.type; - if (column.type.isMultiCell()) - { - Cell cell = current.getColumn(current.metadata().comparator.create(rowPrefix, column, collectionElement)); - return isSatisfiedByValue(value, cell, mapType.getValuesType(), operator, now); - } - else - { - Cell cell = current.getColumn(current.metadata().comparator.create(rowPrefix, column)); - ByteBuffer mapElementValue = cell.isLive(now) ? mapType.getSerializer().getSerializedValue(cell.value(), collectionElement, mapType.getKeysType()) - : null; - return compareWithOperator(operator, mapType.getValuesType(), value, mapElementValue); - } - } - - // sets don't have element access, so it's a list - ListType listType = (ListType) column.type; - if (column.type.isMultiCell()) - { - ByteBuffer columnValue = getListItem( - collectionColumns(current.metadata().comparator.create(rowPrefix, column), current, now), - getListIndex(collectionElement)); - return compareWithOperator(operator, listType.getElementsType(), value, columnValue); - } - else - { - Cell cell = current.getColumn(current.metadata().comparator.create(rowPrefix, column)); - ByteBuffer listElementValue = cell.isLive(now) ? 
listType.getSerializer().getElement(cell.value(), getListIndex(collectionElement)) - : null; - return compareWithOperator(operator, listType.getElementsType(), value, listElementValue); - } - } - - static int getListIndex(ByteBuffer collectionElement) throws InvalidRequestException - { - int idx = ByteBufferUtil.toInt(collectionElement); - if (idx < 0) - throw new InvalidRequestException(String.format("Invalid negative list index %d", idx)); - return idx; - } - - static ByteBuffer getListItem(Iterator iter, int index) - { - int adv = Iterators.advance(iter, index); - if (adv == index && iter.hasNext()) - return iter.next().value(); - else - return null; - } - - public ByteBuffer getCollectionElementValue() - { - return collectionElement; - } - } - - static class ElementAccessInBound extends Bound - { - public final ByteBuffer collectionElement; - public final List inValues; - - private ElementAccessInBound(ColumnCondition condition, QueryOptions options) throws InvalidRequestException - { - super(condition.column, condition.operator); - assert column.type instanceof CollectionType && condition.collectionElement != null; - this.collectionElement = condition.collectionElement.bindAndGet(options); - - if (condition.inValues == null) - this.inValues = ((Lists.Marker) condition.value).bind(options).getElements(); - else - { - this.inValues = new ArrayList<>(condition.inValues.size()); - for (Term value : condition.inValues) - this.inValues.add(value.bindAndGet(options)); - } - } - - public boolean appliesTo(Composite rowPrefix, ColumnFamily current, final long now) throws InvalidRequestException - { - if (collectionElement == null) - throw new InvalidRequestException("Invalid null value for " + (column.type instanceof MapType ? "map" : "list") + " element access"); - - CellNameType nameType = current.metadata().comparator; - if (column.type instanceof MapType) - { - MapType mapType = (MapType) column.type; - AbstractType valueType = mapType.getValuesType(); - if (column.type.isMultiCell()) - { - CellName name = nameType.create(rowPrefix, column, collectionElement); - Cell item = current.getColumn(name); - for (ByteBuffer value : inValues) - { - if (isSatisfiedByValue(value, item, valueType, Operator.EQ, now)) - return true; - } - return false; - } - else - { - Cell cell = current.getColumn(nameType.create(rowPrefix, column)); - ByteBuffer mapElementValue = null; - if (cell != null && cell.isLive(now)) - mapElementValue = mapType.getSerializer().getSerializedValue(cell.value(), collectionElement, mapType.getKeysType()); - for (ByteBuffer value : inValues) - { - if (value == null) - { - if (mapElementValue == null) - return true; - continue; - } - if (valueType.compare(value, mapElementValue) == 0) - return true; - } - return false; - } - } - - ListType listType = (ListType) column.type; - AbstractType elementsType = listType.getElementsType(); - if (column.type.isMultiCell()) - { - ByteBuffer columnValue = ElementAccessBound.getListItem( - collectionColumns(nameType.create(rowPrefix, column), current, now), - ElementAccessBound.getListIndex(collectionElement)); - - for (ByteBuffer value : inValues) - { - if (compareWithOperator(Operator.EQ, elementsType, value, columnValue)) - return true; - } - } - else - { - Cell cell = current.getColumn(nameType.create(rowPrefix, column)); - ByteBuffer listElementValue = null; - if (cell != null && cell.isLive(now)) - listElementValue = listType.getSerializer().getElement(cell.value(), ElementAccessBound.getListIndex(collectionElement)); - - for (ByteBuffer 
value : inValues) - { - if (value == null) - { - if (listElementValue == null) - return true; - continue; - } - if (elementsType.compare(value, listElementValue) == 0) - return true; - } - } - return false; - } - } - - /** A condition on an entire collection column. IN operators are not supported here, see CollectionInBound. */ - static class CollectionBound extends Bound - { - private final Term.Terminal value; - - private CollectionBound(ColumnCondition condition, QueryOptions options) throws InvalidRequestException - { - super(condition.column, condition.operator); - assert column.type.isCollection() && condition.collectionElement == null; - assert condition.operator != Operator.IN; - this.value = condition.value.bind(options); - } - - public boolean appliesTo(Composite rowPrefix, ColumnFamily current, final long now) throws InvalidRequestException - { - CollectionType type = (CollectionType)column.type; - - if (type.isMultiCell()) - { - Iterator iter = collectionColumns(current.metadata().comparator.create(rowPrefix, column), current, now); - if (value == null) - { - if (operator == Operator.EQ) - return !iter.hasNext(); - else if (operator == Operator.NEQ) - return iter.hasNext(); - else - throw new InvalidRequestException(String.format("Invalid comparison with null for operator \"%s\"", operator)); - } - - return valueAppliesTo(type, iter, value, operator); - } - - // frozen collections - Cell cell = current.getColumn(current.metadata().comparator.create(rowPrefix, column)); - if (value == null) - { - if (operator == Operator.EQ) - return cell == null || !cell.isLive(now); - else if (operator == Operator.NEQ) - return cell != null && cell.isLive(now); - else - throw new InvalidRequestException(String.format("Invalid comparison with null for operator \"%s\"", operator)); - } - - // make sure we use v3 serialization format for comparison - ByteBuffer conditionValue; - if (type.kind == CollectionType.Kind.LIST) - conditionValue = ((Lists.Value) value).getWithProtocolVersion(Server.VERSION_3); - else if (type.kind == CollectionType.Kind.SET) - conditionValue = ((Sets.Value) value).getWithProtocolVersion(Server.VERSION_3); - else - conditionValue = ((Maps.Value) value).getWithProtocolVersion(Server.VERSION_3); - - return compareWithOperator(operator, type, conditionValue, cell.value()); - } - - static boolean valueAppliesTo(CollectionType type, Iterator iter, Term.Terminal value, Operator operator) - { - if (value == null) - return !iter.hasNext(); - - switch (type.kind) - { - case LIST: return listAppliesTo((ListType)type, iter, ((Lists.Value)value).elements, operator); - case SET: return setAppliesTo((SetType)type, iter, ((Sets.Value)value).elements, operator); - case MAP: return mapAppliesTo((MapType)type, iter, ((Maps.Value)value).map, operator); - } - throw new AssertionError(); - } - - private static boolean setOrListAppliesTo(AbstractType type, Iterator iter, Iterator conditionIter, Operator operator, boolean isSet) - { - while(iter.hasNext()) - { - if (!conditionIter.hasNext()) - return (operator == Operator.GT) || (operator == Operator.GTE) || (operator == Operator.NEQ); - - // for lists we use the cell value; for sets we use the cell name - ByteBuffer cellValue = isSet? 
iter.next().name().collectionElement() : iter.next().value(); - int comparison = type.compare(cellValue, conditionIter.next()); - if (comparison != 0) - return evaluateComparisonWithOperator(comparison, operator); - } - - if (conditionIter.hasNext()) - return (operator == Operator.LT) || (operator == Operator.LTE) || (operator == Operator.NEQ); - - // they're equal - return operator == Operator.EQ || operator == Operator.LTE || operator == Operator.GTE; - } - - private static boolean evaluateComparisonWithOperator(int comparison, Operator operator) - { - // called when comparison != 0 - switch (operator) - { - case EQ: - return false; - case LT: - case LTE: - return comparison < 0; - case GT: - case GTE: - return comparison > 0; - case NEQ: - return true; - default: - throw new AssertionError(); - } - } - - static boolean listAppliesTo(ListType type, Iterator iter, List elements, Operator operator) - { - return setOrListAppliesTo(type.getElementsType(), iter, elements.iterator(), operator, false); - } - - static boolean setAppliesTo(SetType type, Iterator iter, Set elements, Operator operator) - { - ArrayList sortedElements = new ArrayList<>(elements.size()); - sortedElements.addAll(elements); - Collections.sort(sortedElements, type.getElementsType()); - return setOrListAppliesTo(type.getElementsType(), iter, sortedElements.iterator(), operator, true); - } - - static boolean mapAppliesTo(MapType type, Iterator iter, Map elements, Operator operator) - { - Iterator> conditionIter = elements.entrySet().iterator(); - while(iter.hasNext()) - { - if (!conditionIter.hasNext()) - return (operator == Operator.GT) || (operator == Operator.GTE) || (operator == Operator.NEQ); - - Map.Entry conditionEntry = conditionIter.next(); - Cell c = iter.next(); - - // compare the keys - int comparison = type.getKeysType().compare(c.name().collectionElement(), conditionEntry.getKey()); - if (comparison != 0) - return evaluateComparisonWithOperator(comparison, operator); - - // compare the values - comparison = type.getValuesType().compare(c.value(), conditionEntry.getValue()); - if (comparison != 0) - return evaluateComparisonWithOperator(comparison, operator); - } - - if (conditionIter.hasNext()) - return (operator == Operator.LT) || (operator == Operator.LTE) || (operator == Operator.NEQ); - - // they're equal - return operator == Operator.EQ || operator == Operator.LTE || operator == Operator.GTE; - } - } - - public static class CollectionInBound extends Bound - { - private final List inValues; - - private CollectionInBound(ColumnCondition condition, QueryOptions options) throws InvalidRequestException - { - super(condition.column, condition.operator); - assert column.type instanceof CollectionType && condition.collectionElement == null; - assert condition.operator == Operator.IN; - inValues = new ArrayList<>(); - if (condition.inValues == null) - { - // We have a list of serialized collections that need to be deserialized for later comparisons - CollectionType collectionType = (CollectionType) column.type; - Lists.Marker inValuesMarker = (Lists.Marker) condition.value; - if (column.type instanceof ListType) - { - ListType deserializer = ListType.getInstance(collectionType.valueComparator(), false); - for (ByteBuffer buffer : inValuesMarker.bind(options).elements) - { - if (buffer == null) - this.inValues.add(null); - else - this.inValues.add(Lists.Value.fromSerialized(buffer, deserializer, options.getProtocolVersion())); - } - } - else if (column.type instanceof MapType) - { - MapType deserializer = 
MapType.getInstance(collectionType.nameComparator(), collectionType.valueComparator(), false); - for (ByteBuffer buffer : inValuesMarker.bind(options).elements) - { - if (buffer == null) - this.inValues.add(null); - else - this.inValues.add(Maps.Value.fromSerialized(buffer, deserializer, options.getProtocolVersion())); - } - } - else if (column.type instanceof SetType) - { - SetType deserializer = SetType.getInstance(collectionType.valueComparator(), false); - for (ByteBuffer buffer : inValuesMarker.bind(options).elements) - { - if (buffer == null) - this.inValues.add(null); - else - this.inValues.add(Sets.Value.fromSerialized(buffer, deserializer, options.getProtocolVersion())); - } - } - } - else - { - for (Term value : condition.inValues) - this.inValues.add(value.bind(options)); - } - } - - public boolean appliesTo(Composite rowPrefix, ColumnFamily current, final long now) throws InvalidRequestException - { - CollectionType type = (CollectionType)column.type; - CellName name = current.metadata().comparator.create(rowPrefix, column); - if (type.isMultiCell()) - { - // copy iterator contents so that we can properly reuse them for each comparison with an IN value - List cells = newArrayList(collectionColumns(name, current, now)); - for (Term.Terminal value : inValues) - { - if (CollectionBound.valueAppliesTo(type, cells.iterator(), value, Operator.EQ)) - return true; - } - return false; - } - else - { - Cell cell = current.getColumn(name); - for (Term.Terminal value : inValues) - { - if (value == null) - { - if (cell == null || !cell.isLive(now)) - return true; - } - else if (type.compare(((Term.CollectionTerminal)value).getWithProtocolVersion(Server.VERSION_3), cell.value()) == 0) - { - return true; - } - } - return false; - } - } - } -#endif - - class raw final { - private: - ::shared_ptr _value; - std::vector<::shared_ptr> _in_values; - ::shared_ptr _in_marker; - - // Can be nullptr, only used with the syntax "IF m[e] = ..." (in which case it's 'e') - ::shared_ptr _collection_element; - const operator_type& _op; - public: - raw(::shared_ptr value, - std::vector<::shared_ptr> in_values, - ::shared_ptr in_marker, - ::shared_ptr collection_element, - const operator_type& op) - : _value(std::move(value)) - , _in_values(std::move(in_values)) - , _in_marker(std::move(in_marker)) - , _collection_element(std::move(collection_element)) - , _op(op) - { } - - /** A condition on a column. For example: "IF col = 'foo'" */ - static ::shared_ptr simple_condition(::shared_ptr value, const operator_type& op) { - return ::make_shared(std::move(value), std::vector<::shared_ptr>{}, - ::shared_ptr{}, ::shared_ptr{}, op); - } - - /** An IN condition on a column. For example: "IF col IN ('foo', 'bar', ...)" */ - static ::shared_ptr simple_in_condition(std::vector<::shared_ptr> in_values) { - return ::make_shared(::shared_ptr{}, std::move(in_values), - ::shared_ptr{}, ::shared_ptr{}, operator_type::IN); - } - - /** An IN condition on a column with a single marker. For example: "IF col IN ?" */ - static ::shared_ptr simple_in_condition(::shared_ptr in_marker) { - return ::make_shared(::shared_ptr{}, std::vector<::shared_ptr>{}, - std::move(in_marker), ::shared_ptr{}, operator_type::IN); - } - - /** A condition on a collection element. 
For example: "IF col['key'] = 'foo'" */ - static ::shared_ptr collection_condition(::shared_ptr value, ::shared_ptr collection_element, - const operator_type& op) { - return ::make_shared(std::move(value), std::vector<::shared_ptr>{}, ::shared_ptr{}, std::move(collection_element), op); - } - - /** An IN condition on a collection element. For example: "IF col['key'] IN ('foo', 'bar', ...)" */ - static ::shared_ptr collection_in_condition(::shared_ptr collection_element, - std::vector<::shared_ptr> in_values) { - return ::make_shared(::shared_ptr{}, std::move(in_values), ::shared_ptr{}, - std::move(collection_element), operator_type::IN); - } - - /** An IN condition on a collection element with a single marker. For example: "IF col['key'] IN ?" */ - static ::shared_ptr collection_in_condition(::shared_ptr collection_element, - ::shared_ptr in_marker) { - return ::make_shared(::shared_ptr{}, std::vector<::shared_ptr>{}, std::move(in_marker), - std::move(collection_element), operator_type::IN); - } - - ::shared_ptr prepare(database& db, const sstring& keyspace, const column_definition& receiver); - }; -}; - -} diff --git a/scylla/cql3/column_identifier.cc b/scylla/cql3/column_identifier.cc deleted file mode 100644 index bbbff38..0000000 --- a/scylla/cql3/column_identifier.cc +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "cql3/column_identifier.hh" -#include "exceptions/exceptions.hh" -#include "cql3/selection/simple_selector.hh" - -#include <regex> - -namespace cql3 { - -column_identifier::column_identifier(sstring raw_text, bool keep_case) { - _text = std::move(raw_text); - if (!keep_case) { - std::transform(_text.begin(), _text.end(), _text.begin(), ::tolower); - } - bytes_ = to_bytes(_text); -} - -column_identifier::column_identifier(bytes bytes_, data_type type) - : bytes_(std::move(bytes_)) - , _text(type->get_string(this->bytes_)) -{ } - -column_identifier::column_identifier(bytes bytes_, sstring text) - : bytes_(std::move(bytes_)) - , _text(std::move(text)) -{ } - -bool column_identifier::operator==(const column_identifier& other) const { - return bytes_ == other.bytes_; -} - -const sstring& column_identifier::text() const { - return _text; -} - -const bytes& column_identifier::name() const { - return bytes_; -} - -sstring column_identifier::to_string() const { - return _text; -} - -sstring column_identifier::to_cql_string() const { - static const std::regex unquoted_identifier_re("[a-z][a-z0-9_]*"); - if (std::regex_match(_text.begin(), _text.end(), unquoted_identifier_re)) { - return _text; - } - static const std::regex double_quote_re("\""); - std::string result = _text; - result = std::regex_replace(result, double_quote_re, "\"\""); - return '"' + result + '"'; -} - -column_identifier::raw::raw(sstring raw_text, bool keep_case) - : _raw_text{raw_text} - , _text{raw_text} -{ - if (!keep_case) { - std::transform(_text.begin(), _text.end(), _text.begin(), ::tolower); - } -} - -::shared_ptr<selectable> column_identifier::raw::prepare(schema_ptr s) { - return prepare_column_identifier(s); -} - -::shared_ptr<column_identifier> -column_identifier::raw::prepare_column_identifier(schema_ptr schema) { - if (schema->regular_column_name_type() == utf8_type) { - return ::make_shared<column_identifier>(_text, true); - } - - // We have a Thrift-created table with a non-text comparator. We need to parse column names with the comparator - // to get the correct ByteBuffer representation. However, this doesn't apply to key aliases, so we need to - // make a special check for those and treat them normally. See CASSANDRA-8178.
- auto text_bytes = to_bytes(_text); - auto def = schema->get_column_definition(text_bytes); - if (def) { - return ::make_shared<column_identifier>(std::move(text_bytes), _text); - } - - return ::make_shared<column_identifier>(schema->regular_column_name_type()->from_string(_raw_text), _text); -} - -bool column_identifier::raw::processes_selection() const { - return false; -} - -bool column_identifier::raw::operator==(const raw& other) const { - return _text == other._text; -} - -bool column_identifier::raw::operator!=(const raw& other) const { - return !operator==(other); -} - -sstring column_identifier::raw::to_string() const { - return _text; -} - -std::ostream& operator<<(std::ostream& out, const column_identifier::raw& id) { - return out << id._text; -} - -::shared_ptr<selection::selector::factory> -column_identifier::new_selector_factory(database& db, schema_ptr schema, std::vector<const column_definition*>& defs) { - auto def = get_column_definition(schema, *this); - if (!def) { - throw exceptions::invalid_request_exception(sprint("Undefined name %s in selection clause", _text)); - } - - return selection::simple_selector::new_factory(def->name_as_text(), add_and_get_index(*def, defs), def->type); -} - -} - -bool cql3::column_identifier::text_comparator::operator()(const cql3::column_identifier& c1, const cql3::column_identifier& c2) const { - return c1.text() < c2.text(); -} diff --git a/scylla/cql3/column_identifier.hh b/scylla/cql3/column_identifier.hh deleted file mode 100644 index d3e90d3..0000000 --- a/scylla/cql3/column_identifier.hh +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include "cql3/selection/selectable.hh" - -#include "schema.hh" - -#include <vector> -#include <functional> -#include <ostream> - -namespace cql3 { - -/** - * Represents an identifier for a CQL column definition.
- * TODO : should support light-weight mode without text representation for when not interned - */ -class column_identifier final : public selection::selectable { -public: - bytes bytes_; -private: - sstring _text; -public: - // less comparator sorting by text - struct text_comparator { - bool operator()(const column_identifier& c1, const column_identifier& c2) const; - }; - - column_identifier(sstring raw_text, bool keep_case); - - column_identifier(bytes bytes_, data_type type); - - column_identifier(bytes bytes_, sstring text); - - bool operator==(const column_identifier& other) const; - - const sstring& text() const; - - const bytes& name() const; - - sstring to_string() const; - - sstring to_cql_string() const; - - friend std::ostream& operator<<(std::ostream& out, const column_identifier& i) { - return out << i._text; - } - -#if 0 - public ColumnIdentifier clone(AbstractAllocator allocator) - { - return new ColumnIdentifier(allocator.clone(bytes), text); - } -#endif - - virtual ::shared_ptr new_selector_factory(database& db, schema_ptr schema, - std::vector& defs) override; - - class raw; -}; - -/** - * Because Thrift-created tables may have a non-text comparator, we cannot determine the proper 'key' until - * we know the comparator. ColumnIdentifier.Raw is a placeholder that can be converted to a real ColumnIdentifier - * once the comparator is known with prepare(). This should only be used with identifiers that are actual - * column names. See CASSANDRA-8178 for more background. - */ -class column_identifier::raw final : public selectable::raw { -private: - const sstring _raw_text; - sstring _text; -public: - raw(sstring raw_text, bool keep_case); - - virtual ::shared_ptr prepare(schema_ptr s) override; - - ::shared_ptr prepare_column_identifier(schema_ptr s); - - virtual bool processes_selection() const override; - - bool operator==(const raw& other) const; - - bool operator!=(const raw& other) const; - - virtual sstring to_string() const; - - friend std::hash; - friend std::ostream& operator<<(std::ostream& out, const column_identifier::raw& id); -}; - -static inline -const column_definition* get_column_definition(schema_ptr schema, const column_identifier& id) { - return schema->get_column_definition(id.bytes_); -} - -static inline -::shared_ptr to_identifier(const column_definition& def) { - return def.column_specification->name; -} - -static inline -std::vector<::shared_ptr> to_identifiers(const std::vector& defs) { - std::vector<::shared_ptr> r; - r.reserve(defs.size()); - for (auto&& def : defs) { - r.push_back(to_identifier(*def)); - } - return r; -} - -} - -namespace std { - -template<> -struct hash { - size_t operator()(const cql3::column_identifier& i) const { - return std::hash()(i.bytes_); - } -}; - -template<> -struct hash { - size_t operator()(const cql3::column_identifier::raw& r) const { - return std::hash()(r._text); - } -}; - -} diff --git a/scylla/cql3/column_specification.cc b/scylla/cql3/column_specification.cc deleted file mode 100644 index 255e383..0000000 --- a/scylla/cql3/column_specification.cc +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
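The to_cql_string() method in the column_identifier.cc hunk above implements CQL's quoting rule: an identifier that is already a valid unquoted name passes through, anything else is wrapped in double quotes with embedded quotes doubled. A standalone sketch of that rule over std::string:

    #include <cassert>
    #include <regex>
    #include <string>

    std::string to_cql_string(const std::string& text) {
        static const std::regex unquoted_identifier_re("[a-z][a-z0-9_]*");
        if (std::regex_match(text, unquoted_identifier_re)) {
            return text; // already a legal unquoted identifier
        }
        static const std::regex double_quote_re("\"");
        return '"' + std::regex_replace(text, double_quote_re, "\"\"") + '"';
    }

    int main() {
        assert(to_cql_string("foo_bar1") == "foo_bar1");
        assert(to_cql_string("MyCol") == "\"MyCol\"");
        assert(to_cql_string("a\"b") == "\"a\"\"b\"");
    }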
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/column_specification.hh" - -namespace cql3 { - -bool column_specification::all_in_same_table(const std::vector<::shared_ptr>& names) -{ - assert(!names.empty()); - - auto first = names.front(); - return std::all_of(std::next(names.begin()), names.end(), [first] (auto&& spec) { - return spec->ks_name == first->ks_name && spec->cf_name == first->cf_name; - }); -} - -} diff --git a/scylla/cql3/column_specification.hh b/scylla/cql3/column_specification.hh deleted file mode 100644 index c3ab3f7..0000000 --- a/scylla/cql3/column_specification.hh +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
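The all_in_same_table() check shown above compares every specification against the first one's keyspace and column-family names. A self-contained sketch of the same check with plain structs standing in for the real specifications:

    #include <algorithm>
    #include <cassert>
    #include <string>
    #include <vector>

    struct column_specification {
        std::string ks_name;
        std::string cf_name;
    };

    bool all_in_same_table(const std::vector<column_specification>& names) {
        assert(!names.empty());
        const auto& first = names.front();
        return std::all_of(std::next(names.begin()), names.end(), [&](const auto& spec) {
            return spec.ks_name == first.ks_name && spec.cf_name == first.cf_name;
        });
    }

    int main() {
        assert(all_in_same_table({{"ks", "t1"}, {"ks", "t1"}}));
        assert(!all_in_same_table({{"ks", "t1"}, {"ks", "t2"}}));
    }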
- */ - -#pragma once - -#include "types.hh" - -namespace cql3 { - -class column_specification; -class column_identifier; - -class column_specification final { -public: - const sstring ks_name; - const sstring cf_name; - const ::shared_ptr name; - const data_type type; - - column_specification(sstring ks_name_, sstring cf_name_, ::shared_ptr name_, data_type type_) - : ks_name(std::move(ks_name_)) - , cf_name(std::move(cf_name_)) - , name(name_) - , type(type_) - { } - - /** - * Returns a new ColumnSpecification for the same column but with the specified alias. - * - * @param alias the column alias - * @return a new ColumnSpecification for the same column but with the specified alias. - */ - ::shared_ptr with_alias(::shared_ptr alias) { - return ::make_shared(ks_name, cf_name, alias, type); - } - - bool is_reversed_type() const { - return ::dynamic_pointer_cast(type) != nullptr; - } - - static bool all_in_same_table(const std::vector<::shared_ptr>& names); -}; - -} diff --git a/scylla/cql3/constants.cc b/scylla/cql3/constants.cc deleted file mode 100644 index 42c2e9d..0000000 --- a/scylla/cql3/constants.cc +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "cql3/constants.hh" -#include "cql3/cql3_type.hh" - -namespace cql3 { - -thread_local const ::shared_ptr constants::UNSET_VALUE = ::make_shared(cql3::raw_value::make_unset_value()); -thread_local const ::shared_ptr constants::NULL_LITERAL = ::make_shared(); -thread_local const ::shared_ptr constants::null_literal::NULL_VALUE = ::make_shared(); - -std::ostream& -operator<<(std::ostream&out, constants::type t) -{ - switch (t) { - case constants::type::STRING: return out << "STRING"; - case constants::type::INTEGER: return out << "INTEGER"; - case constants::type::UUID: return out << "UUID"; - case constants::type::FLOAT: return out << "FLOAT"; - case constants::type::BOOLEAN: return out << "BOOLEAN"; - case constants::type::HEX: return out << "HEX"; - }; - assert(0); -} - -bytes -constants::literal::parsed_value(data_type validator) -{ - try { - if (_type == type::HEX && validator == bytes_type) { - auto v = static_cast(_text); - v.remove_prefix(2); - return validator->from_string(v); - } - if (validator->is_counter()) { - return long_type->from_string(_text); - } - return validator->from_string(_text); - } catch (const marshal_exception& e) { - throw exceptions::invalid_request_exception(e.what()); - } -} - -assignment_testable::test_result -constants::literal::test_assignment(database& db, const sstring& keyspace, ::shared_ptr receiver) -{ - auto receiver_type = receiver->type->as_cql3_type(); - if (receiver_type->is_collection()) { - return test_result::NOT_ASSIGNABLE; - } - if (!receiver_type->is_native()) { - return test_result::WEAKLY_ASSIGNABLE; - } - auto kind = receiver_type.get()->get_kind(); - switch (_type) { - case type::STRING: - if (cql3_type::kind_enum_set::frozen< - cql3_type::kind::ASCII, - cql3_type::kind::TEXT, - cql3_type::kind::INET, - cql3_type::kind::VARCHAR, - cql3_type::kind::TIMESTAMP, - cql3_type::kind::DATE, - cql3_type::kind::TIME>::contains(kind)) { - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } - break; - case type::INTEGER: - if (cql3_type::kind_enum_set::frozen< - cql3_type::kind::BIGINT, - cql3_type::kind::COUNTER, - cql3_type::kind::DECIMAL, - cql3_type::kind::DOUBLE, - cql3_type::kind::FLOAT, - cql3_type::kind::INT, - cql3_type::kind::SMALLINT, - cql3_type::kind::TIMESTAMP, - cql3_type::kind::DATE, - cql3_type::kind::TINYINT, - cql3_type::kind::VARINT>::contains(kind)) { - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } - break; - case type::UUID: - if (cql3_type::kind_enum_set::frozen< - cql3_type::kind::UUID, - cql3_type::kind::TIMEUUID>::contains(kind)) { - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } - break; - case type::FLOAT: - if (cql3_type::kind_enum_set::frozen< - cql3_type::kind::DECIMAL, - cql3_type::kind::DOUBLE, - cql3_type::kind::FLOAT>::contains(kind)) { - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } - break; - case type::BOOLEAN: - if (kind == cql3_type::kind_enum_set::prepare()) { - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } - break; - case type::HEX: - if (kind == cql3_type::kind_enum_set::prepare()) { - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } - break; - } - return assignment_testable::test_result::NOT_ASSIGNABLE; -} - -::shared_ptr -constants::literal::prepare(database& db, const sstring& keyspace, ::shared_ptr receiver) -{ - if (!is_assignable(test_assignment(db, keyspace, receiver))) { - throw exceptions::invalid_request_exception(sprint("Invalid %s constant (%s) for \"%s\" of type %s", - _type, 
_text, *receiver->name, receiver->type->as_cql3_type()->to_string())); - } - return ::make_shared(cql3::raw_value::make_value(parsed_value(receiver->type))); -} - -void constants::deleter::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) { - if (column.type->is_multi_cell()) { - collection_type_impl::mutation coll_m; - coll_m.tomb = params.make_tombstone(); - auto ctype = static_pointer_cast(column.type); - m.set_cell(prefix, column, atomic_cell_or_collection::from_collection_mutation(ctype->serialize_mutation_form(coll_m))); - } else { - m.set_cell(prefix, column, make_dead_cell(params)); - } -} - -} diff --git a/scylla/cql3/constants.hh b/scylla/cql3/constants.hh deleted file mode 100644 index bf0d813..0000000 --- a/scylla/cql3/constants.hh +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/abstract_marker.hh" -#include "cql3/update_parameters.hh" -#include "cql3/operation.hh" -#include "cql3/values.hh" -#include "cql3/term.hh" -#include "core/shared_ptr.hh" - -namespace cql3 { - -/** - * Static helper methods and classes for constants. - */ -class constants { -public: -#if 0 - private static final Logger logger = LoggerFactory.getLogger(Constants.class); -#endif -public: - enum class type { - STRING, INTEGER, UUID, FLOAT, BOOLEAN, HEX - }; - - /** - * A constant value, i.e. a ByteBuffer. 
- */ - class value : public terminal { - public: - cql3::raw_value _bytes; - value(cql3::raw_value bytes_) : _bytes(std::move(bytes_)) {} - virtual cql3::raw_value get(const query_options& options) override { return _bytes; } - virtual cql3::raw_value_view bind_and_get(const query_options& options) override { return _bytes.to_view(); } - virtual sstring to_string() const override { return to_hex(*_bytes); } - }; - - static thread_local const ::shared_ptr UNSET_VALUE; - - class null_literal final : public term::raw { - private: - class null_value final : public value { - public: - null_value() : value(cql3::raw_value::make_null()) {} - virtual ::shared_ptr bind(const query_options& options) override { return {}; } - virtual sstring to_string() const override { return "null"; } - }; - static thread_local const ::shared_ptr NULL_VALUE; - public: - virtual ::shared_ptr prepare(database& db, const sstring& keyspace, ::shared_ptr receiver) override { - if (!is_assignable(test_assignment(db, keyspace, receiver))) { - throw exceptions::invalid_request_exception("Invalid null value for counter increment/decrement"); - } - return NULL_VALUE; - } - - virtual assignment_testable::test_result test_assignment(database& db, - const sstring& keyspace, - ::shared_ptr receiver) override { - return receiver->type->is_counter() - ? assignment_testable::test_result::NOT_ASSIGNABLE - : assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } - - virtual sstring to_string() const override { - return "null"; - } - }; - - static thread_local const ::shared_ptr NULL_LITERAL; - - class literal : public term::raw { - private: - const type _type; - const sstring _text; - public: - literal(type type_, sstring text) - : _type{type_} - , _text{text} - { } - - static ::shared_ptr string(sstring text) { - // This is a workaround for antlr3 not distinguishing between - // calling in lexer setText() with an empty string and not calling - // setText() at all. - if (text.size() == 1 && text[0] == -1) { - text.reset(); - } - return ::make_shared(type::STRING, text); - } - - static ::shared_ptr integer(sstring text) { - return ::make_shared(type::INTEGER, text); - } - - static ::shared_ptr floating_point(sstring text) { - return ::make_shared(type::FLOAT, text); - } - - static ::shared_ptr uuid(sstring text) { - return ::make_shared(type::UUID, text); - } - - static ::shared_ptr bool_(sstring text) { - return ::make_shared(type::BOOLEAN, text); - } - - static ::shared_ptr hex(sstring text) { - return ::make_shared(type::HEX, text); - } - - virtual ::shared_ptr prepare(database& db, const sstring& keyspace, ::shared_ptr receiver); - private: - bytes parsed_value(data_type validator); - public: - const sstring& get_raw_text() { - return _text; - } - - virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, ::shared_ptr receiver); - - virtual sstring to_string() const override { - return _type == type::STRING ? 
sstring(sprint("'%s'", _text)) : _text; - } - }; - - class marker : public abstract_marker { - public: - marker(int32_t bind_index, ::shared_ptr receiver) - : abstract_marker{bind_index, std::move(receiver)} - { - assert(!_receiver->type->is_collection()); - } - - virtual cql3::raw_value_view bind_and_get(const query_options& options) override { - try { - auto value = options.get_value_at(_bind_index); - if (value) { - _receiver->type->validate(*value); - } - return value; - } catch (const marshal_exception& e) { - throw exceptions::invalid_request_exception(e.what()); - } - } - - virtual ::shared_ptr bind(const query_options& options) override { - auto bytes = bind_and_get(options); - if (!bytes) { - return ::shared_ptr{}; - } - return ::make_shared(std::move(cql3::raw_value::make_value(to_bytes(*bytes)))); - } - }; - - class setter : public operation { - public: - using operation::operation; - - virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override { - auto value = _t->bind_and_get(params._options); - if (value.is_null()) { - m.set_cell(prefix, column, std::move(make_dead_cell(params))); - } else if (value.is_value()) { - m.set_cell(prefix, column, std::move(make_cell(*value, params))); - } - } - }; - - struct adder final : operation { - using operation::operation; - - virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override { - auto value = _t->bind_and_get(params._options); - if (value.is_null()) { - throw exceptions::invalid_request_exception("Invalid null value for counter increment"); - } else if (value.is_unset_value()) { - return; - } - auto increment = value_cast(long_type->deserialize_value(*value)); - m.set_cell(prefix, column, make_counter_update_cell(increment, params)); - } - }; - - struct subtracter final : operation { - using operation::operation; - - virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override { - auto value = _t->bind_and_get(params._options); - if (value.is_null()) { - throw exceptions::invalid_request_exception("Invalid null value for counter increment"); - } else if (value.is_unset_value()) { - return; - } - auto increment = value_cast(long_type->deserialize_value(*value)); - if (increment == std::numeric_limits::min()) { - throw exceptions::invalid_request_exception(sprint("The negation of %d overflows supported counter precision (signed 8 bytes integer)", increment)); - } - m.set_cell(prefix, column, make_counter_update_cell(-increment, params)); - } - }; - - class deleter : public operation { - public: - deleter(const column_definition& column) - : operation(column, {}) - { } - - virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override; - }; -}; - -std::ostream& operator<<(std::ostream&out, constants::type t); - -} diff --git a/scylla/cql3/cql3_type.cc b/scylla/cql3/cql3_type.cc deleted file mode 100644 index 56a4bfb..0000000 --- a/scylla/cql3/cql3_type.cc +++ /dev/null @@ -1,395 +0,0 @@ -/* - * Copyright (C) 2014 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
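The subtracter above refuses to negate std::numeric_limits<int64_t>::min() because the positive counterpart does not fit in a signed 64-bit counter. A standalone illustration of that guard (negate_checked is a hypothetical name, not part of the patch):

#include <cstdint>
#include <limits>
#include <stdexcept>

// Negating INT64_MIN is undefined behaviour: +9223372036854775808 does not
// fit in int64_t, which is why the counter code rejects it before negating.
int64_t negate_checked(int64_t increment) {
    if (increment == std::numeric_limits<int64_t>::min()) {
        throw std::overflow_error("negation overflows signed 64-bit counter precision");
    }
    return -increment;
}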
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include -#include -#include - -#include "cql3_type.hh" -#include "cql3/util.hh" -#include "ut_name.hh" - -namespace cql3 { - -sstring cql3_type::to_string() const { - if (_type->is_user_type()) { - return "frozen<" + util::maybe_quote(_name) + ">"; - } - if (_type->is_tuple()) { - return "frozen<" + _name + ">"; - } - return _name; -} - -shared_ptr cql3_type::raw::prepare(database& db, const sstring& keyspace) { - try { - auto&& ks = db.find_keyspace(keyspace); - return prepare_internal(keyspace, ks.metadata()->user_types()); - } catch (no_such_keyspace& nsk) { - throw exceptions::invalid_request_exception("Unknown keyspace " + keyspace); - } -} - -bool cql3_type::raw::references_user_type(const sstring& name) const { - return false; -} - -class cql3_type::raw_type : public raw { -private: - shared_ptr _type; -public: - raw_type(shared_ptr type) - : _type{type} - { } -public: - virtual shared_ptr prepare(database& db, const sstring& keyspace) { - return _type; - } - shared_ptr prepare_internal(const sstring&, lw_shared_ptr) override { - return _type; - } - - virtual bool supports_freezing() const { - return false; - } - - virtual bool is_counter() const { - return _type->is_counter(); - } - - virtual sstring to_string() const { - return _type->to_string(); - } -}; - -class cql3_type::raw_collection : public raw { - const collection_type_impl::kind* _kind; - shared_ptr _keys; - shared_ptr _values; -public: - raw_collection(const collection_type_impl::kind* kind, shared_ptr keys, shared_ptr values) - : _kind(kind), _keys(std::move(keys)), _values(std::move(values)) { - } - - virtual void freeze() override { - if (_keys && _keys->supports_freezing()) { - _keys->freeze(); - } - if (_values && _values->supports_freezing()) { - _values->freeze(); - } - _frozen = true; - } - - virtual bool supports_freezing() const override { - return true; - } - - virtual bool is_collection() const override { - return true; - } - - virtual shared_ptr prepare_internal(const sstring& keyspace, lw_shared_ptr user_types) override { - assert(_values); // "Got null values type for a collection"; - - if (!_frozen && _values->supports_freezing() && !_values->_frozen) { - throw exceptions::invalid_request_exception(sprint("Non-frozen collections are not allowed inside collections: %s", *this)); - } - if (_values->is_counter()) { - throw exceptions::invalid_request_exception(sprint("Counters are not allowed inside collections: %s", *this)); - } - - if (_keys) { - if (!_frozen && _keys->supports_freezing() && !_keys->_frozen) { - throw exceptions::invalid_request_exception(sprint("Non-frozen collections are not allowed inside collections: %s", *this)); - } - } - - if (_kind == &collection_type_impl::kind::list) { - return make_shared(cql3_type(to_string(), list_type_impl::get_instance(_values->prepare_internal(keyspace, user_types)->get_type(), !_frozen), false)); - } else if (_kind == &collection_type_impl::kind::set) { - return make_shared(cql3_type(to_string(), set_type_impl::get_instance(_values->prepare_internal(keyspace, user_types)->get_type(), !_frozen), false)); - } else if (_kind == &collection_type_impl::kind::map) { - assert(_keys); // "Got null keys 
type for a collection"; - return make_shared(cql3_type(to_string(), map_type_impl::get_instance(_keys->prepare_internal(keyspace, user_types)->get_type(), _values->prepare_internal(keyspace, user_types)->get_type(), !_frozen), false)); - } - abort(); - } - - bool references_user_type(const sstring& name) const override { - return (_keys && _keys->references_user_type(name)) || _values->references_user_type(name); - } - - virtual sstring to_string() const override { - sstring start = _frozen ? "frozen<" : ""; - sstring end = _frozen ? ">" : ""; - if (_kind == &collection_type_impl::kind::list) { - return sprint("%slist<%s>%s", start, _values, end); - } else if (_kind == &collection_type_impl::kind::set) { - return sprint("%sset<%s>%s", start, _values, end); - } else if (_kind == &collection_type_impl::kind::map) { - return sprint("%smap<%s, %s>%s", start, _keys, _values, end); - } - abort(); - } -}; - -class cql3_type::raw_ut : public raw { - ut_name _name; -public: - raw_ut(ut_name name) - : _name(std::move(name)) { - } - - virtual std::experimental::optional keyspace() const override { - return _name.get_keyspace(); - } - - virtual void freeze() override { - _frozen = true; - } - - virtual shared_ptr prepare_internal(const sstring& keyspace, lw_shared_ptr user_types) override { - if (_name.has_keyspace()) { - // The provided keyspace is the one of the current statement this is part of. If it's different from the keyspace of - // the UTName, we reject since we want to limit user types to their own keyspace (see #6643) - if (!keyspace.empty() && keyspace != _name.get_keyspace()) { - throw exceptions::invalid_request_exception(sprint("Statement on keyspace %s cannot refer to a user type in keyspace %s; " - "user types can only be used in the keyspace they are defined in", - keyspace, _name.get_keyspace())); - } - } else { - _name.set_keyspace(keyspace); - } - if (!user_types) { - // bootstrap mode. 
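The keyspace check above limits user-defined types to the keyspace they were declared in (see the referenced issue #6643). A sketch of the same rule under simplified assumptions, with std::optional standing in for the ut_name API:

#include <optional>
#include <stdexcept>
#include <string>

// If the type name is keyspace-qualified, it must match the statement's
// keyspace; if it is unqualified, it inherits the statement's keyspace.
void check_udt_keyspace(const std::string& statement_ks,
                        std::optional<std::string>& udt_ks) {
    if (udt_ks) {
        if (!statement_ks.empty() && statement_ks != *udt_ks) {
            throw std::invalid_argument(
                "user types can only be used in the keyspace they are defined in");
        }
    } else {
        udt_ks = statement_ks;
    }
}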
- throw exceptions::invalid_request_exception(sprint("Unknown type %s", _name)); - } - try { - auto&& type = user_types->get_type(_name.get_user_type_name()); - if (!_frozen) { - throw exceptions::invalid_request_exception("Non-frozen User-Defined types are not supported, please use frozen<>"); - } - return make_shared(_name.to_string(), std::move(type)); - } catch (std::out_of_range& e) { - throw exceptions::invalid_request_exception(sprint("Unknown type %s", _name)); - } - } - bool references_user_type(const sstring& name) const override { - return _name.get_string_type_name() == name; - } - virtual bool supports_freezing() const override { - return true; - } - - virtual sstring to_string() const override { - return _name.to_string(); - } -}; - - -class cql3_type::raw_tuple : public raw { - std::vector> _types; -public: - raw_tuple(std::vector> types) - : _types(std::move(types)) { - } - virtual bool supports_freezing() const override { - return true; - } - virtual bool is_collection() const override { - return false; - } - virtual void freeze() override { - for (auto&& t : _types) { - if (t->supports_freezing()) { - t->freeze(); - } - } - _frozen = true; - } - virtual shared_ptr prepare_internal(const sstring& keyspace, lw_shared_ptr user_types) override { - if (!_frozen) { - freeze(); - } - std::vector ts; - for (auto&& t : _types) { - if (t->is_counter()) { - throw exceptions::invalid_request_exception("Counters are not allowed inside tuples"); - } - ts.push_back(t->prepare_internal(keyspace, user_types)->get_type()); - } - return make_cql3_tuple_type(tuple_type_impl::get_instance(std::move(ts))); - } - - bool references_user_type(const sstring& name) const override { - return std::any_of(_types.begin(), _types.end(), [&name](auto t) { - return t->references_user_type(name); - }); - } - - virtual sstring to_string() const override { - return sprint("tuple<%s>", join(", ", _types)); - } -}; - -bool -cql3_type::raw::is_collection() const { - return false; -} - -bool -cql3_type::raw::is_counter() const { - return false; -} - -std::experimental::optional -cql3_type::raw::keyspace() const { - return std::experimental::nullopt; -} - -void -cql3_type::raw::freeze() { - sstring message = sprint("frozen<> is only allowed on collections, tuples, and user-defined types (got %s)", to_string()); - throw exceptions::invalid_request_exception(message); -} - -shared_ptr -cql3_type::raw::from(shared_ptr type) { - return ::make_shared(type); -} - -shared_ptr -cql3_type::raw::user_type(ut_name name) { - return ::make_shared(name); -} - -shared_ptr -cql3_type::raw::map(shared_ptr t1, shared_ptr t2) { - return make_shared(raw_collection(&collection_type_impl::kind::map, std::move(t1), std::move(t2))); -} - -shared_ptr -cql3_type::raw::list(shared_ptr t) { - return make_shared(raw_collection(&collection_type_impl::kind::list, {}, std::move(t))); -} - -shared_ptr -cql3_type::raw::set(shared_ptr t) { - return make_shared(raw_collection(&collection_type_impl::kind::set, {}, std::move(t))); -} - -shared_ptr -cql3_type::raw::tuple(std::vector> ts) { - return make_shared(raw_tuple(std::move(ts))); -} - -shared_ptr -cql3_type::raw::frozen(shared_ptr t) { - t->freeze(); - return t; -} - -thread_local shared_ptr cql3_type::ascii = make("ascii", ascii_type, cql3_type::kind::ASCII); -thread_local shared_ptr cql3_type::bigint = make("bigint", long_type, cql3_type::kind::BIGINT); -thread_local shared_ptr cql3_type::blob = make("blob", bytes_type, cql3_type::kind::BLOB); -thread_local shared_ptr cql3_type::boolean = 
make("boolean", boolean_type, cql3_type::kind::BOOLEAN); -thread_local shared_ptr cql3_type::double_ = make("double", double_type, cql3_type::kind::DOUBLE); -thread_local shared_ptr cql3_type::empty = make("empty", empty_type, cql3_type::kind::EMPTY); -thread_local shared_ptr cql3_type::float_ = make("float", float_type, cql3_type::kind::FLOAT); -thread_local shared_ptr cql3_type::int_ = make("int", int32_type, cql3_type::kind::INT); -thread_local shared_ptr cql3_type::smallint = make("smallint", short_type, cql3_type::kind::SMALLINT); -thread_local shared_ptr cql3_type::text = make("text", utf8_type, cql3_type::kind::TEXT); -thread_local shared_ptr cql3_type::timestamp = make("timestamp", timestamp_type, cql3_type::kind::TIMESTAMP); -thread_local shared_ptr cql3_type::tinyint = make("tinyint", byte_type, cql3_type::kind::TINYINT); -thread_local shared_ptr cql3_type::uuid = make("uuid", uuid_type, cql3_type::kind::UUID); -thread_local shared_ptr cql3_type::varchar = make("varchar", utf8_type, cql3_type::kind::TEXT); -thread_local shared_ptr cql3_type::timeuuid = make("timeuuid", timeuuid_type, cql3_type::kind::TIMEUUID); -thread_local shared_ptr cql3_type::date = make("date", simple_date_type, cql3_type::kind::DATE); -thread_local shared_ptr cql3_type::time = make("time", time_type, cql3_type::kind::TIME); -thread_local shared_ptr cql3_type::inet = make("inet", inet_addr_type, cql3_type::kind::INET); -thread_local shared_ptr cql3_type::varint = make("varint", varint_type, cql3_type::kind::VARINT); -thread_local shared_ptr cql3_type::decimal = make("decimal", decimal_type, cql3_type::kind::DECIMAL); -thread_local shared_ptr cql3_type::counter = make("counter", counter_type, cql3_type::kind::COUNTER); - -const std::vector>& -cql3_type::values() { - static thread_local std::vector> v = { - cql3_type::ascii, - cql3_type::bigint, - cql3_type::blob, - cql3_type::boolean, - cql3_type::counter, - cql3_type::decimal, - cql3_type::double_, - cql3_type::empty, - cql3_type::float_, - cql3_type::inet, - cql3_type::int_, - cql3_type::smallint, - cql3_type::text, - cql3_type::timestamp, - cql3_type::tinyint, - cql3_type::uuid, - cql3_type::varchar, - cql3_type::varint, - cql3_type::timeuuid, - cql3_type::date, - cql3_type::time, - }; - return v; -} - -shared_ptr -make_cql3_tuple_type(tuple_type t) { - auto name = sprint("tuple<%s>", - ::join(", ", - t->all_types() - | boost::adaptors::transformed(std::mem_fn(&abstract_type::as_cql3_type)))); - return ::make_shared(std::move(name), std::move(t), false); -} - - -std::ostream& -operator<<(std::ostream& os, const cql3_type::raw& r) { - return os << r.to_string(); -} - -namespace util { - -sstring maybe_quote(const sstring& s) { - static const std::regex unquoted("\\w*"); - static const std::regex double_quote("\""); - - if (std::regex_match(s.begin(), s.end(), unquoted)) { - return s; - } - std::ostringstream ss; - ss << "\""; - std::regex_replace(std::ostreambuf_iterator(ss), s.begin(), s.end(), double_quote, "\"\""); - ss << "\""; - return ss.str(); -} - -} - -} - diff --git a/scylla/cql3/cql3_type.hh b/scylla/cql3/cql3_type.hh deleted file mode 100644 index 17d7a84..0000000 --- a/scylla/cql3/cql3_type.hh +++ /dev/null @@ -1,391 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
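util::maybe_quote() above leaves identifier-like names (matching \w*) untouched and otherwise wraps the string in double quotes, doubling any embedded quote. The same observable behaviour in a standalone sketch, with the regex_replace call rewritten as an explicit loop:

#include <regex>
#include <string>

std::string maybe_quote(const std::string& s) {
    static const std::regex unquoted("\\w*");
    if (std::regex_match(s, unquoted)) {
        return s;  // bare identifiers need no quoting
    }
    std::string out = "\"";
    for (char c : s) {
        out += c;
        if (c == '"') {
            out += c;  // escape embedded quotes by doubling them
        }
    }
    out += '"';
    return out;
}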
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "types.hh" -#include "exceptions/exceptions.hh" -#include -#include "enum_set.hh" - -class database; -class user_types_metadata; - -namespace cql3 { - -class ut_name; - -class cql3_type final { - sstring _name; - data_type _type; - bool _native; -public: - cql3_type(sstring name, data_type type, bool native = true) - : _name(std::move(name)), _type(std::move(type)), _native(native) {} - bool is_collection() const { return _type->is_collection(); } - bool is_counter() const { return _type->is_counter(); } - bool is_native() const { return _native; } - data_type get_type() const { return _type; } - sstring to_string() const; - - // For UserTypes, we need to know the current keyspace to resolve the - // actual type used, so Raw is a "not yet prepared" CQL3Type. 
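The comment above introduces raw as a "not yet prepared" type: the parser emits an unresolved placeholder, and prepare() later binds it against the keyspace's user types. A minimal sketch of that two-phase parse-then-prepare shape, using hypothetical stand-in types rather than the cql3_type API:

#include <memory>
#include <stdexcept>
#include <string>

// Resolved ("prepared") type: everything about it is known.
struct prepared_type {
    std::string cql_name;
};

// Unresolved placeholder produced by the parser; resolution against the
// current keyspace is deferred until prepare() is called.
struct raw_type {
    std::string name;

    std::shared_ptr<prepared_type> prepare(const std::string& keyspace) const {
        if (keyspace.empty()) {
            throw std::invalid_argument("no keyspace to resolve type " + name + " against");
        }
        return std::make_shared<prepared_type>(prepared_type{name});
    }
};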
- class raw { - public: - virtual ~raw() {} - bool _frozen = false; - virtual bool supports_freezing() const = 0; - virtual bool is_collection() const; - virtual bool is_counter() const; - virtual bool references_user_type(const sstring&) const; - virtual std::experimental::optional keyspace() const; - virtual void freeze(); - virtual shared_ptr prepare_internal(const sstring& keyspace, lw_shared_ptr) = 0; - virtual shared_ptr prepare(database& db, const sstring& keyspace); - static shared_ptr from(shared_ptr type); - static shared_ptr user_type(ut_name name); - static shared_ptr map(shared_ptr t1, shared_ptr t2); - static shared_ptr list(shared_ptr t); - static shared_ptr set(shared_ptr t); - static shared_ptr tuple(std::vector> ts); - static shared_ptr frozen(shared_ptr t); - virtual sstring to_string() const = 0; - friend std::ostream& operator<<(std::ostream& os, const raw& r); - }; - -private: - class raw_type; - class raw_collection; - class raw_ut; - class raw_tuple; - friend std::ostream& operator<<(std::ostream& os, const cql3_type& t) { - return os << t.to_string(); - } - -public: - enum class kind : int8_t { - ASCII, BIGINT, BLOB, BOOLEAN, COUNTER, DECIMAL, DOUBLE, EMPTY, FLOAT, INT, SMALLINT, TINYINT, INET, TEXT, TIMESTAMP, UUID, VARCHAR, VARINT, TIMEUUID, DATE, TIME - }; - using kind_enum = super_enum; - using kind_enum_set = enum_set; -private: - std::experimental::optional _kind; - static shared_ptr make(sstring name, data_type type, kind kind_) { - return make_shared(std::move(name), std::move(type), kind_); - } -public: - static thread_local shared_ptr ascii; - static thread_local shared_ptr bigint; - static thread_local shared_ptr blob; - static thread_local shared_ptr boolean; - static thread_local shared_ptr double_; - static thread_local shared_ptr empty; - static thread_local shared_ptr float_; - static thread_local shared_ptr int_; - static thread_local shared_ptr smallint; - static thread_local shared_ptr text; - static thread_local shared_ptr timestamp; - static thread_local shared_ptr tinyint; - static thread_local shared_ptr uuid; - static thread_local shared_ptr varchar; - static thread_local shared_ptr timeuuid; - static thread_local shared_ptr date; - static thread_local shared_ptr time; - static thread_local shared_ptr inet; - static thread_local shared_ptr varint; - static thread_local shared_ptr decimal; - static thread_local shared_ptr counter; - - static const std::vector>& values(); -public: - cql3_type(sstring name, data_type type, kind kind_) - : _name(std::move(name)), _type(std::move(type)), _native(true), _kind(kind_enum_set::prepare(kind_)) { - } - kind_enum_set::prepared get_kind() const { - assert(_kind); - return *_kind; - } -}; - -shared_ptr make_cql3_tuple_type(tuple_type t); - -#if 0 - public static class Custom implements CQL3Type - { - private final AbstractType type; - - public Custom(AbstractType type) - { - this.type = type; - } - - public Custom(String className) throws SyntaxException, ConfigurationException - { - this(TypeParser.parse(className)); - } - - public boolean isCollection() - { - return false; - } - - public AbstractType getType() - { - return type; - } - - @Override - public final boolean equals(Object o) - { - if(!(o instanceof Custom)) - return false; - - Custom that = (Custom)o; - return type.equals(that.type); - } - - @Override - public final int hashCode() - { - return type.hashCode(); - } - - @Override - public String toString() - { - return "'" + type + "'"; - } - } - - public static class Collection implements 
CQL3Type - { - private final CollectionType type; - - public Collection(CollectionType type) - { - this.type = type; - } - - public AbstractType getType() - { - return type; - } - - public boolean isCollection() - { - return true; - } - - @Override - public final boolean equals(Object o) - { - if(!(o instanceof Collection)) - return false; - - Collection that = (Collection)o; - return type.equals(that.type); - } - - @Override - public final int hashCode() - { - return type.hashCode(); - } - - @Override - public String toString() - { - boolean isFrozen = !this.type.isMultiCell(); - StringBuilder sb = new StringBuilder(isFrozen ? "frozen<" : ""); - switch (type.kind) - { - case LIST: - AbstractType listType = ((ListType)type).getElementsType(); - sb.append("list<").append(listType.asCQL3Type()); - break; - case SET: - AbstractType setType = ((SetType)type).getElementsType(); - sb.append("set<").append(setType.asCQL3Type()); - break; - case MAP: - AbstractType keysType = ((MapType)type).getKeysType(); - AbstractType valuesType = ((MapType)type).getValuesType(); - sb.append("map<").append(keysType.asCQL3Type()).append(", ").append(valuesType.asCQL3Type()); - break; - default: - throw new AssertionError(); - } - sb.append(">"); - if (isFrozen) - sb.append(">"); - return sb.toString(); - } - } - - public static class UserDefined implements CQL3Type - { - // Keeping this separatly from type just to simplify toString() - private final String name; - private final UserType type; - - private UserDefined(String name, UserType type) - { - this.name = name; - this.type = type; - } - - public static UserDefined create(UserType type) - { - return new UserDefined(UTF8Type.instance.compose(type.name), type); - } - - public boolean isCollection() - { - return false; - } - - public AbstractType getType() - { - return type; - } - - @Override - public final boolean equals(Object o) - { - if(!(o instanceof UserDefined)) - return false; - - UserDefined that = (UserDefined)o; - return type.equals(that.type); - } - - @Override - public final int hashCode() - { - return type.hashCode(); - } - - @Override - public String toString() - { - return name; - } - } - - public static class Tuple implements CQL3Type - { - private final TupleType type; - - private Tuple(TupleType type) - { - this.type = type; - } - - public static Tuple create(TupleType type) - { - return new Tuple(type); - } - - public boolean isCollection() - { - return false; - } - - public AbstractType getType() - { - return type; - } - - @Override - public final boolean equals(Object o) - { - if(!(o instanceof Tuple)) - return false; - - Tuple that = (Tuple)o; - return type.equals(that.type); - } - - @Override - public final int hashCode() - { - return type.hashCode(); - } - - @Override - public String toString() - { - StringBuilder sb = new StringBuilder(); - sb.append("tuple<"); - for (int i = 0; i < type.size(); i++) - { - if (i > 0) - sb.append(", "); - sb.append(type.type(i).asCQL3Type()); - } - sb.append(">"); - return sb.toString(); - } - } -#endif - -} diff --git a/scylla/cql3/cql_statement.hh b/scylla/cql3/cql_statement.hh deleted file mode 100644 index 8a5e0cb..0000000 --- a/scylla/cql3/cql_statement.hh +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2014 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "service/client_state.hh" -#include "service/query_state.hh" -#include "service/storage_proxy.hh" -#include "cql3/query_options.hh" - -namespace cql_transport { - -namespace messages { - -class result_message; - -} - -} - -namespace cql3 { - -class metadata; -shared_ptr make_empty_metadata(); - -class cql_statement { -public: - virtual ~cql_statement() - { } - - virtual uint32_t get_bound_terms() = 0; - - /** - * Perform any access verification necessary for the statement. - * - * @param state the current client state - */ - virtual future<> check_access(const service::client_state& state) = 0; - - /** - * Perform additional validation required by the statment. - * To be overriden by subclasses if needed. - * - * @param state the current client state - */ - virtual void validate(distributed& proxy, const service::client_state& state) = 0; - - /** - * Execute the statement and return the resulting result or null if there is no result. - * - * @param state the current query state - * @param options options for this query (consistency, variables, pageSize, ...) - */ - virtual future<::shared_ptr> - execute(distributed& proxy, service::query_state& state, const query_options& options) = 0; - - /** - * Variant of execute used for internal query against the system tables, and thus only query the local node = 0. 
- * - * @param state the current query state - */ - virtual future<::shared_ptr> - execute_internal(distributed& proxy, service::query_state& state, const query_options& options) = 0; - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const = 0; - - virtual bool depends_on_keyspace(const sstring& ks_name) const = 0; - - virtual bool depends_on_column_family(const sstring& cf_name) const = 0; - - virtual shared_ptr get_result_metadata() const = 0; -}; - -class cql_statement_no_metadata : public cql_statement { -public: - virtual shared_ptr get_result_metadata() const override { - return make_empty_metadata(); - } -}; - -} diff --git a/scylla/cql3/error_collector.hh b/scylla/cql3/error_collector.hh deleted file mode 100644 index c7ce4fc..0000000 --- a/scylla/cql3/error_collector.hh +++ /dev/null @@ -1,356 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/error_listener.hh" -#include "exceptions/exceptions.hh" -#include "types.hh" - -namespace cql3 { - -/** - * ErrorListener that collect and enhance the errors send by the CQL lexer and parser. - */ -template -class error_collector : public error_listener { - /** - * The offset of the first token of the snippet. - */ - static const int32_t FIRST_TOKEN_OFFSET = 10; - - /** - * The offset of the last token of the snippet. - */ - static const int32_t LAST_TOKEN_OFFSET = 2; - - /** - * The CQL query. - */ - const sstring_view _query; - - /** - * The error messages. - */ - std::vector _error_msgs; -public: - - /** - * Creates a new ErrorCollector instance to collect the syntax errors associated to the specified CQL - * query. 
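error_collector records every syntax error reported by the lexer and parser and, once parsing finishes, surfaces only the first one. The collect-then-throw-first pattern in a standalone form (a hypothetical simplified class, with no ANTLR types):

#include <stdexcept>
#include <string>
#include <vector>

class syntax_error_collector {
    std::vector<std::string> _error_msgs;
public:
    // Called from lexer/parser callbacks; errors are recorded, not thrown,
    // so that parsing can continue and report everything it finds.
    void on_syntax_error(std::string msg) {
        _error_msgs.push_back(std::move(msg));
    }

    // After parsing, raise only the first recorded error, if any.
    void throw_first_syntax_error() const {
        if (!_error_msgs.empty()) {
            throw std::runtime_error(_error_msgs.front());
        }
    }
};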
- * - * @param query the CQL query that will be parsed - */ - error_collector(const sstring_view& query) : _query(query) {} - - virtual void syntax_error(RecognizerType& recognizer, ANTLR_UINT8** token_names, ExceptionBaseType* ex) override { - auto hdr = get_error_header(ex); - auto msg = get_error_message(recognizer, ex, token_names); - std::stringstream result; - result << hdr << ' ' << msg; -#if 0 - if (recognizer instanceof Parser) - appendQuerySnippet((Parser) recognizer, builder); -#endif - _error_msgs.emplace_back(result.str()); - } - - virtual void syntax_error(RecognizerType& recognizer, const sstring& msg) override { - _error_msgs.emplace_back(msg); - } - - /** - * Throws the first syntax error found by the lexer or the parser if it exists. - * - * @throws SyntaxException the syntax error. - */ - void throw_first_syntax_error() { - if (!_error_msgs.empty()) { - throw exceptions::syntax_exception(_error_msgs[0]); - } - } - -private: - std::string get_error_header(ExceptionBaseType* ex) { - std::stringstream result; - result << "line " << ex->get_line() << ":" << ex->get_charPositionInLine(); - return result.str(); - } - - std::string get_error_message(RecognizerType& recognizer, ExceptionBaseType* ex, ANTLR_UINT8** token_names) - { - using namespace antlr3; - std::stringstream msg; - switch (ex->getType()) { - case ExceptionType::UNWANTED_TOKEN_EXCEPTION: { - msg << "extraneous input " << get_token_error_display(recognizer, ex->get_token()); - if (token_names != nullptr) { - std::string token_name; - if (recognizer.is_eof_token(ex->get_expecting())) { - token_name = "EOF"; - } else { - token_name = reinterpret_cast(token_names[ex->get_expecting()]); - } - msg << " expecting " << token_name; - } - break; - } - case ExceptionType::MISSING_TOKEN_EXCEPTION: { - std::string token_name; - if (token_names == nullptr) { - token_name = "(" + std::to_string(ex->get_expecting()) + ")"; - } else { - if (recognizer.is_eof_token(ex->get_expecting())) { - token_name = "EOF"; - } else { - token_name = reinterpret_cast(token_names[ex->get_expecting()]); - } - } - msg << "missing " << token_name << " at " << get_token_error_display(recognizer, ex->get_token()); - break; - } - case ExceptionType::NO_VIABLE_ALT_EXCEPTION: { - msg << "no viable alternative at input " << get_token_error_display(recognizer, ex->get_token()); - break; - } - default: - ex->displayRecognitionError(token_names, msg); - } - return msg.str(); - } - - std::string get_token_error_display(RecognizerType& recognizer, const TokenType* token) - { - return "'" + recognizer.token_text(token) + "'"; - } - -#if 0 - - /** - * Appends a query snippet to the message to help the user to understand the problem. - * - * @param parser the parser used to parse the query - * @param builder the StringBuilder used to build the error message - */ - private void appendQuerySnippet(Parser parser, StringBuilder builder) - { - TokenStream tokenStream = parser.getTokenStream(); - int index = tokenStream.index(); - int size = tokenStream.size(); - - Token from = tokenStream.get(getSnippetFirstTokenIndex(index)); - Token to = tokenStream.get(getSnippetLastTokenIndex(index, size)); - Token offending = tokenStream.get(getOffendingTokenIndex(index, size)); - - appendSnippet(builder, from, to, offending); - } - - /** - * Appends a query snippet to the message to help the user to understand the problem. 
- * - * @param from the first token to include within the snippet - * @param to the last token to include within the snippet - * @param offending the token which is responsible for the error - */ - final void appendSnippet(StringBuilder builder, - Token from, - Token to, - Token offending) - { - if (!areTokensValid(from, to, offending)) - return; - - String[] lines = query.split("\n"); - - boolean includeQueryStart = (from.getLine() == 1) && (from.getCharPositionInLine() == 0); - boolean includeQueryEnd = (to.getLine() == lines.length) - && (getLastCharPositionInLine(to) == lines[lines.length - 1].length()); - - builder.append(" ("); - - if (!includeQueryStart) - builder.append("..."); - - String toLine = lines[lineIndex(to)]; - int toEnd = getLastCharPositionInLine(to); - lines[lineIndex(to)] = toEnd >= toLine.length() ? toLine : toLine.substring(0, toEnd); - lines[lineIndex(offending)] = highlightToken(lines[lineIndex(offending)], offending); - lines[lineIndex(from)] = lines[lineIndex(from)].substring(from.getCharPositionInLine()); - - for (int i = lineIndex(from), m = lineIndex(to); i <= m; i++) - builder.append(lines[i]); - - if (!includeQueryEnd) - builder.append("..."); - - builder.append(")"); - } - - /** - * Checks if the specified tokens are valid. - * - * @param tokens the tokens to check - * @return true if all the specified tokens are valid ones, - * false otherwise. - */ - private static boolean areTokensValid(Token... tokens) - { - for (Token token : tokens) - { - if (!isTokenValid(token)) - return false; - } - return true; - } - - /** - * Checks that the specified token is valid. - * - * @param token the token to check - * @return true if it is considered as valid, false otherwise. - */ - private static boolean isTokenValid(Token token) - { - return token.getLine() > 0 && token.getCharPositionInLine() >= 0; - } - - /** - * Returns the index of the offending token.
In the case where the offending token is an extra - * character at the end, the index returned by the TokenStream might be after the last token. - * To avoid that problem we need to make sure that the index of the offending token is a valid index - * (one for which a token exists).
- * - * @param index the token index returned by the TokenStream - * @param size the TokenStream size - * @return the valid index of the offending token - */ - private static int getOffendingTokenIndex(int index, int size) - { - return Math.min(index, size - 1); - } - - /** - * Puts the specified token within square brackets. - * - * @param line the line containing the token - * @param token the token to put within square brackets - */ - private static String highlightToken(String line, Token token) - { - String newLine = insertChar(line, getLastCharPositionInLine(token), ']'); - return insertChar(newLine, token.getCharPositionInLine(), '['); - } - - /** - * Returns the index of the last character relative to the beginning of the line 0..n-1 - * - * @param token the token - * @return the index of the last character relative to the beginning of the line 0..n-1 - */ - private static int getLastCharPositionInLine(Token token) - { - return token.getCharPositionInLine() + getLength(token); - } - - /** - * Return the token length. - * - * @param token the token - * @return the token length - */ - private static int getLength(Token token) - { - return token.getText().length(); - } - - /** - * Inserts a character at a given position within a String. - * - * @param s the String in which the character must be inserted - * @param index the position where the character must be inserted - * @param c the character to insert - * @return the modified String - */ - private static String insertChar(String s, int index, char c) - { - return new StringBuilder().append(s.substring(0, index)) - .append(c) - .append(s.substring(index)) - .toString(); - } - - /** - * Returns the index of the line number on which this token was matched; index=0..n-1 - * - * @param token the token - * @return the index of the line number on which this token was matched; index=0..n-1 - */ - private static int lineIndex(Token token) - { - return token.getLine() - 1; - } - - /** - * Returns the index of the last token which is part of the snippet. - * - * @param index the index of the token causing the error - * @param size the total number of tokens - * @return the index of the last token which is part of the snippet. - */ - private static int getSnippetLastTokenIndex(int index, int size) - { - return Math.min(size - 1, index + LAST_TOKEN_OFFSET); - } - - /** - * Returns the index of the first token which is part of the snippet. - * - * @param index the index of the token causing the error - * @return the index of the first token which is part of the snippet. - */ - private static int getSnippetFirstTokenIndex(int index) - { - return Math.max(0, index - FIRST_TOKEN_OFFSET); - } -#endif -}; - -} diff --git a/scylla/cql3/error_listener.hh b/scylla/cql3/error_listener.hh deleted file mode 100644 index 7cff980..0000000 --- a/scylla/cql3/error_listener.hh +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "seastarx.hh" -#include -#include - -namespace cql3 { - -/** - * Listener used to collect the syntax errors emitted by the Lexer and Parser. - */ -template -class error_listener { -public: - - /** - * Invoked when a syntax error occurs. - * - * @param recognizer the parser or lexer that emitted the error - * @param tokenNames the token names - * @param e the exception - */ - virtual void syntax_error(RecognizerType& recognizer, ANTLR_UINT8** token_names, ExceptionBaseType* ex) = 0; - - /** - * Invoked when a syntax error with a specified message occurs. - * - * @param recognizer the parser or lexer that emitted the error - * @param errorMsg the error message - */ - virtual void syntax_error(RecognizerType& recognizer, const sstring& error_msg) = 0; -}; - -} diff --git a/scylla/cql3/functions/abstract_function.hh b/scylla/cql3/functions/abstract_function.hh deleted file mode 100644 index 751b573..0000000 --- a/scylla/cql3/functions/abstract_function.hh +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2014 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "types.hh" -#include -#include -#include - -namespace cql3 { - -namespace functions { - -/** - * Base class for our native/hardcoded functions. - */ -class abstract_function : public virtual function { -protected: - function_name _name; - std::vector _arg_types; - data_type _return_type; - - abstract_function(function_name name, std::vector arg_types, data_type return_type) - : _name(std::move(name)), _arg_types(std::move(arg_types)), _return_type(std::move(return_type)) { - } - -public: - virtual const function_name& name() const override { - return _name; - } - - virtual const std::vector& arg_types() const override { - return _arg_types; - } - - virtual data_type return_type() const { - return _return_type; - } - - bool operator==(const abstract_function& x) const { - return _name == x._name - && _arg_types == x._arg_types - && _return_type == x._return_type; - } - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) override { - return _name.keyspace == ks_name && _name.name == function_name; - } - - virtual bool has_reference_to(function& f) override { - return false; - } - - virtual void print(std::ostream& os) const override; -}; - -inline -void -abstract_function::print(std::ostream& os) const { - os << _name << " : ("; - for (size_t i = 0; i < _arg_types.size(); ++i) { - if (i > 0) { - os << ", "; - } - os << _arg_types[i]->name(); // FIXME: asCQL3Type() - } - os << ") -> " << _return_type->name(); // FIXME: asCQL3Type() -} - -} -} - -namespace std { - -template <> -struct hash { - size_t operator()(const cql3::functions::abstract_function& f) const { - using namespace cql3::functions; - size_t v = 0; - boost::hash_combine(v, std::hash()(f.name())); - boost::hash_combine(v, boost::hash_value(f.arg_types())); - // FIXME: type hash - //boost::hash_combine(v, std::hash>()(f.return_type())); - return v; - } -}; - -} diff --git a/scylla/cql3/functions/aggregate_fcts.hh b/scylla/cql3/functions/aggregate_fcts.hh deleted file mode 100644 index 458bb6d..0000000 --- a/scylla/cql3/functions/aggregate_fcts.hh +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2014 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. 
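The std::hash specialization above folds the function name and the argument types into one seed with boost::hash_combine. The same accumulation pattern, reduced to plain strings for illustration:

#include <boost/functional/hash.hpp>
#include <functional>
#include <string>
#include <vector>

// Accumulate the name hash and the argument-type hashes into one seed.
size_t hash_signature(const std::string& name,
                      const std::vector<std::string>& arg_types) {
    size_t v = 0;
    boost::hash_combine(v, std::hash<std::string>{}(name));
    boost::hash_combine(v, boost::hash_value(arg_types));
    return v;
}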
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "aggregate_function.hh" -#include "native_aggregate_function.hh" - -namespace cql3 { -namespace functions { - -/** - * Factory methods for aggregate functions. - */ -namespace aggregate_fcts { - -class impl_count_function : public aggregate_function::aggregate { - int64_t _count; -public: - virtual void reset() override { - _count = 0; - } - virtual opt_bytes compute(cql_serialization_format sf) override { - return long_type->decompose(_count); - } - virtual void add_input(cql_serialization_format sf, const std::vector& values) override { - ++_count; - } -}; - - /** - * The function used to count the number of rows of a result set. This function is called when COUNT(*) or COUNT(1) - * is specified. - */ -inline -shared_ptr -make_count_rows_function() { - return make_native_aggregate_function_using("countRows", long_type); -} - -template -class impl_sum_function_for final : public aggregate_function::aggregate { - Type _sum{}; -public: - virtual void reset() override { - _sum = {}; - } - virtual opt_bytes compute(cql_serialization_format sf) override { - return data_type_for()->decompose(_sum); - } - virtual void add_input(cql_serialization_format sf, const std::vector& values) override { - if (!values[0]) { - return; - } - _sum += value_cast(data_type_for()->deserialize(*values[0])); - } -}; - -template -class sum_function_for final : public native_aggregate_function { -public: - sum_function_for() : native_aggregate_function("sum", data_type_for(), { data_type_for() }) {} - virtual std::unique_ptr new_aggregate() override { - return std::make_unique>(); - } -}; - - -template -inline -shared_ptr -make_sum_function() { - return make_shared>(); -} - -template -class impl_avg_function_for final : public aggregate_function::aggregate { - Type _sum{}; - int64_t _count = 0; -public: - virtual void reset() override { - _sum = {}; - _count = 0; - } - virtual opt_bytes compute(cql_serialization_format sf) override { - Type ret = 0; - if (_count) { - ret = _sum / _count; - } - return data_type_for()->decompose(ret); - } - virtual void add_input(cql_serialization_format sf, const std::vector& values) override { - if (!values[0]) { - return; - } - ++_count; - _sum += value_cast(data_type_for()->deserialize(*values[0])); - } -}; - -template -class avg_function_for final : public native_aggregate_function { -public: - avg_function_for() : native_aggregate_function("avg", data_type_for(), { data_type_for() }) {} - virtual std::unique_ptr new_aggregate() override { - return std::make_unique>(); - } -}; - -template -inline -shared_ptr -make_avg_function() { - return make_shared>(); -} - -template -class impl_max_function_for final : public aggregate_function::aggregate { - std::experimental::optional _max{}; -public: - virtual void reset() override { - _max = {}; - } - virtual opt_bytes compute(cql_serialization_format sf) override { - if (!_max) { - return {}; - } - 
return data_type_for()->decompose(*_max); - } - virtual void add_input(cql_serialization_format sf, const std::vector& values) override { - if (!values[0]) { - return; - } - auto val = value_cast(data_type_for()->deserialize(*values[0])); - if (!_max) { - _max = val; - } else { - _max = std::max(*_max, val); - } - } -}; - -template -class max_function_for final : public native_aggregate_function { -public: - max_function_for() : native_aggregate_function("max", data_type_for(), { data_type_for() }) {} - virtual std::unique_ptr new_aggregate() override { - return std::make_unique>(); - } -}; - - /** - * Creates a MAX function for the specified type. - * - * @param inputType the function input and output type - * @return a MAX function for the specified type. - */ -template -shared_ptr -make_max_function() { - return make_shared>(); -} - -template -class impl_min_function_for final : public aggregate_function::aggregate { - std::experimental::optional _min{}; -public: - virtual void reset() override { - _min = {}; - } - virtual opt_bytes compute(cql_serialization_format sf) override { - if (!_min) { - return {}; - } - return data_type_for()->decompose(*_min); - } - virtual void add_input(cql_serialization_format sf, const std::vector& values) override { - if (!values[0]) { - return; - } - auto val = value_cast(data_type_for()->deserialize(*values[0])); - if (!_min) { - _min = val; - } else { - _min = std::min(*_min, val); - } - } -}; - -template -class min_function_for final : public native_aggregate_function { -public: - min_function_for() : native_aggregate_function("min", data_type_for(), { data_type_for() }) {} - virtual std::unique_ptr new_aggregate() override { - return std::make_unique>(); - } -}; - - - /** - * Creates a MIN function for the specified type. - * - * @param inputType the function input and output type - * @return a MIN function for the specified type. - */ -template -shared_ptr -make_min_function() { - return make_shared>(); -} - - -template -class impl_count_function_for final : public aggregate_function::aggregate { - int64_t _count = 0; -public: - virtual void reset() override { - _count = 0; - } - virtual opt_bytes compute(cql_serialization_format sf) override { - return long_type->decompose(_count); - } - virtual void add_input(cql_serialization_format sf, const std::vector& values) override { - if (!values[0]) { - return; - } - ++_count; - } -}; - -template -class count_function_for final : public native_aggregate_function { -public: - count_function_for() : native_aggregate_function("count", long_type, { data_type_for() }) {} - virtual std::unique_ptr new_aggregate() override { - return std::make_unique>(); - } -}; - - /** - * Creates a COUNT function for the specified type. - * - * @param inputType the function input type - * @return a COUNT function for the specified type. - */ -template -shared_ptr -make_count_function() { - return make_shared>(); -} - -} -} -} - diff --git a/scylla/cql3/functions/aggregate_function.hh b/scylla/cql3/functions/aggregate_function.hh deleted file mode 100644 index 5643138..0000000 --- a/scylla/cql3/functions/aggregate_function.hh +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * - * Copyright (C) 2014 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "function.hh" -#include - -namespace cql3 { -namespace functions { - - -/** - * Performs a calculation on a set of values and returns a single value. - */ -class aggregate_function : public virtual function { -public: - class aggregate; - - /** - * Creates a new Aggregate instance. - * - * @return a new Aggregate instance. - */ - virtual std::unique_ptr new_aggregate() = 0; - - /** - * An aggregation operation. - */ - class aggregate { - public: - using opt_bytes = aggregate_function::opt_bytes; - - virtual ~aggregate() {} - - /** - * Adds the specified input to this aggregate. - * - * @param protocol_version native protocol version - * @param values the values to add to the aggregate. - */ - virtual void add_input(cql_serialization_format sf, const std::vector& values) = 0; - - /** - * Computes and returns the aggregate's current value. - * - * @param protocol_version native protocol version - * @return the aggregate's current value. - */ - virtual opt_bytes compute(cql_serialization_format sf) = 0; - - /** - * Resets this aggregate. - */ - virtual void reset() = 0; - }; -}; - -} -} diff --git a/scylla/cql3/functions/bytes_conversion_fcts.hh b/scylla/cql3/functions/bytes_conversion_fcts.hh deleted file mode 100644 index 90d743e..0000000 --- a/scylla/cql3/functions/bytes_conversion_fcts.hh +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
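The interface above implies a fixed driver protocol: reset(), then any number of add_input() calls, then compute(). A self-contained model of that lifecycle, with simplified stand-ins for opt_bytes and the serialization format (illustrative types only, not scylla's):

#include <cstdint>
#include <iostream>
#include <memory>
#include <optional>
#include <vector>

using opt_value = std::optional<int64_t>;   // stand-in for opt_bytes

struct aggregate {                          // mirrors aggregate_function::aggregate
    virtual ~aggregate() = default;
    virtual void reset() = 0;
    virtual void add_input(const std::vector<opt_value>& values) = 0;
    virtual opt_value compute() = 0;
};

struct sum_aggregate final : aggregate {
    int64_t _sum = 0;
    void reset() override { _sum = 0; }
    void add_input(const std::vector<opt_value>& values) override {
        if (values[0]) {                    // null inputs are skipped, as in impl_sum_function_for
            _sum += *values[0];
        }
    }
    opt_value compute() override { return _sum; }
};

int main() {
    std::unique_ptr<aggregate> agg = std::make_unique<sum_aggregate>();
    agg->reset();                           // the driver always resets first
    for (int64_t v : {1, 2, 3}) {
        agg->add_input({v});
    }
    std::cout << *agg->compute() << "\n";   // prints 6
}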
- */ - -/* - * Modified by ScyllaDB - * - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "native_scalar_function.hh" -#include "exceptions/exceptions.hh" -#include "core/print.hh" -#include "cql3/cql3_type.hh" - -namespace cql3 { - -namespace functions { - -// Most of the XAsBlob and blobAsX functions are basically no-op since everything is -// bytes internally. They only "trick" the type system. - -inline -shared_ptr -make_to_blob_function(data_type from_type) { - auto name = from_type->as_cql3_type()->to_string() + "asblob"; - return make_native_scalar_function(name, bytes_type, { from_type }, - [] (cql_serialization_format sf, const std::vector& parameters) { - return parameters[0]; - }); -} - -inline -shared_ptr -make_from_blob_function(data_type to_type) { - sstring name = sstring("blobas") + to_type->as_cql3_type()->to_string(); - return make_native_scalar_function(name, to_type, { bytes_type }, - [name, to_type] (cql_serialization_format sf, const std::vector& parameters) -> bytes_opt { - auto&& val = parameters[0]; - if (!val) { - return val; - } - try { - to_type->validate(*val); - return val; - } catch (marshal_exception& e) { - using namespace exceptions; - throw invalid_request_exception(sprint( - "In call to function %s, value 0x%s is not a valid binary representation for type %s", - name, to_hex(val), to_type->as_cql3_type()->to_string())); - } - }); -} - -inline -shared_ptr -make_varchar_as_blob_fct() { - return make_native_scalar_function("varcharasblob", bytes_type, { utf8_type }, - [] (cql_serialization_format sf, const std::vector& parameters) -> bytes_opt { - return parameters[0]; - }); -} - -inline -shared_ptr -make_blob_as_varchar_fct() { - return make_native_scalar_function("blobasvarchar", utf8_type, { bytes_type }, - [] (cql_serialization_format sf, const std::vector& parameters) -> bytes_opt { - return parameters[0]; - }); -} - -} -} diff --git a/scylla/cql3/functions/function.hh b/scylla/cql3/functions/function.hh deleted file mode 100644 index c84c19f..0000000 --- a/scylla/cql3/functions/function.hh +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
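The "trick the type system" point above can be made concrete: the to-blob direction is pure identity, while the from-blob direction validates before returning the same bytes. A minimal sketch under assumed stand-in types (the fixed-width check stands in for to_type->validate(), purely for illustration):

#include <cstdint>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <vector>

using bytes = std::vector<uint8_t>;
using bytes_opt = std::optional<bytes>;

bytes_opt as_blob(const bytes_opt& v) {
    return v;                      // no-op: everything is bytes internally
}

bytes_opt blob_as_int64(const bytes_opt& v) {
    if (v && v->size() != 8) {     // stand-in for to_type->validate(*val)
        throw std::invalid_argument("not a valid binary representation of bigint");
    }
    return v;                      // validated, still a pass-through
}

int main() {
    bytes_opt ok = bytes(8, 0);
    std::cout << blob_as_int64(as_blob(ok))->size() << "\n";   // prints 8
    try {
        blob_as_int64(bytes{1, 2, 3});                         // wrong width: throws
    } catch (const std::invalid_argument& e) {
        std::cout << e.what() << "\n";
    }
}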
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2014 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "function_name.hh" -#include "types.hh" -#include -#include - -namespace cql3 { -namespace functions { - -class function { -public: - using opt_bytes = std::experimental::optional; - virtual ~function() {} - virtual const function_name& name() const = 0; - virtual const std::vector& arg_types() const = 0; - virtual data_type return_type() const = 0; - - /** - * Checks whether the function is a pure function (i.e., one that neither depends on external state nor produces side effects) or not. - * - * @return true if the function is a pure function, false otherwise. - */ - virtual bool is_pure() = 0; - - /** - * Checks whether the function is a native/hardcoded one or not. - * - * @return true if the function is a native/hardcoded one, false otherwise. - */ - virtual bool is_native() = 0; - - /** - * Checks whether the function is an aggregate function or not. - * - * @return true if the function is an aggregate function, false otherwise. - */ - virtual bool is_aggregate() = 0; - - virtual void print(std::ostream& os) const = 0; - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) = 0; - virtual bool has_reference_to(function& f) = 0; - friend class function_call; - friend std::ostream& operator<<(std::ostream& os, const function& f); -}; - -inline -std::ostream& -operator<<(std::ostream& os, const function& f) { - f.print(os); - return os; -} - -} -} diff --git a/scylla/cql3/functions/function_call.hh b/scylla/cql3/functions/function_call.hh deleted file mode 100644 index ffa6963..0000000 --- a/scylla/cql3/functions/function_call.hh +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "function.hh" -#include "scalar_function.hh" -#include "cql3/term.hh" -#include "exceptions/exceptions.hh" - -namespace cql3 { -namespace functions { - -class function_call : public non_terminal { - const shared_ptr _fun; - const std::vector> _terms; -public: - function_call(shared_ptr fun, std::vector> terms) - : _fun(std::move(fun)), _terms(std::move(terms)) { - } - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override; - virtual void collect_marker_specification(shared_ptr bound_names) override; - virtual shared_ptr bind(const query_options& options) override; - virtual cql3::raw_value_view bind_and_get(const query_options& options) override; -private: - static bytes_opt execute_internal(cql_serialization_format sf, scalar_function& fun, std::vector params); -public: - virtual bool contains_bind_marker() const override; -private: - static shared_ptr make_terminal(shared_ptr fun, cql3::raw_value result, cql_serialization_format sf); -public: - class raw : public term::raw { - function_name _name; - std::vector> _terms; - public: - raw(function_name name, std::vector> terms) - : _name(std::move(name)), _terms(std::move(terms)) { - } - virtual ::shared_ptr prepare(database& db, const sstring& keyspace, ::shared_ptr receiver) override; - private: - // All parameters must be terminal - static bytes_opt execute(scalar_function& fun, std::vector> parameters); - public: - virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, shared_ptr receiver) override; - virtual sstring to_string() const override; - }; -}; - -} -} diff --git a/scylla/cql3/functions/function_name.hh b/scylla/cql3/functions/function_name.hh deleted file mode 100644 index b802ee0..0000000 --- a/scylla/cql3/functions/function_name.hh +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2014 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. 
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "core/sstring.hh" -#include "db/system_keyspace.hh" -#include -#include - -namespace cql3 { - -namespace functions { - -class function_name final { -public: - sstring keyspace; - sstring name; - - static function_name native_function(sstring name) { - return function_name(db::system_keyspace::NAME, name); - } - - function_name() = default; // for ANTLR - function_name(sstring keyspace, sstring name) - : keyspace(std::move(keyspace)), name(std::move(name)) { - } - - function_name as_native_function() const { - return native_function(name); - } - - bool has_keyspace() const { - return !keyspace.empty(); - } - - bool operator==(const function_name& x) const { - return keyspace == x.keyspace && name == x.name; - } -}; - -inline -std::ostream& operator<<(std::ostream& os, const function_name& fn) { - if (!fn.keyspace.empty()) { - os << fn.keyspace << "."; - } - return os << fn.name; -} - -} -} - -namespace std { - -template <> -struct hash { - size_t operator()(const cql3::functions::function_name& x) const { - return std::hash()(x.keyspace) ^ std::hash()(x.name); - } -}; - -} diff --git a/scylla/cql3/functions/functions.cc b/scylla/cql3/functions/functions.cc deleted file mode 100644 index 5dfab91..0000000 --- a/scylla/cql3/functions/functions.cc +++ /dev/null @@ -1,478 +0,0 @@ -/* - * Copyright (C) 2014 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
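The equality operator and std::hash specialization on function_name above exist so it can key the overload registry that functions.cc builds next: one multimap entry per overload, under a single name. A self-contained model of that keying (std::string standing in for sstring; the hash mirrors the XOR combination above):

#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

struct function_name {
    std::string keyspace;
    std::string name;
    bool operator==(const function_name& o) const {
        return keyspace == o.keyspace && name == o.name;
    }
};

namespace std {
template <>
struct hash<function_name> {
    size_t operator()(const function_name& x) const {
        return hash<string>()(x.keyspace) ^ hash<string>()(x.name);
    }
};
}

int main() {
    std::unordered_multimap<function_name, std::string> declared;
    function_name sum{"system", "sum"};
    // overloads share one key; equal_range/count retrieve all of them
    declared.emplace(sum, "sum(int)");
    declared.emplace(sum, "sum(double)");
    std::cout << declared.count(sum) << " overloads of system.sum\n";
}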
- */ - -#include "functions.hh" -#include "function_call.hh" -#include "token_fct.hh" -#include "cql3/maps.hh" -#include "cql3/sets.hh" -#include "cql3/lists.hh" -#include "cql3/constants.hh" - -namespace cql3 { -namespace functions { - -thread_local std::unordered_multimap> functions::_declared = init(); - -std::unordered_multimap> -functions::init() { - std::unordered_multimap> ret; - auto declare = [&ret] (shared_ptr f) { ret.emplace(f->name(), f); }; - declare(aggregate_fcts::make_count_rows_function()); - declare(time_uuid_fcts::make_now_fct()); - declare(time_uuid_fcts::make_min_timeuuid_fct()); - declare(time_uuid_fcts::make_max_timeuuid_fct()); - declare(time_uuid_fcts::make_date_of_fct()); - declare(time_uuid_fcts::make_unix_timestamp_of_fcf()); - declare(make_uuid_fct()); - - for (auto&& type : cql3_type::values()) { - // Note: because text and varchar end up being synonymous, our automatic makeToBlobFunction doesn't work - // for varchar, so we special-case it below. We also skip blob for obvious reasons. - if (type == cql3_type::varchar || type == cql3_type::blob) { - continue; - } - // counters are not supported yet - if (type->is_counter()) { - warn(unimplemented::cause::COUNTERS); - continue; - } - - declare(make_to_blob_function(type->get_type())); - declare(make_from_blob_function(type->get_type())); - } - declare(aggregate_fcts::make_count_function()); - declare(aggregate_fcts::make_max_function()); - declare(aggregate_fcts::make_min_function()); - - declare(aggregate_fcts::make_count_function()); - declare(aggregate_fcts::make_max_function()); - declare(aggregate_fcts::make_min_function()); - - declare(aggregate_fcts::make_count_function()); - declare(aggregate_fcts::make_max_function()); - declare(aggregate_fcts::make_min_function()); - - declare(aggregate_fcts::make_count_function()); - declare(aggregate_fcts::make_max_function()); - declare(aggregate_fcts::make_min_function()); - - declare(aggregate_fcts::make_count_function()); - declare(aggregate_fcts::make_max_function()); - declare(aggregate_fcts::make_min_function()); - - //FIXME: - //declare(aggregate_fcts::make_count_function()); - //declare(aggregate_fcts::make_max_function()); - //declare(aggregate_fcts::make_min_function()); - - // FIXME: more count/min/max - - declare(make_varchar_as_blob_fct()); - declare(make_blob_as_varchar_fct()); - declare(aggregate_fcts::make_sum_function()); - declare(aggregate_fcts::make_sum_function()); - declare(aggregate_fcts::make_sum_function()); - declare(aggregate_fcts::make_sum_function()); -#if 0 - declare(AggregateFcts.sumFunctionForDecimal); - declare(AggregateFcts.sumFunctionForVarint); -#endif - declare(aggregate_fcts::make_avg_function()); - declare(aggregate_fcts::make_avg_function()); - declare(aggregate_fcts::make_avg_function()); - declare(aggregate_fcts::make_avg_function()); -#if 0 - declare(AggregateFcts.avgFunctionForVarint); - declare(AggregateFcts.avgFunctionForDecimal); -#endif - - // also needed for smp: -#if 0 - MigrationManager.instance.register(new FunctionsMigrationListener()); -#endif - return ret; -} - -shared_ptr -functions::make_arg_spec(const sstring& receiver_ks, const sstring& receiver_cf, - const function& fun, size_t i) { - auto&& name = boost::lexical_cast(fun.name()); - std::transform(name.begin(), name.end(), name.begin(), ::tolower); - return ::make_shared(receiver_ks, - receiver_cf, - ::make_shared(sprint("arg%d(%s)", i, name), true), - fun.arg_types()[i]); -} - -int -functions::get_overload_count(const function_name& name) { - return
_declared.count(name); -} - -shared_ptr -functions::get(database& db, - const sstring& keyspace, - const function_name& name, - const std::vector>& provided_args, - const sstring& receiver_ks, - const sstring& receiver_cf) { - - static const function_name TOKEN_FUNCTION_NAME = function_name::native_function("token"); - - if (name.has_keyspace() - ? name == TOKEN_FUNCTION_NAME - : name.name == TOKEN_FUNCTION_NAME.name) - { - return ::make_shared(db.find_schema(receiver_ks, receiver_cf)); - } - - std::vector> candidates; - auto&& add_declared = [&] (function_name fn) { - auto&& fns = _declared.equal_range(fn); - for (auto i = fns.first; i != fns.second; ++i) { - candidates.push_back(i->second); - } - }; - if (!name.has_keyspace()) { - // add 'SYSTEM' (native) candidates - add_declared(name.as_native_function()); - add_declared(function_name(keyspace, name.name)); - } else { - // function name is fully qualified (keyspace + name) - add_declared(name); - } - - if (candidates.empty()) { - return {}; - } - - // Fast path if there is only one choice - if (candidates.size() == 1) { - auto fun = std::move(candidates[0]); - validate_types(db, keyspace, fun, provided_args, receiver_ks, receiver_cf); - return fun; - } - - std::vector> compatibles; - for (auto&& to_test : candidates) { - auto r = match_arguments(db, keyspace, to_test, provided_args, receiver_ks, receiver_cf); - switch (r) { - case assignment_testable::test_result::EXACT_MATCH: - // We always favor exact matches - return to_test; - case assignment_testable::test_result::WEAKLY_ASSIGNABLE: - compatibles.push_back(std::move(to_test)); - break; - default: - ; - }; - } - - if (compatibles.empty()) { - throw exceptions::invalid_request_exception( - sprint("Invalid call to function %s, none of its type signatures match (known type signatures: %s)", - name, join(", ", candidates))); - } - - if (compatibles.size() > 1) { - throw exceptions::invalid_request_exception( - sprint("Ambiguous call to function %s (can be matched by the following signatures: %s): use type casts to disambiguate", - name, join(", ", compatibles))); - } - - return std::move(compatibles[0]); -} - -std::vector> -functions::find(const function_name& name) { - auto range = _declared.equal_range(name); - std::vector> ret; - for (auto i = range.first; i != range.second; ++i) { - ret.push_back(i->second); - } - return ret; -} - -shared_ptr -functions::find(const function_name& name, const std::vector& arg_types) { - assert(name.has_keyspace()); // : "function name not fully qualified"; - for (auto&& f : find(name)) { - if (type_equals(f->arg_types(), arg_types)) { - return f; - } - } - return {}; -} - -// This method and matchArguments are somewhat duplicated, but this method allows us to provide more precise errors in the common -// case where there is no override for a given function. This is thus probably worth the minor code duplication. -void -functions::validate_types(database& db, - const sstring& keyspace, - shared_ptr fun, - const std::vector>& provided_args, - const sstring& receiver_ks, - const sstring& receiver_cf) { - if (provided_args.size() != fun->arg_types().size()) { - throw exceptions::invalid_request_exception( - sprint("Invalid number of arguments in call to function %s: %d required but %d provided", - fun->name(), fun->arg_types().size(), provided_args.size())); - } - - for (size_t i = 0; i < provided_args.size(); ++i) { - auto&& provided = provided_args[i]; - - // If the concrete argument is a bind variable, it can have any type.
- // We'll validate the value actually provided at execution time. - if (!provided) { - continue; - } - - auto&& expected = make_arg_spec(receiver_ks, receiver_cf, *fun, i); - if (!is_assignable(provided->test_assignment(db, keyspace, expected))) { - throw exceptions::invalid_request_exception( - sprint("Type error: %s cannot be passed as argument %d of function %s of type %s", - provided, i, fun->name(), expected->type->as_cql3_type())); - } - } -} - -assignment_testable::test_result -functions::match_arguments(database& db, const sstring& keyspace, - shared_ptr fun, - const std::vector>& provided_args, - const sstring& receiver_ks, - const sstring& receiver_cf) { - if (provided_args.size() != fun->arg_types().size()) { - return assignment_testable::test_result::NOT_ASSIGNABLE; - } - - // It's an exact match if all arguments are exact matches, but it is not assignable as soon as any one is non-assignable. - auto res = assignment_testable::test_result::EXACT_MATCH; - for (size_t i = 0; i < provided_args.size(); ++i) { - auto&& provided = provided_args[i]; - if (!provided) { - res = assignment_testable::test_result::WEAKLY_ASSIGNABLE; - continue; - } - auto&& expected = make_arg_spec(receiver_ks, receiver_cf, *fun, i); - auto arg_res = provided->test_assignment(db, keyspace, expected); - if (arg_res == assignment_testable::test_result::NOT_ASSIGNABLE) { - return assignment_testable::test_result::NOT_ASSIGNABLE; - } - if (arg_res == assignment_testable::test_result::WEAKLY_ASSIGNABLE) { - res = assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } - } - return res; -} - -bool -functions::type_equals(const std::vector& t1, const std::vector& t2) { -#if 0 - if (t1.size() != t2.size()) - return false; - for (int i = 0; i < t1.size(); i ++) - if (!typeEquals(t1.get(i), t2.get(i))) - return false; - return true; -#endif - abort(); -} - -bool -function_call::uses_function(const sstring& ks_name, const sstring& function_name) const { - return _fun->uses_function(ks_name, function_name); -} - -void -function_call::collect_marker_specification(shared_ptr bound_names) { - for (auto&& t : _terms) { - t->collect_marker_specification(bound_names); - } -} - -shared_ptr -function_call::bind(const query_options& options) { - return make_terminal(_fun, cql3::raw_value::make_value(bind_and_get(options)), options.get_cql_serialization_format()); -} - -cql3::raw_value_view -function_call::bind_and_get(const query_options& options) { - std::vector buffers; - buffers.reserve(_terms.size()); - for (auto&& t : _terms) { - // For now, we don't allow nulls as arguments, as no existing function needs it and it - // simplifies things.
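To summarize the selection rule implemented by get() and match_arguments() above: an exact match wins immediately, weakly-assignable candidates are collected, and afterwards zero compatibles is a no-matching-signature error while more than one is an ambiguity error. A reduced, self-contained model of that loop (illustrative names only; the match results are precomputed where the real code would call match_arguments()):

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

enum class test_result { EXACT_MATCH, WEAKLY_ASSIGNABLE, NOT_ASSIGNABLE };

struct candidate {
    std::string signature;
    test_result match;
};

std::string resolve(const std::vector<candidate>& candidates) {
    std::vector<std::string> compatibles;
    for (auto&& c : candidates) {
        switch (c.match) {
        case test_result::EXACT_MATCH:
            return c.signature;                 // exact matches always win
        case test_result::WEAKLY_ASSIGNABLE:
            compatibles.push_back(c.signature); // kept for the second pass
            break;
        default:
            break;                              // NOT_ASSIGNABLE: discarded
        }
    }
    if (compatibles.empty()) {
        throw std::runtime_error("none of its type signatures match");
    }
    if (compatibles.size() > 1) {
        throw std::runtime_error("ambiguous call: use type casts to disambiguate");
    }
    return compatibles[0];
}

int main() {
    std::cout << resolve({{"max(int)", test_result::WEAKLY_ASSIGNABLE},
                          {"max(double)", test_result::NOT_ASSIGNABLE}}) << "\n";
}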
- auto val = t->bind_and_get(options); - if (!val) { - throw exceptions::invalid_request_exception(sprint("Invalid null value for argument to %s", *_fun)); - } - buffers.push_back(std::move(to_bytes_opt(val))); - } - auto result = execute_internal(options.get_cql_serialization_format(), *_fun, std::move(buffers)); - return options.make_temporary(cql3::raw_value::make_value(result)); -} - -bytes_opt -function_call::execute_internal(cql_serialization_format sf, scalar_function& fun, std::vector params) { - bytes_opt result = fun.execute(sf, params); - try { - // Check that the method didn't lie about its declared return type - if (result) { - fun.return_type()->validate(*result); - } - return result; - } catch (const marshal_exception& e) { - throw runtime_exception(sprint("Return of function %s (%s) is not a valid value for its declared return type %s", - fun, to_hex(result), - *fun.return_type()->as_cql3_type() - )); - } -} - -bool -function_call::contains_bind_marker() const { - for (auto&& t : _terms) { - if (t->contains_bind_marker()) { - return true; - } - } - return false; -} - -shared_ptr -function_call::make_terminal(shared_ptr fun, cql3::raw_value result, cql_serialization_format sf) { - if (!dynamic_pointer_cast(fun->return_type())) { - return ::make_shared(std::move(result)); - } - - auto ctype = static_pointer_cast(fun->return_type()); - bytes_view res; - if (result) { - res = *result; - } - if (&ctype->_kind == &collection_type_impl::kind::list) { - return make_shared(lists::value::from_serialized(std::move(res), static_pointer_cast(ctype), sf)); - } else if (&ctype->_kind == &collection_type_impl::kind::set) { - return make_shared(sets::value::from_serialized(std::move(res), static_pointer_cast(ctype), sf)); - } else if (&ctype->_kind == &collection_type_impl::kind::map) { - return make_shared(maps::value::from_serialized(std::move(res), static_pointer_cast(ctype), sf)); - } - abort(); -} - -::shared_ptr -function_call::raw::prepare(database& db, const sstring& keyspace, ::shared_ptr receiver) { - std::vector> args; - args.reserve(_terms.size()); - std::transform(_terms.begin(), _terms.end(), std::back_inserter(args), - [] (auto&& x) -> shared_ptr { - return x; - }); - auto&& fun = functions::functions::get(db, keyspace, _name, args, receiver->ks_name, receiver->cf_name); - if (!fun) { - throw exceptions::invalid_request_exception(sprint("Unknown function %s called", _name)); - } - if (fun->is_aggregate()) { - throw exceptions::invalid_request_exception("Aggregation functions are not supported in the where clause"); - } - - // Can't use static_pointer_cast<> because function is a virtual base class of scalar_function - auto&& scalar_fun = dynamic_pointer_cast(fun); - - // functions::get() will complain if no function "name" type-checks with the provided arguments.
- // We still have to validate that the return type matches, however. - if (!receiver->type->is_value_compatible_with(*scalar_fun->return_type())) { - throw exceptions::invalid_request_exception(sprint("Type error: cannot assign result of function %s (type %s) to %s (type %s)", - fun->name(), fun->return_type()->as_cql3_type(), - receiver->name, receiver->type->as_cql3_type())); - } - - if (scalar_fun->arg_types().size() != _terms.size()) { - throw exceptions::invalid_request_exception(sprint("Incorrect number of arguments specified for function %s (expected %d, found %d)", - fun->name(), fun->arg_types().size(), _terms.size())); - } - - std::vector> parameters; - parameters.reserve(_terms.size()); - bool all_terminal = true; - for (size_t i = 0; i < _terms.size(); ++i) { - auto&& t = _terms[i]->prepare(db, keyspace, functions::make_arg_spec(receiver->ks_name, receiver->cf_name, *scalar_fun, i)); - if (dynamic_cast(t.get())) { - all_terminal = false; - } - parameters.push_back(t); - } - - // If all parameters are terminal and the function is pure, we can - // evaluate it now, otherwise we'd have to wait until execution time - if (all_terminal && scalar_fun->is_pure()) { - return make_terminal(scalar_fun, cql3::raw_value::make_value(execute(*scalar_fun, parameters)), query_options::DEFAULT.get_cql_serialization_format()); - } else { - return ::make_shared(scalar_fun, parameters); - } -} - -bytes_opt -function_call::raw::execute(scalar_function& fun, std::vector> parameters) { - std::vector buffers; - buffers.reserve(parameters.size()); - for (auto&& t : parameters) { - assert(dynamic_cast(t.get())); - auto&& param = static_cast(t.get())->get(query_options::DEFAULT); - buffers.push_back(std::move(to_bytes_opt(param))); - } - - return execute_internal(cql_serialization_format::internal(), fun, buffers); -} - -assignment_testable::test_result -function_call::raw::test_assignment(database& db, const sstring& keyspace, shared_ptr receiver) { - // Note: functions::get() will return null if the function doesn't exist, or throw if no function matching - // the arguments can be found. We may get one of those if an undefined/wrong function is used as argument - // of another, existing, function. In that case, we return true here because we'll throw a proper exception - // later with a more helpful error message than if we were to return false here. - try { - auto&& fun = functions::get(db, keyspace, _name, _terms, receiver->ks_name, receiver->cf_name); - if (fun && receiver->type->equals(fun->return_type())) { - return assignment_testable::test_result::EXACT_MATCH; - } else if (!fun || receiver->type->is_value_compatible_with(*fun->return_type())) { - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } else { - return assignment_testable::test_result::NOT_ASSIGNABLE; - } - } catch (exceptions::invalid_request_exception& e) { - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } -} - -sstring -function_call::raw::to_string() const { - return sprint("%s(%s)", _name, join(", ", _terms)); -} - - -} -} - - diff --git a/scylla/cql3/functions/functions.hh b/scylla/cql3/functions/functions.hh deleted file mode 100644 index 236da5b..0000000 --- a/scylla/cql3/functions/functions.hh +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership.
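prepare() above folds a call at preparation time when every argument is terminal (a literal) and the function is pure; otherwise evaluation is deferred to execution. A minimal model of that decision, under simplified stand-in types:

#include <functional>
#include <iostream>
#include <optional>
#include <vector>

struct term {
    std::optional<long> literal;   // set => "terminal", unset => bind marker
};

struct prepared {
    bool folded;
    long value;                    // meaningful only when folded
};

prepared prepare_call(bool is_pure,
                      const std::vector<term>& args,
                      const std::function<long(const std::vector<long>&)>& fn) {
    bool all_terminal = true;
    std::vector<long> literals;
    for (auto&& a : args) {
        if (!a.literal) { all_terminal = false; break; }
        literals.push_back(*a.literal);
    }
    if (all_terminal && is_pure) {
        return {true, fn(literals)};   // evaluate once, now
    }
    return {false, 0};                 // defer to execution time
}

int main() {
    auto add = [](const std::vector<long>& v) { return v[0] + v[1]; };
    auto p = prepare_call(true, {{2}, {3}}, add);
    std::cout << (p.folded ? "folded to " : "deferred ") << p.value << "\n";
}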
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "function.hh" -#include "aggregate_fcts.hh" -#include "time_uuid_fcts.hh" -#include "uuid_fcts.hh" -#include "bytes_conversion_fcts.hh" -#include "cql3/assignment_testable.hh" -#include "cql3/cql3_type.hh" -#include "cql3/column_identifier.hh" -#include "to_string.hh" -#include -#include - -namespace cql3 { - -namespace functions { - -#if 0 - // We special case the token function because that's the only function whose argument types actually - // depend on the table on which the function is called. Because it's the sole exception, it's easier - // to handle it as a special case. - private static final FunctionName TOKEN_FUNCTION_NAME = FunctionName.nativeFunction("token"); -#endif - -class functions { - static thread_local std::unordered_multimap> _declared; -private: - static std::unordered_multimap> init(); -public: - static shared_ptr make_arg_spec(const sstring& receiver_ks, const sstring& receiver_cf, - const function& fun, size_t i); - static int get_overload_count(const function_name& name); -public: - static shared_ptr get(database& db, - const sstring& keyspace, - const function_name& name, - const std::vector>& provided_args, - const sstring& receiver_ks, - const sstring& receiver_cf); - template <typename AssignmentTestablePtrRange> - static shared_ptr get(database& db, - const sstring& keyspace, - const function_name& name, - AssignmentTestablePtrRange&& provided_args, - const sstring& receiver_ks, - const sstring& receiver_cf) { - const std::vector> args(std::begin(provided_args), std::end(provided_args)); - return get(db, keyspace, name, args, receiver_ks, receiver_cf); - } - static std::vector> find(const function_name& name); - static shared_ptr find(const function_name& name, const std::vector& arg_types); -private: - // This method and matchArguments are somewhat duplicated, but this method allows us to provide more precise errors in the common - // case where there is no override for a given function. This is thus probably worth the minor code duplication. - static void validate_types(database& db, - const sstring& keyspace, - shared_ptr fun, - const std::vector>& provided_args, - const sstring& receiver_ks, - const sstring& receiver_cf); - static assignment_testable::test_result match_arguments(database& db, const sstring& keyspace, - shared_ptr fun, - const std::vector>& provided_args, - const sstring& receiver_ks, - const sstring& receiver_cf); -#if 0 - // This is *not* thread safe, but it is only called from SchemaTables, which is synchronized. - public static void addFunction(AbstractFunction fun) - { - // We shouldn't get here unless the function doesn't exist - assert find(fun.name(), fun.argTypes()) == null; - declare(fun); - } - - // Same remarks as for addFunction - public static void removeFunction(FunctionName name, List> argsTypes) - { - Function old = find(name, argsTypes); - assert old != null && !old.isNative(); - declared.remove(old.name(), old); - } - - // Same remarks as for addFunction - public static void replaceFunction(AbstractFunction fun) - { - removeFunction(fun.name(), fun.argTypes()); - addFunction(fun); - } - - public static List getReferencesTo(Function old) - { - List references = new ArrayList<>(); - for (Function function : declared.values()) - if (function.hasReferenceTo(old)) - references.add(function); - return references; - } - - public static Collection all() - { - return declared.values(); - } - - public static boolean typeEquals(AbstractType t1, AbstractType t2) - { - return t1.asCQL3Type().toString().equals(t2.asCQL3Type().toString()); - } - -#endif - - static bool type_equals(const std::vector& t1, const std::vector& t2); - -#if 0 - private static class FunctionsMigrationListener implements IMigrationListener - { - public void onCreateKeyspace(String ksName) { } - public void onCreateColumnFamily(String ksName, String cfName) { } - public void onCreateUserType(String ksName, String typeName) { } - public void onCreateFunction(String ksName, String functionName) { } - public void onCreateAggregate(String ksName, String aggregateName) { } - - public void onUpdateKeyspace(String ksName) { } - public void onUpdateColumnFamily(String ksName, String cfName) { } - public void onUpdateUserType(String ksName, String typeName) { - for (Function function : all()) - if (function instanceof UDFunction) - ((UDFunction)function).userTypeUpdated(ksName, typeName); - } - public void onUpdateFunction(String ksName, String functionName) { } - public void onUpdateAggregate(String ksName, String aggregateName) { } - - public void onDropKeyspace(String ksName) { } - public void onDropColumnFamily(String ksName, String cfName) { } - public void onDropUserType(String ksName, String typeName) { } - public void onDropFunction(String ksName, String functionName) { } - public void onDropAggregate(String ksName, String aggregateName) { } - } -#endif -}; - -} -} diff --git a/scylla/cql3/functions/native_aggregate_function.hh b/scylla/cql3/functions/native_aggregate_function.hh deleted file mode 100644 index ecff2b7..0000000 --- a/scylla/cql3/functions/native_aggregate_function.hh +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * - * Copyright (C) 2014 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "types.hh" -#include "native_function.hh" -#include "core/shared_ptr.hh" - -namespace cql3 { -namespace functions { - -/** - * Base class for the AggregateFunction native classes. - */ -class native_aggregate_function : public native_function, public aggregate_function { -protected: - native_aggregate_function(sstring name, data_type return_type, - std::vector arg_types) - : native_function(std::move(name), std::move(return_type), std::move(arg_types)) { - } - -public: - virtual bool is_aggregate() override final { - return true; - } -}; - -template -class native_aggregate_function_using : public native_aggregate_function { -public: - native_aggregate_function_using(sstring name, data_type type) - : native_aggregate_function(std::move(name), type, {}) { - } - virtual std::unique_ptr new_aggregate() override { - return std::make_unique(); - } -}; - -template -shared_ptr -make_native_aggregate_function_using(sstring name, data_type type) { - return ::make_shared>(name, type); -} - - -} -} diff --git a/scylla/cql3/functions/native_function.hh b/scylla/cql3/functions/native_function.hh deleted file mode 100644 index be9130c..0000000 --- a/scylla/cql3/functions/native_function.hh +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * - * Copyright (C) 2014 ScyllaDB - */ - -/* - * This file is part of Scylla. 
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "abstract_function.hh" - -namespace cql3 { -namespace functions { - -/** - * Base class for our native/hardcoded functions. - */ -class native_function : public abstract_function { -protected: - native_function(sstring name, data_type return_type, std::vector arg_types) - : abstract_function(function_name::native_function(std::move(name)), - std::move(arg_types), std::move(return_type)) { - } - -public: - // Most of our functions are pure, the other ones should override this - virtual bool is_pure() override { - return true; - } - - virtual bool is_native() override { - return true; - } -}; - -} -} diff --git a/scylla/cql3/functions/native_scalar_function.hh b/scylla/cql3/functions/native_scalar_function.hh deleted file mode 100644 index 20446bc..0000000 --- a/scylla/cql3/functions/native_scalar_function.hh +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * - * Copyright (C) 2014 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "native_function.hh" -#include "scalar_function.hh" -#include "core/shared_ptr.hh" - -namespace cql3 { -namespace functions { - -/** - * Base class for the ScalarFunction native classes. 
- */ -class native_scalar_function : public native_function, public scalar_function { -protected: - native_scalar_function(sstring name, data_type return_type, std::vector args_type) - : native_function(std::move(name), std::move(return_type), std::move(args_type)) { - } - -public: - virtual bool is_aggregate() override { - return false; - } -}; - -template -class native_scalar_function_for : public native_scalar_function { - Func _func; -public: - native_scalar_function_for(sstring name, - data_type return_type, - const std::vector arg_types, - Func&& func) - : native_scalar_function(std::move(name), std::move(return_type), std::move(arg_types)) - , _func(std::forward(func)) { - } - virtual bool is_pure() override { - return Pure; - } - virtual bytes_opt execute(cql_serialization_format sf, const std::vector& parameters) override { - return _func(sf, parameters); - } -}; - -template -shared_ptr -make_native_scalar_function(sstring name, - data_type return_type, - std::vector args_type, - Func&& func) { - return ::make_shared>(std::move(name), - std::move(return_type), - std::move(args_type), - std::forward(func)); -} - -} -} diff --git a/scylla/cql3/functions/scalar_function.hh b/scylla/cql3/functions/scalar_function.hh deleted file mode 100644 index 5184220..0000000 --- a/scylla/cql3/functions/scalar_function.hh +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2014 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "bytes.hh" -#include - -namespace cql3 { - -namespace functions { - -class scalar_function : public virtual function { -public: - /** - * Applies this function to the specified parameter. 
- * - * @param sf serialization format used for parameters and return value - * @param parameters the input parameters - * @return the result of applying this function to the parameter - * @throws InvalidRequestException if this function cannot be applied to the parameter - */ - virtual bytes_opt execute(cql_serialization_format sf, const std::vector& parameters) = 0; -}; - - -} -} diff --git a/scylla/cql3/functions/time_uuid_fcts.hh b/scylla/cql3/functions/time_uuid_fcts.hh deleted file mode 100644 index bf44a69..0000000 --- a/scylla/cql3/functions/time_uuid_fcts.hh +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "native_scalar_function.hh" -#include "utils/UUID_gen.hh" -#include -#include - -namespace cql3 { - -namespace functions { - -namespace time_uuid_fcts { - -inline -shared_ptr -make_now_fct() { - return make_native_scalar_function("now", timeuuid_type, {}, - [] (cql_serialization_format sf, const std::vector& values) -> bytes_opt { - return {to_bytes(utils::UUID_gen::get_time_UUID())}; - }); -} - -inline -shared_ptr -make_min_timeuuid_fct() { - return make_native_scalar_function("mintimeuuid", timeuuid_type, { timestamp_type }, - [] (cql_serialization_format sf, const std::vector& values) -> bytes_opt { - auto& bb = values[0]; - if (!bb) { - return {}; - } - auto ts_obj = timestamp_type->deserialize(*bb); - if (ts_obj.is_null()) { - return {}; - } - auto ts = value_cast(ts_obj); - auto uuid = utils::UUID_gen::min_time_UUID(ts.time_since_epoch().count()); - return {timeuuid_type->decompose(uuid)}; - }); -} - -inline -shared_ptr -make_max_timeuuid_fct() { - return make_native_scalar_function("maxtimeuuid", timeuuid_type, { timestamp_type }, - [] (cql_serialization_format sf, const std::vector& values) -> bytes_opt { - // FIXME: should values be a vector>?
-            auto& bb = values[0];
-            if (!bb) {
-                return {};
-            }
-            auto ts_obj = timestamp_type->deserialize(*bb);
-            if (ts_obj.is_null()) {
-                return {};
-            }
-            auto ts = value_cast<db_clock::time_point>(ts_obj);
-            auto uuid = utils::UUID_gen::max_time_UUID(ts.time_since_epoch().count());
-            return {timeuuid_type->decompose(uuid)};
-        });
-}
-
-inline
-shared_ptr<function>
-make_date_of_fct() {
-    return make_native_scalar_function<true>("dateof", timestamp_type, { timeuuid_type },
-        [] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
-            using namespace utils;
-            auto& bb = values[0];
-            if (!bb) {
-                return {};
-            }
-            auto ts = db_clock::time_point(db_clock::duration(UUID_gen::unix_timestamp(UUID_gen::get_UUID(*bb))));
-            return {timestamp_type->decompose(ts)};
-        });
-}
-
-inline
-shared_ptr<function>
-make_unix_timestamp_of_fcf() {
-    return make_native_scalar_function<true>("unixtimestampof", long_type, { timeuuid_type },
-        [] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
-            using namespace utils;
-            auto& bb = values[0];
-            if (!bb) {
-                return {};
-            }
-            return {long_type->decompose(UUID_gen::unix_timestamp(UUID_gen::get_UUID(*bb)))};
-        });
-}
-
-}
-}
-}
diff --git a/scylla/cql3/functions/token_fct.hh b/scylla/cql3/functions/token_fct.hh
deleted file mode 100644
index ec747ac..0000000
--- a/scylla/cql3/functions/token_fct.hh
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Modified by ScyllaDB
- *
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
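All five time-UUID helpers above share one shape: a lambda over the serialized arguments, wrapped by make_native_scalar_function with the purity flag passed as a template argument (as reconstructed above), and with null inputs mapped to null results. A minimal sketch of the same pattern, reusing only names that appear in this patch; the "hourof" function itself is hypothetical, not something the patch defines:

    inline
    shared_ptr<function>
    make_hour_of_fct() {
        return make_native_scalar_function<true>("hourof", int32_type, { timeuuid_type },
            [] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
                auto& bb = values[0];
                if (!bb) {
                    return {}; // null in, null out, as in dateof()/unixtimestampof()
                }
                // milliseconds since the epoch, recovered from the timeuuid
                auto millis = utils::UUID_gen::unix_timestamp(utils::UUID_gen::get_UUID(*bb));
                return {int32_type->decompose(int32_t((millis / 3600000) % 24))}; // UTC hour of day
            });
    }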
- */
-
-#pragma once
-
-#include "types.hh"
-#include "native_scalar_function.hh"
-#include "dht/i_partitioner.hh"
-#include "utils/UUID.hh"
-
-namespace cql3 {
-namespace functions {
-
-class token_fct: public native_scalar_function {
-private:
-    schema_ptr _schema;
-
-public:
-    token_fct(schema_ptr s)
-            : native_scalar_function("token",
-                    dht::global_partitioner().get_token_validator(),
-                    s->partition_key_type()->types())
-            , _schema(s) {
-    }
-
-    bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) override {
-        auto key = partition_key::from_optional_exploded(*_schema, parameters);
-        auto tok = dht::global_partitioner().get_token(*_schema, key);
-        warn(unimplemented::cause::VALIDATION);
-        return dht::global_partitioner().token_to_bytes(tok);
-    }
-};
-
-}
-}
diff --git a/scylla/cql3/functions/uuid_fcts.hh b/scylla/cql3/functions/uuid_fcts.hh
deleted file mode 100644
index 008882c..0000000
--- a/scylla/cql3/functions/uuid_fcts.hh
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Modified by ScyllaDB
- *
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "types.hh"
-#include "native_scalar_function.hh"
-#include "utils/UUID.hh"
-
-namespace cql3 {
-
-namespace functions {
-
-inline
-shared_ptr<function>
-make_uuid_fct() {
-    return make_native_scalar_function<false>("uuid", uuid_type, {},
-        [] (cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
-            return {uuid_type->decompose(utils::make_random_uuid())};
-        });
-}
-
-}
-}
diff --git a/scylla/cql3/index_name.cc b/scylla/cql3/index_name.cc
deleted file mode 100644
index c285bc2..0000000
--- a/scylla/cql3/index_name.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "cql3/index_name.hh"
-
-namespace cql3 {
-
-void index_name::set_index(const sstring& idx, bool keep_case)
-{
-    _idx_name = to_internal_name(idx, keep_case);
-}
-
-const sstring& index_name::get_idx() const
-{
-    return _idx_name;
-}
-
-::shared_ptr<cf_name> index_name::get_cf_name() const
-{
-    auto cf = ::make_shared<cf_name>();
-    if (has_keyspace()) {
-        cf->set_keyspace(get_keyspace(), true);
-    }
-    return cf;
-}
-
-sstring index_name::to_string() const
-{
-    return keyspace_element_name::to_string() + _idx_name;
-}
-
-}
diff --git a/scylla/cql3/index_name.hh b/scylla/cql3/index_name.hh
deleted file mode 100644
index f8ddbb5..0000000
--- a/scylla/cql3/index_name.hh
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
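For reference, the deleted index_name API composes with keyspace_element_name as follows (an illustrative sketch with made-up values, not code from this patch):

    index_name in;
    in.set_keyspace("Media", false);  // unquoted CQL identifier: folded to "media"
    in.set_index("ByYear", true);     // keep_case == true: stored verbatim
    auto cf = in.get_cf_name();       // cf_name pre-seeded with keyspace "media"
    // in.to_string() == "media.ByYear"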
- */ - -#pragma once - -#include "cql3/keyspace_element_name.hh" - -#include "core/shared_ptr.hh" -#include "cql3/cf_name.hh" - -namespace cql3 { - -class index_name : public keyspace_element_name { - sstring _idx_name = ""; -public: - void set_index(const sstring& idx, bool keep_case); - - const sstring& get_idx() const; - - ::shared_ptr get_cf_name() const; - - virtual sstring to_string() const override; -}; - -} diff --git a/scylla/cql3/keyspace_element_name.cc b/scylla/cql3/keyspace_element_name.cc deleted file mode 100644 index c219ec8..0000000 --- a/scylla/cql3/keyspace_element_name.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/keyspace_element_name.hh" - -namespace cql3 { - -void keyspace_element_name::set_keyspace(const sstring& ks, bool keep_case) -{ - _ks_name = to_internal_name(ks, keep_case); -} - -bool keyspace_element_name::has_keyspace() const -{ - return bool(_ks_name); -} - -const sstring& keyspace_element_name::get_keyspace() const -{ - return *_ks_name; -} - -sstring keyspace_element_name::to_internal_name(sstring name, bool keep_case) -{ - if (!keep_case) { - std::transform(name.begin(), name.end(), name.begin(), ::tolower); - } - return name; -} - -sstring keyspace_element_name::to_string() const -{ - return has_keyspace() ? (get_keyspace() + ".") : ""; -} - -} diff --git a/scylla/cql3/keyspace_element_name.hh b/scylla/cql3/keyspace_element_name.hh deleted file mode 100644 index f47df2f..0000000 --- a/scylla/cql3/keyspace_element_name.hh +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "core/sstring.hh" -#include "seastarx.hh" - -#include - -namespace cql3 { - -/** - * Base class for the names of the keyspace elements (e.g. table, index ...) - */ -class keyspace_element_name { - /** - * The keyspace name as stored internally. - */ - std::experimental::optional _ks_name = std::experimental::nullopt; - -public: - /** - * Sets the keyspace. - * - * @param ks the keyspace name - * @param keepCase true if the case must be kept, false otherwise. - */ - void set_keyspace(const sstring& ks, bool keep_case); - - /** - * Checks if the keyspace is specified. - * @return true if the keyspace is specified, false otherwise. - */ - bool has_keyspace() const; - - const sstring& get_keyspace() const; - - virtual sstring to_string() const; - -protected: - /** - * Converts the specified name into the name used internally. - * - * @param name the name - * @param keepCase true if the case must be kept, false otherwise. - * @return the name used internally. - */ - static sstring to_internal_name(sstring name, bool keep_case); -}; - -} diff --git a/scylla/cql3/lists.cc b/scylla/cql3/lists.cc deleted file mode 100644 index e9b8173..0000000 --- a/scylla/cql3/lists.cc +++ /dev/null @@ -1,506 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
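to_internal_name() above is what implements CQL's identifier-quoting rule: unquoted names are folded to lower case on the way in, while quoted names keep their case. An illustrative sketch using the deleted class:

    keyspace_element_name n;
    n.set_keyspace("Events", false);  // written unquoted in CQL
    // n.get_keyspace() == "events", n.to_string() == "events."
    n.set_keyspace("Events", true);   // written quoted ("Events") in CQL
    // n.get_keyspace() == "Events", n.to_string() == "Events."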
- */ - -#include "lists.hh" -#include "update_parameters.hh" -#include "column_identifier.hh" -#include "cql3_type.hh" -#include "constants.hh" -#include -#include - -namespace cql3 { - -shared_ptr -lists::index_spec_of(shared_ptr column) { - return make_shared(column->ks_name, column->cf_name, - ::make_shared(sprint("idx(%s)", *column->name), true), int32_type); -} - -shared_ptr -lists::value_spec_of(shared_ptr column) { - return make_shared(column->ks_name, column->cf_name, - ::make_shared(sprint("value(%s)", *column->name), true), - dynamic_pointer_cast(column->type)->get_elements_type()); -} - -shared_ptr -lists::uuid_index_spec_of(shared_ptr column) { - return make_shared(column->ks_name, column->cf_name, - ::make_shared(sprint("uuid_idx(%s)", *column->name), true), uuid_type); -} - - -shared_ptr -lists::literal::prepare(database& db, const sstring& keyspace, shared_ptr receiver) { - validate_assignable_to(db, keyspace, receiver); - - auto&& value_spec = value_spec_of(receiver); - std::vector> values; - values.reserve(_elements.size()); - bool all_terminal = true; - for (auto rt : _elements) { - auto&& t = rt->prepare(db, keyspace, value_spec); - - if (t->contains_bind_marker()) { - throw exceptions::invalid_request_exception(sprint("Invalid list literal for %s: bind variables are not supported inside collection literals", *receiver->name)); - } - if (dynamic_pointer_cast(t)) { - all_terminal = false; - } - values.push_back(std::move(t)); - } - delayed_value value(values); - if (all_terminal) { - return value.bind(query_options::DEFAULT); - } else { - return make_shared(std::move(value)); - } -} - -void -lists::literal::validate_assignable_to(database& db, const sstring keyspace, shared_ptr receiver) { - if (!dynamic_pointer_cast(receiver->type)) { - throw exceptions::invalid_request_exception(sprint("Invalid list literal for %s of type %s", - *receiver->name, *receiver->type->as_cql3_type())); - } - auto&& value_spec = value_spec_of(receiver); - for (auto rt : _elements) { - if (!is_assignable(rt->test_assignment(db, keyspace, value_spec))) { - throw exceptions::invalid_request_exception(sprint("Invalid list literal for %s: value %s is not of type %s", - *receiver->name, *rt, *value_spec->type->as_cql3_type())); - } - } -} - -assignment_testable::test_result -lists::literal::test_assignment(database& db, const sstring& keyspace, shared_ptr receiver) { - if (!dynamic_pointer_cast(receiver->type)) { - return assignment_testable::test_result::NOT_ASSIGNABLE; - } - - // If there is no elements, we can't say it's an exact match (an empty list if fundamentally polymorphic). - if (_elements.empty()) { - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } - - auto&& value_spec = value_spec_of(receiver); - std::vector> to_test; - to_test.reserve(_elements.size()); - std::copy(_elements.begin(), _elements.end(), std::back_inserter(to_test)); - return assignment_testable::test_all(db, keyspace, value_spec, to_test); -} - -sstring -lists::literal::to_string() const { - return std::to_string(_elements); -} - -lists::value -lists::value::from_serialized(bytes_view v, list_type type, cql_serialization_format sf) { - try { - // Collections have this small hack that validate cannot be called on a serialized object, - // but compose does the validation (so we're fine). - // FIXME: deserializeForNativeProtocol()?! 
- auto l = value_cast(type->deserialize(v, sf)); - std::vector elements; - elements.reserve(l.size()); - for (auto&& element : l) { - // elements can be null in lists that represent a set of IN values - elements.push_back(element.is_null() ? bytes_opt() : bytes_opt(type->get_elements_type()->decompose(element))); - } - return value(std::move(elements)); - } catch (marshal_exception& e) { - throw exceptions::invalid_request_exception(e.what()); - } -} - -cql3::raw_value -lists::value::get(const query_options& options) { - return cql3::raw_value::make_value(get_with_protocol_version(options.get_cql_serialization_format())); -} - -bytes -lists::value::get_with_protocol_version(cql_serialization_format sf) { - // Can't use boost::indirect_iterator, because optional is not an iterator - auto deref = [] (bytes_opt& x) { return *x; }; - return collection_type_impl::pack( - boost::make_transform_iterator(_elements.begin(), deref), - boost::make_transform_iterator( _elements.end(), deref), - _elements.size(), sf); -} - -bool -lists::value::equals(shared_ptr lt, const value& v) { - if (_elements.size() != v._elements.size()) { - return false; - } - return std::equal(_elements.begin(), _elements.end(), - v._elements.begin(), - [t = lt->get_elements_type()] (const bytes_opt& e1, const bytes_opt& e2) { return t->equal(*e1, *e2); }); -} - -std::vector -lists::value::get_elements() { - return _elements; -} - -sstring -lists::value::to_string() const { - std::ostringstream os; - os << "["; - bool is_first = true; - for (auto&& e : _elements) { - if (!is_first) { - os << ", "; - } - is_first = false; - os << to_hex(e); - } - os << "]"; - return os.str(); -} - -bool -lists::delayed_value::contains_bind_marker() const { - // False since we don't support them in collection - return false; -} - -void -lists::delayed_value::collect_marker_specification(shared_ptr bound_names) { -} - -shared_ptr -lists::delayed_value::bind(const query_options& options) { - std::vector buffers; - buffers.reserve(_elements.size()); - for (auto&& t : _elements) { - auto bo = t->bind_and_get(options); - - if (bo.is_null()) { - throw exceptions::invalid_request_exception("null is not supported inside collections"); - } - if (bo.is_unset_value()) { - return constants::UNSET_VALUE; - } - // We don't support value > 64K because the serialization format encode the length as an unsigned short. - if (bo->size() > std::numeric_limits::max()) { - throw exceptions::invalid_request_exception(sprint("List value is too long. List values are limited to %d bytes but %d bytes value provided", - std::numeric_limits::max(), - bo->size())); - } - - buffers.push_back(std::move(to_bytes(*bo))); - } - return ::make_shared(buffers); -} - -::shared_ptr -lists::marker::bind(const query_options& options) { - const auto& value = options.get_value_at(_bind_index); - auto ltype = static_pointer_cast(_receiver->type); - if (value.is_null()) { - return nullptr; - } else if (value.is_unset_value()) { - return constants::UNSET_VALUE; - } else { - return make_shared(value::from_serialized(*value, std::move(ltype), options.get_cql_serialization_format())); - } -} - -constexpr const db_clock::time_point lists::precision_time::REFERENCE_TIME; -thread_local lists::precision_time lists::precision_time::_last = {db_clock::time_point::max(), 0}; - -lists::precision_time -lists::precision_time::get_next(db_clock::time_point millis) { - // FIXME: and if time goes backwards? - assert(millis <= _last.millis); - auto next = millis < _last.millis - ? 
precision_time{millis, 9999} - : precision_time{millis, std::max(0, _last.nanos - 1)}; - _last = next; - return next; -} - -void -lists::setter::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) { - const auto& value = _t->bind(params._options); - if (value == constants::UNSET_VALUE) { - return; - } - if (column.type->is_multi_cell()) { - // delete + append - collection_type_impl::mutation mut; - mut.tomb = params.make_tombstone_just_before(); - auto ctype = static_pointer_cast(column.type); - auto col_mut = ctype->serialize_mutation_form(std::move(mut)); - m.set_cell(prefix, column, std::move(col_mut)); - } - do_append(value, m, prefix, column, params); -} - -bool -lists::setter_by_index::requires_read() { - return true; -} - -void -lists::setter_by_index::collect_marker_specification(shared_ptr bound_names) { - operation::collect_marker_specification(bound_names); - _idx->collect_marker_specification(std::move(bound_names)); -} - -void -lists::setter_by_index::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) { - // we should not get here for frozen lists - assert(column.type->is_multi_cell()); // "Attempted to set an individual element on a frozen list"; - - auto index = _idx->bind_and_get(params._options); - if (index.is_null()) { - throw exceptions::invalid_request_exception("Invalid null value for list index"); - } - if (index.is_unset_value()) { - throw exceptions::invalid_request_exception("Invalid unset value for list index"); - } - auto value = _t->bind_and_get(params._options); - if (value.is_unset_value()) { - return; - } - - auto idx = net::ntoh(int32_t(*unaligned_cast(index->begin()))); - auto&& existing_list_opt = params.get_prefetched_list(m.key().view(), prefix.view(), column); - if (!existing_list_opt) { - throw exceptions::invalid_request_exception("Attempted to set an element on a list which is null"); - } - auto ltype = dynamic_pointer_cast(column.type); - auto&& existing_list = *existing_list_opt; - // we verified that index is an int32_type - if (idx < 0 || size_t(idx) >= existing_list.size()) { - throw exceptions::invalid_request_exception(sprint("List index %d out of bound, list has size %d", - idx, existing_list.size())); - } - - const bytes& eidx = existing_list[idx].key; - list_type_impl::mutation mut; - mut.cells.reserve(1); - if (!value) { - mut.cells.emplace_back(eidx, params.make_dead_cell()); - } else { - if (value->size() > std::numeric_limits::max()) { - throw exceptions::invalid_request_exception( - sprint("List value is too long. 
List values are limited to %d bytes but %d bytes value provided", - std::numeric_limits::max(), value->size())); - } - mut.cells.emplace_back(eidx, params.make_cell(*value)); - } - auto smut = ltype->serialize_mutation_form(mut); - m.set_cell(prefix, column, atomic_cell_or_collection::from_collection_mutation(std::move(smut))); -} - -bool -lists::setter_by_uuid::requires_read() { - return false; -} - -void -lists::setter_by_uuid::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) { - // we should not get here for frozen lists - assert(column.type->is_multi_cell()); // "Attempted to set an individual element on a frozen list"; - - auto index = _idx->bind_and_get(params._options); - auto value = _t->bind_and_get(params._options); - - if (!index) { - throw exceptions::invalid_request_exception("Invalid null value for list index"); - } - - auto ltype = dynamic_pointer_cast(column.type); - - list_type_impl::mutation mut; - mut.cells.reserve(1); - mut.cells.emplace_back(to_bytes(*index), params.make_cell(*value)); - auto smut = ltype->serialize_mutation_form(mut); - m.set_cell(prefix, column, - atomic_cell_or_collection::from_collection_mutation( - std::move(smut))); -} - -void -lists::appender::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) { - const auto& value = _t->bind(params._options); - if (value == constants::UNSET_VALUE) { - return; - } - assert(column.type->is_multi_cell()); // "Attempted to append to a frozen list"; - do_append(value, m, prefix, column, params); -} - -void -lists::do_append(shared_ptr value, - mutation& m, - const clustering_key_prefix& prefix, - const column_definition& column, - const update_parameters& params) { - auto&& list_value = dynamic_pointer_cast(value); - auto&& ltype = dynamic_pointer_cast(column.type); - if (column.type->is_multi_cell()) { - // If we append null, do nothing. Note that for Setter, we've - // already removed the previous value so we're good here too - if (!value || value == constants::UNSET_VALUE) { - return; - } - - auto&& to_add = list_value->_elements; - collection_type_impl::mutation appended; - appended.cells.reserve(to_add.size()); - for (auto&& e : to_add) { - auto uuid1 = utils::UUID_gen::get_time_UUID_bytes(); - auto uuid = bytes(reinterpret_cast(uuid1.data()), uuid1.size()); - // FIXME: can e be empty? 
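// (On the FIXME above: judging from this file alone, delayed_value::bind()
// rejects null elements outright, and only value::from_serialized(), the
// IN-marker path, produces disengaged bytes_opt elements, so *e should always
// be engaged by the time do_append() runs.)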
- appended.cells.emplace_back(std::move(uuid), params.make_cell(*e)); - } - m.set_cell(prefix, column, ltype->serialize_mutation_form(appended)); - } else { - // for frozen lists, we're overwriting the whole cell value - if (!value) { - m.set_cell(prefix, column, params.make_dead_cell()); - } else { - auto newv = list_value->get_with_protocol_version(cql_serialization_format::internal()); - m.set_cell(prefix, column, params.make_cell(std::move(newv))); - } - } -} - -void -lists::prepender::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) { - assert(column.type->is_multi_cell()); // "Attempted to prepend to a frozen list"; - auto&& value = _t->bind(params._options); - if (!value || value == constants::UNSET_VALUE) { - return; - } - - auto&& lvalue = dynamic_pointer_cast(std::move(value)); - assert(lvalue); - auto time = precision_time::REFERENCE_TIME - (db_clock::now() - precision_time::REFERENCE_TIME); - - collection_type_impl::mutation mut; - mut.cells.reserve(lvalue->get_elements().size()); - // We reverse the order of insertion, so that the last element gets the lastest time - // (lists are sorted by time) - for (auto&& v : lvalue->_elements | boost::adaptors::reversed) { - auto&& pt = precision_time::get_next(time); - auto uuid = utils::UUID_gen::get_time_UUID_bytes(pt.millis.time_since_epoch().count(), pt.nanos); - mut.cells.emplace_back(bytes(uuid.data(), uuid.size()), params.make_cell(*v)); - } - // now reverse again, to get the original order back - std::reverse(mut.cells.begin(), mut.cells.end()); - auto&& ltype = static_cast(column.type.get()); - m.set_cell(prefix, column, atomic_cell_or_collection::from_collection_mutation(ltype->serialize_mutation_form(std::move(mut)))); -} - -bool -lists::discarder::requires_read() { - return true; -} - -void -lists::discarder::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) { - assert(column.type->is_multi_cell()); // "Attempted to delete from a frozen list"; - - auto&& existing_list = params.get_prefetched_list(m.key().view(), prefix.view(), column); - // We want to call bind before possibly returning to reject queries where the value provided is not a list. - auto&& value = _t->bind(params._options); - - auto&& ltype = static_pointer_cast(column.type); - - if (!existing_list) { - return; - } - - auto&& elist = *existing_list; - - if (elist.empty()) { - return; - } - - if (!value || value == constants::UNSET_VALUE) { - return; - } - - auto lvalue = dynamic_pointer_cast(value); - assert(lvalue); - - // Note: below, we will call 'contains' on this toDiscard list for each element of existingList. - // Meaning that if toDiscard is big, converting it to a HashSet might be more efficient. However, - // the read-before-write this operation requires limits its usefulness on big lists, so in practice - // toDiscard will be small and keeping a list will be more efficient. 
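// (Concretely, the scan below costs O(|existing| * |to_discard|) element
// comparisons; the read-before-write needed to fetch the existing list
// already dominates that for the small to_discard sizes expected here.)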
- auto&& to_discard = lvalue->_elements; - collection_type_impl::mutation mnew; - for (auto&& cell : elist) { - auto have_value = [&] (bytes_view value) { - return std::find_if(to_discard.begin(), to_discard.end(), - [ltype, value] (auto&& v) { return ltype->get_elements_type()->equal(*v, value); }) - != to_discard.end(); - }; - if (have_value(cell.value)) { - mnew.cells.emplace_back(cell.key, params.make_dead_cell()); - } - } - auto mnew_ser = ltype->serialize_mutation_form(mnew); - m.set_cell(prefix, column, atomic_cell_or_collection::from_collection_mutation(std::move(mnew_ser))); -} - -bool -lists::discarder_by_index::requires_read() { - return true; -} - -void -lists::discarder_by_index::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) { - assert(column.type->is_multi_cell()); // "Attempted to delete an item by index from a frozen list"; - auto&& index = _t->bind(params._options); - if (!index) { - throw exceptions::invalid_request_exception("Invalid null value for list index"); - } - if (index == constants::UNSET_VALUE) { - return; - } - - auto ltype = static_pointer_cast(column.type); - auto cvalue = dynamic_pointer_cast(index); - assert(cvalue); - - auto&& existing_list_opt = params.get_prefetched_list(m.key().view(), prefix.view(), column); - int32_t idx = read_simple_exactly(*cvalue->_bytes); - if (!existing_list_opt) { - throw exceptions::invalid_request_exception("Attempted to delete an element from a list which is null"); - } - auto&& existing_list = *existing_list_opt; - if (idx < 0 || size_t(idx) >= existing_list.size()) { - throw exceptions::invalid_request_exception(sprint("List index %d out of bound, list has size %d", idx, existing_list.size())); - } - collection_type_impl::mutation mut; - mut.cells.emplace_back(existing_list[idx].key, params.make_dead_cell()); - m.set_cell(prefix, column, ltype->serialize_mutation_form(mut)); -} - -} diff --git a/scylla/cql3/lists.hh b/scylla/cql3/lists.hh deleted file mode 100644 index ecdf1c1..0000000 --- a/scylla/cql3/lists.hh +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
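Taken together, the operation classes in lists.cc implement CQL's list-update forms; the correspondence below is inferred from the asserts and comments in the deleted code:

    // lists::setter              UPDATE t SET l = [1, 2]   (tombstone, then append)
    // lists::appender            UPDATE t SET l = l + [3]
    // lists::prepender           UPDATE t SET l = [0] + l  (decreasing-timeuuid keys)
    // lists::setter_by_index     UPDATE t SET l[1] = 42    (requires read)
    // lists::discarder           UPDATE t SET l = l - [3]  (requires read)
    // lists::discarder_by_index  DELETE l[1] FROM t        (requires read)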
- * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/abstract_marker.hh" -#include "to_string.hh" -#include "utils/UUID_gen.hh" -#include "operation.hh" - -namespace cql3 { - -/** - * Static helper methods and classes for lists. - */ -class lists { - lists() = delete; -public: - static shared_ptr index_spec_of(shared_ptr column); - static shared_ptr value_spec_of(shared_ptr column); - static shared_ptr uuid_index_spec_of(shared_ptr); - - class literal : public term::raw { - const std::vector> _elements; - public: - explicit literal(std::vector> elements) - : _elements(std::move(elements)) { - } - shared_ptr prepare(database& db, const sstring& keyspace, shared_ptr receiver); - private: - void validate_assignable_to(database& db, const sstring keyspace, shared_ptr receiver); - public: - virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, shared_ptr receiver) override; - virtual sstring to_string() const override; - }; - - class value : public multi_item_terminal, collection_terminal { - public: - std::vector _elements; - public: - explicit value(std::vector elements) - : _elements(std::move(elements)) { - } - static value from_serialized(bytes_view v, list_type type, cql_serialization_format sf); - virtual cql3::raw_value get(const query_options& options) override; - virtual bytes get_with_protocol_version(cql_serialization_format sf) override; - bool equals(shared_ptr lt, const value& v); - virtual std::vector get_elements() override; - virtual sstring to_string() const; - friend class lists; - }; - /** - * Basically similar to a Value, but with some non-pure function (that need - * to be evaluated at execution time) in it. - * - * Note: this would also work for a list with bind markers, but we don't support - * that because 1) it's not excessively useful and 2) we wouldn't have a good - * column name to return in the ColumnSpecification for those markers (not a - * blocker per-se but we don't bother due to 1)). - */ - class delayed_value : public non_terminal { - std::vector> _elements; - public: - explicit delayed_value(std::vector> elements) - : _elements(std::move(elements)) { - } - virtual bool contains_bind_marker() const override; - virtual void collect_marker_specification(shared_ptr bound_names); - virtual shared_ptr bind(const query_options& options) override; - }; - - /** - * A marker for List values and IN relations - */ - class marker : public abstract_marker { - public: - marker(int32_t bind_index, ::shared_ptr receiver) - : abstract_marker{bind_index, std::move(receiver)} - { } - virtual ::shared_ptr bind(const query_options& options) override; - }; - - /* - * For prepend, we need to be able to generate unique but decreasing time - * UUID, which is a bit challenging. To do that, given a time in milliseconds, - * we adds a number representing the 100-nanoseconds precision and make sure - * that within the same millisecond, that number is always decreasing. We - * do rely on the fact that the user will only provide decreasing - * milliseconds timestamp for that purpose. - */ -private: - class precision_time { - public: - // Our reference time (1 jan 2010, 00:00:00) in milliseconds. 
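// (Sanity check: 1262304000000 ms / 1000 = 1262304000 s, which is indeed the
// Unix time of 2010-01-01T00:00:00Z. get_next() then yields, for repeated
// calls within the same millisecond m: {m, 9999}, {m, 9998}, ..., so the
// generated (millis, nanos) pairs, and the timeuuids built from them,
// strictly decrease as long as the supplied milliseconds never increase.)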
- static constexpr db_clock::time_point REFERENCE_TIME{std::chrono::milliseconds(1262304000000)}; - private: - static thread_local precision_time _last; - public: - db_clock::time_point millis; - int32_t nanos; - - static precision_time get_next(db_clock::time_point millis); - }; - -public: - class setter : public operation { - public: - setter(const column_definition& column, shared_ptr t) - : operation(column, std::move(t)) { - } - virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override; - }; - - class setter_by_index : public operation { - protected: - shared_ptr _idx; - public: - setter_by_index(const column_definition& column, shared_ptr idx, shared_ptr t) - : operation(column, std::move(t)), _idx(std::move(idx)) { - } - virtual bool requires_read() override; - virtual void collect_marker_specification(shared_ptr bound_names); - virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override; - }; - - class setter_by_uuid : public setter_by_index { - public: - setter_by_uuid(const column_definition& column, shared_ptr idx, shared_ptr t) - : setter_by_index(column, std::move(idx), std::move(t)) { - } - virtual bool requires_read() override; - virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override; - }; - - class appender : public operation { - public: - using operation::operation; - virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override; - }; - - static void do_append(shared_ptr value, - mutation& m, - const clustering_key_prefix& prefix, - const column_definition& column, - const update_parameters& params); - - class prepender : public operation { - public: - using operation::operation; - virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override; - }; - - class discarder : public operation { - public: - discarder(const column_definition& column, shared_ptr t) - : operation(column, std::move(t)) { - } - virtual bool requires_read() override; - virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override; - }; - - class discarder_by_index : public operation { - public: - discarder_by_index(const column_definition& column, shared_ptr idx) - : operation(column, std::move(idx)) { - } - virtual bool requires_read() override; - virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params); - }; -}; - -} diff --git a/scylla/cql3/maps.cc b/scylla/cql3/maps.cc deleted file mode 100644 index 43c8a01..0000000 --- a/scylla/cql3/maps.cc +++ /dev/null @@ -1,372 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
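The 64K checks that recur in lists.cc above and in maps.cc below all stem from the same constraint stated in their comments: the serialization format length-prefixes each collection element with an unsigned short, so nothing larger than 65535 bytes can be encoded. A one-line restatement in code:

    static_assert(std::numeric_limits<uint16_t>::max() == 65535,
                  "collection elements are length-prefixed by an unsigned 16-bit short");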
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "maps.hh" -#include "cql3/abstract_marker.hh" -#include "cql3/term.hh" -#include "operation.hh" -#include "update_parameters.hh" -#include "exceptions/exceptions.hh" -#include "cql3/cql3_type.hh" -#include "constants.hh" - -namespace cql3 { - -shared_ptr -maps::key_spec_of(column_specification& column) { - return ::make_shared(column.ks_name, column.cf_name, - ::make_shared(sprint("key(%s)", *column.name), true), - dynamic_pointer_cast(column.type)->get_keys_type()); -} - -shared_ptr -maps::value_spec_of(column_specification& column) { - return ::make_shared(column.ks_name, column.cf_name, - ::make_shared(sprint("value(%s)", *column.name), true), - dynamic_pointer_cast(column.type)->get_values_type()); -} - -::shared_ptr -maps::literal::prepare(database& db, const sstring& keyspace, ::shared_ptr receiver) { - validate_assignable_to(db, keyspace, *receiver); - - auto key_spec = maps::key_spec_of(*receiver); - auto value_spec = maps::value_spec_of(*receiver); - std::unordered_map, shared_ptr> values; - values.reserve(entries.size()); - bool all_terminal = true; - for (auto&& entry : entries) { - auto k = entry.first->prepare(db, keyspace, key_spec); - auto v = entry.second->prepare(db, keyspace, value_spec); - - if (k->contains_bind_marker() || v->contains_bind_marker()) { - throw exceptions::invalid_request_exception(sprint("Invalid map literal for %s: bind variables are not supported inside collection literals", *receiver->name)); - } - - if (dynamic_pointer_cast(k) || dynamic_pointer_cast(v)) { - all_terminal = false; - } - - values.emplace(k, v); - } - delayed_value value(static_pointer_cast(receiver->type)->get_keys_type()->as_less_comparator(), values); - if (all_terminal) { - return value.bind(query_options::DEFAULT); - } else { - return make_shared(std::move(value)); - } -} - -void -maps::literal::validate_assignable_to(database& db, const sstring& keyspace, column_specification& receiver) { - if (!dynamic_pointer_cast(receiver.type)) { - throw exceptions::invalid_request_exception(sprint("Invalid map literal for %s of type %s", *receiver.name, *receiver.type->as_cql3_type())); - } - auto&& key_spec = maps::key_spec_of(receiver); - auto&& value_spec = maps::value_spec_of(receiver); - for (auto&& entry : entries) { - if (!is_assignable(entry.first->test_assignment(db, keyspace, key_spec))) { - throw exceptions::invalid_request_exception(sprint("Invalid map literal for %s: key %s is not of type %s", *receiver.name, *entry.first, *key_spec->type->as_cql3_type())); - } - if (!is_assignable(entry.second->test_assignment(db, keyspace, value_spec))) { - throw exceptions::invalid_request_exception(sprint("Invalid map literal for %s: value %s is not 
of type %s", *receiver.name, *entry.second, *value_spec->type->as_cql3_type())); - } - } -} - -assignment_testable::test_result -maps::literal::test_assignment(database& db, const sstring& keyspace, ::shared_ptr receiver) { - if (!dynamic_pointer_cast(receiver->type)) { - return assignment_testable::test_result::NOT_ASSIGNABLE; - } - // If there is no elements, we can't say it's an exact match (an empty map if fundamentally polymorphic). - if (entries.empty()) { - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } - auto key_spec = maps::key_spec_of(*receiver); - auto value_spec = maps::value_spec_of(*receiver); - // It's an exact match if all are exact match, but is not assignable as soon as any is non assignable. - auto res = assignment_testable::test_result::EXACT_MATCH; - for (auto entry : entries) { - auto t1 = entry.first->test_assignment(db, keyspace, key_spec); - auto t2 = entry.second->test_assignment(db, keyspace, value_spec); - if (t1 == assignment_testable::test_result::NOT_ASSIGNABLE || t2 == assignment_testable::test_result::NOT_ASSIGNABLE) - return assignment_testable::test_result::NOT_ASSIGNABLE; - if (t1 != assignment_testable::test_result::EXACT_MATCH || t2 != assignment_testable::test_result::EXACT_MATCH) - res = assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } - return res; -} - -sstring -maps::literal::to_string() const { - sstring result = "{"; - for (size_t i = 0; i < entries.size(); i++) { - if (i > 0) { - result += ", "; - } - result += entries[i].first->to_string(); - result += ":"; - result += entries[i].second->to_string(); - } - result += "}"; - return result; -} - -maps::value -maps::value::from_serialized(bytes_view value, map_type type, cql_serialization_format sf) { - try { - // Collections have this small hack that validate cannot be called on a serialized object, - // but compose does the validation (so we're fine). - // FIXME: deserialize_for_native_protocol?! 
- auto m = value_cast(type->deserialize(value, sf)); - std::map map(type->get_keys_type()->as_less_comparator()); - for (auto&& e : m) { - map.emplace(type->get_keys_type()->decompose(e.first), - type->get_values_type()->decompose(e.second)); - } - return { std::move(map) }; - } catch (marshal_exception& e) { - throw exceptions::invalid_request_exception(e.what()); - } -} - -cql3::raw_value -maps::value::get(const query_options& options) { - return cql3::raw_value::make_value(get_with_protocol_version(options.get_cql_serialization_format())); -} - -bytes -maps::value::get_with_protocol_version(cql_serialization_format sf) { - //FIXME: share code with serialize_partially_deserialized_form - size_t len = collection_value_len(sf) * map.size() * 2 + collection_size_len(sf); - for (auto&& e : map) { - len += e.first.size() + e.second.size(); - } - bytes b(bytes::initialized_later(), len); - bytes::iterator out = b.begin(); - - write_collection_size(out, map.size(), sf); - for (auto&& e : map) { - write_collection_value(out, sf, e.first); - write_collection_value(out, sf, e.second); - } - return b; -} - -bool -maps::value::equals(map_type mt, const value& v) { - return std::equal(map.begin(), map.end(), - v.map.begin(), v.map.end(), - [mt] (auto&& e1, auto&& e2) { - return mt->get_keys_type()->compare(e1.first, e2.first) == 0 - && mt->get_values_type()->compare(e1.second, e2.second) == 0; - }); -} - -sstring -maps::value::to_string() const { - // FIXME: - abort(); -} - -bool -maps::delayed_value::contains_bind_marker() const { - // False since we don't support them in collection - return false; -} - -void -maps::delayed_value::collect_marker_specification(shared_ptr bound_names) { -} - -shared_ptr -maps::delayed_value::bind(const query_options& options) { - std::map buffers(_comparator); - for (auto&& entry : _elements) { - auto&& key = entry.first; - auto&& value = entry.second; - - // We don't support values > 64K because the serialization format encode the length as an unsigned short. - auto key_bytes = key->bind_and_get(options); - if (key_bytes.is_null()) { - throw exceptions::invalid_request_exception("null is not supported inside collections"); - } - if (key_bytes.is_unset_value()) { - throw exceptions::invalid_request_exception("unset value is not supported inside collections"); - } - if (key_bytes->size() > std::numeric_limits::max()) { - throw exceptions::invalid_request_exception(sprint("Map key is too long. Map keys are limited to %d bytes but %d bytes keys provided", - std::numeric_limits::max(), - key_bytes->size())); - } - auto value_bytes = value->bind_and_get(options); - if (value_bytes.is_null()) { - throw exceptions::invalid_request_exception("null is not supported inside collections");\ - } - if (value_bytes.is_unset_value()) { - return constants::UNSET_VALUE; - } - if (value_bytes->size() > std::numeric_limits::max()) { - throw exceptions::invalid_request_exception(sprint("Map value is too long. 
Map values are limited to %d bytes but %d bytes value provided", - std::numeric_limits::max(), - value_bytes->size())); - } - buffers.emplace(std::move(to_bytes(*key_bytes)), std::move(to_bytes(*value_bytes))); - } - return ::make_shared(std::move(buffers)); -} - -::shared_ptr -maps::marker::bind(const query_options& options) { - auto val = options.get_value_at(_bind_index); - if (val.is_null()) { - return nullptr; - } - if (val.is_unset_value()) { - return constants::UNSET_VALUE; - } - return ::make_shared(maps::value::from_serialized(*val, static_pointer_cast(_receiver->type), - options.get_cql_serialization_format())); -} - -void -maps::setter::execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) { - auto value = _t->bind(params._options); - if (value == constants::UNSET_VALUE) { - return; - } - if (column.type->is_multi_cell()) { - // delete + put - collection_type_impl::mutation mut; - mut.tomb = params.make_tombstone_just_before(); - auto ctype = static_pointer_cast(column.type); - auto col_mut = ctype->serialize_mutation_form(std::move(mut)); - m.set_cell(row_key, column, std::move(col_mut)); - } - do_put(m, row_key, params, value, column); -} - -void -maps::setter_by_key::collect_marker_specification(shared_ptr bound_names) { - operation::collect_marker_specification(bound_names); - _k->collect_marker_specification(bound_names); -} - -void -maps::setter_by_key::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) { - using exceptions::invalid_request_exception; - assert(column.type->is_multi_cell()); // "Attempted to set a value for a single key on a frozen map"m - auto key = _k->bind_and_get(params._options); - auto value = _t->bind_and_get(params._options); - if (!key) { - throw invalid_request_exception("Invalid null map key"); - } - if (value && value->size() >= std::numeric_limits::max()) { - throw invalid_request_exception( - sprint("Map value is too long. Map values are limited to %d bytes but %d bytes value provided", - std::numeric_limits::max(), - value->size())); - } - auto avalue = value ? params.make_cell(*value) : params.make_dead_cell(); - map_type_impl::mutation update = { {}, { { std::move(to_bytes(*key)), std::move(avalue) } } }; - // should have been verified as map earlier? 
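// (Shape of the value built above: map_type_impl::mutation carries an optional
// tombstone plus a vector of (key, cell) pairs; here the tombstone is left
// empty and exactly one cell is emitted, so setting m['k'] never disturbs the
// other keys of the map.)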
- auto ctype = static_pointer_cast(column.type); - auto col_mut = ctype->serialize_mutation_form(std::move(update)); - m.set_cell(prefix, column, std::move(col_mut)); -} - -void -maps::putter::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) { - assert(column.type->is_multi_cell()); // "Attempted to add items to a frozen map"; - auto value = _t->bind(params._options); - if (value != constants::UNSET_VALUE) { - do_put(m, prefix, params, value, column); - } -} - -void -maps::do_put(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params, - shared_ptr value, const column_definition& column) { - auto map_value = dynamic_pointer_cast(value); - if (column.type->is_multi_cell()) { - collection_type_impl::mutation mut; - - if (!value) { - return; - } - - for (auto&& e : map_value->map) { - mut.cells.emplace_back(e.first, params.make_cell(e.second)); - } - auto ctype = static_pointer_cast(column.type); - auto col_mut = ctype->serialize_mutation_form(std::move(mut)); - m.set_cell(prefix, column, std::move(col_mut)); - } else { - // for frozen maps, we're overwriting the whole cell - if (!value) { - m.set_cell(prefix, column, params.make_dead_cell()); - } else { - auto v = map_type_impl::serialize_partially_deserialized_form({map_value->map.begin(), map_value->map.end()}, - cql_serialization_format::internal()); - m.set_cell(prefix, column, params.make_cell(std::move(v))); - } - } -} - -void -maps::discarder_by_key::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) { - assert(column.type->is_multi_cell()); // "Attempted to delete a single key in a frozen map"; - auto&& key = _t->bind(params._options); - if (!key) { - throw exceptions::invalid_request_exception("Invalid null map key"); - } - if (key == constants::UNSET_VALUE) { - throw exceptions::invalid_request_exception("Invalid unset map key"); - } - collection_type_impl::mutation mut; - mut.cells.emplace_back(*key->get(params._options), params.make_dead_cell()); - auto mtype = static_cast(column.type.get()); - m.set_cell(prefix, column, mtype->serialize_mutation_form(mut)); -} - -} - diff --git a/scylla/cql3/maps.hh b/scylla/cql3/maps.hh deleted file mode 100644 index ad9bc92..0000000 --- a/scylla/cql3/maps.hh +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
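As with lists, the operation classes in maps.cc map onto CQL's map-update forms (inferred from the asserts in the deleted code):

    // maps::setter            UPDATE t SET m = {'a': 1}     (tombstone, then put)
    // maps::putter            UPDATE t SET m = m + {'b': 2} (put only)
    // maps::setter_by_key     UPDATE t SET m['a'] = 1       (single live cell)
    // maps::discarder_by_key  DELETE m['a'] FROM t          (single dead cell)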
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/abstract_marker.hh" -#include "cql3/term.hh" -#include "operation.hh" -#include "update_parameters.hh" -#include "constants.hh" - -namespace cql3 { - -/** - * Static helper methods and classes for maps. - */ -class maps { -private: - maps() = delete; -public: - static shared_ptr key_spec_of(column_specification& column); - static shared_ptr value_spec_of(column_specification& column); - - class literal : public term::raw { - public: - const std::vector, ::shared_ptr>> entries; - - literal(const std::vector, ::shared_ptr>>& entries_) - : entries{entries_} - { } - virtual ::shared_ptr prepare(database& db, const sstring& keyspace, ::shared_ptr receiver) override; - private: - void validate_assignable_to(database& db, const sstring& keyspace, column_specification& receiver); - public: - virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, ::shared_ptr receiver) override; - virtual sstring to_string() const override; - }; - - class value : public terminal, collection_terminal { - public: - std::map map; - - value(std::map map) - : map(std::move(map)) { - } - static value from_serialized(bytes_view value, map_type type, cql_serialization_format sf); - virtual cql3::raw_value get(const query_options& options) override; - virtual bytes get_with_protocol_version(cql_serialization_format sf); - bool equals(map_type mt, const value& v); - virtual sstring to_string() const; - }; - - // See Lists.DelayedValue - class delayed_value : public non_terminal { - serialized_compare _comparator; - std::unordered_map, shared_ptr> _elements; - public: - delayed_value(serialized_compare comparator, - std::unordered_map, shared_ptr> elements) - : _comparator(std::move(comparator)), _elements(std::move(elements)) { - } - virtual bool contains_bind_marker() const override; - virtual void collect_marker_specification(shared_ptr bound_names) override; - shared_ptr bind(const query_options& options); - }; - - class marker : public abstract_marker { - public: - marker(int32_t bind_index, ::shared_ptr receiver) - : abstract_marker{bind_index, std::move(receiver)} - { } - virtual ::shared_ptr bind(const query_options& options) override; - }; - - class setter : public operation { - public: - setter(const column_definition& column, shared_ptr t) - : operation(column, std::move(t)) { - } - - virtual void execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) override; - }; - - class setter_by_key : public operation { - const shared_ptr _k; - public: - setter_by_key(const column_definition& column, shared_ptr k, shared_ptr t) - : operation(column, std::move(t)), _k(std::move(k)) { - } - virtual void collect_marker_specification(shared_ptr bound_names) override; - virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override; - }; - - class putter : public operation { - public: - putter(const column_definition& column, shared_ptr t) - : operation(column, std::move(t)) { - } - virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override; - }; - - static void 
do_put(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params, - shared_ptr value, const column_definition& column); - - class discarder_by_key : public operation { - public: - discarder_by_key(const column_definition& column, shared_ptr k) - : operation(column, std::move(k)) { - } - virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override; - }; -}; - -} diff --git a/scylla/cql3/multi_column_relation.hh b/scylla/cql3/multi_column_relation.hh deleted file mode 100644 index 596b45f..0000000 --- a/scylla/cql3/multi_column_relation.hh +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/relation.hh" -#include "cql3/term.hh" -#include "cql3/tuples.hh" - -#include "cql3/restrictions/multi_column_restriction.hh" - -namespace cql3 { - -/** - * A relation using the tuple notation, which typically affects multiple columns. - * Examples: - * - SELECT ... WHERE (a, b, c) > (1, 'a', 10) - * - SELECT ... WHERE (a, b, c) IN ((1, 2, 3), (4, 5, 6)) - * - SELECT ... WHERE (a, b) < ? - * - SELECT ... WHERE (a, b) IN ? - */ -class multi_column_relation final : public relation { -private: - std::vector> _entities; - shared_ptr _values_or_marker; - std::vector> _in_values; - shared_ptr _in_marker; - - multi_column_relation(std::vector> entities, - const operator_type& relation_type, shared_ptr values_or_marker, - std::vector> in_values, shared_ptr in_marker) - : relation(relation_type) - , _entities(std::move(entities)) - , _values_or_marker(std::move(values_or_marker)) - , _in_values(std::move(in_values)) - , _in_marker(std::move(in_marker)) - { } -public: - /** - * Creates a multi-column EQ, LT, LTE, GT, or GTE relation. - * For example: "SELECT ... 
WHERE (a, b) > (0, 1)" - * @param entities the columns on the LHS of the relation - * @param relationType the relation operator - * @param valuesOrMarker a Tuples.Literal instance or a Tuples.Raw marker - * @return a new MultiColumnRelation instance - */ - static shared_ptr create_non_in_relation(std::vector> entities, - const operator_type& relation_type, shared_ptr values_or_marker) { - assert(relation_type != operator_type::IN); - return make_shared(multi_column_relation(std::move(entities), relation_type, std::move(values_or_marker), {}, {})); - } - - /** - * Creates a multi-column IN relation with a list of IN values or markers. - * For example: "SELECT ... WHERE (a, b) IN ((0, 1), (2, 3))" - * @param entities the columns on the LHS of the relation - * @param inValues a list of Tuples.Literal instances or a Tuples.Raw markers - * @return a new MultiColumnRelation instance - */ - static shared_ptr create_in_relation(std::vector> entities, - std::vector> in_values) { - std::vector> values(in_values.size()); - std::copy(in_values.begin(), in_values.end(), values.begin()); - return make_shared(multi_column_relation(std::move(entities), operator_type::IN, {}, std::move(values), {})); - } - - static shared_ptr create_in_relation(std::vector> entities, - std::vector> in_values) { - std::vector> values(in_values.size()); - std::copy(in_values.begin(), in_values.end(), values.begin()); - return make_shared(multi_column_relation(std::move(entities), operator_type::IN, {}, std::move(values), {})); - } - - /** - * Creates a multi-column IN relation with a marker for the IN values. - * For example: "SELECT ... WHERE (a, b) IN ?" - * @param entities the columns on the LHS of the relation - * @param inMarker a single IN marker - * @return a new MultiColumnRelation instance - */ - static shared_ptr create_single_marker_in_relation(std::vector> entities, - shared_ptr in_marker) { - return make_shared(multi_column_relation(std::move(entities), operator_type::IN, {}, {}, std::move(in_marker))); - } - - const std::vector>& get_entities() const { - return _entities; - } - -private: - /** - * For non-IN relations, returns the Tuples.Literal or Tuples.Raw marker for a single tuple. - * @return a Tuples.Literal for non-IN relations or Tuples.Raw marker for a single tuple. - */ - shared_ptr get_value() { - return _relation_type == operator_type::IN ? 
_in_marker : _values_or_marker; - } -public: - virtual bool is_multi_column() const override { return true; } - -protected: - virtual shared_ptr new_EQ_restriction(database& db, schema_ptr schema, - shared_ptr bound_names) override { - auto rs = receivers(db, schema); - std::vector<::shared_ptr> col_specs(rs.size()); - std::transform(rs.begin(), rs.end(), col_specs.begin(), [] (auto cs) { - return cs->column_specification; - }); - auto t = to_term(col_specs, get_value(), db, schema->ks_name(), bound_names); - return ::make_shared(schema, rs, t); - } - - virtual shared_ptr new_IN_restriction(database& db, schema_ptr schema, - shared_ptr bound_names) override { - auto rs = receivers(db, schema); - std::vector<::shared_ptr> col_specs(rs.size()); - std::transform(rs.begin(), rs.end(), col_specs.begin(), [] (auto cs) { - return cs->column_specification; - }); - if (_in_marker) { - auto t = to_term(col_specs, get_value(), db, schema->ks_name(), bound_names); - auto as_abstract_marker = static_pointer_cast(t); - return ::make_shared(schema, rs, as_abstract_marker); - } else { - std::vector<::shared_ptr> raws(_in_values.size()); - std::copy(_in_values.begin(), _in_values.end(), raws.begin()); - auto ts = to_terms(col_specs, raws, db, schema->ks_name(), bound_names); - return ::make_shared(schema, rs, ts); - } - } - - virtual shared_ptr new_slice_restriction(database& db, schema_ptr schema, - shared_ptr bound_names, - statements::bound bound, bool inclusive) override { - auto rs = receivers(db, schema); - std::vector<::shared_ptr> col_specs(rs.size()); - std::transform(rs.begin(), rs.end(), col_specs.begin(), [] (auto cs) { - return cs->column_specification; - }); - auto t = to_term(col_specs, get_value(), db, schema->ks_name(), bound_names); - return ::make_shared(schema, rs, bound, inclusive, t); - } - - virtual shared_ptr new_contains_restriction(database& db, schema_ptr schema, - shared_ptr bound_names, bool is_key) override { - throw exceptions::invalid_request_exception(sprint("%s cannot be used for Multi-column relations", get_operator())); - } - - virtual ::shared_ptr maybe_rename_identifier(const column_identifier::raw& from, column_identifier::raw to) override { - auto new_entities = boost::copy_range(_entities | boost::adaptors::transformed([&] (auto&& entity) { - return *entity == from ? 
::make_shared(to) : entity; - })); - return ::make_shared(multi_column_relation(std::move(new_entities), _relation_type, _values_or_marker, _in_values, _in_marker)); - } - - virtual shared_ptr to_term(const std::vector>& receivers, - ::shared_ptr raw, database& db, const sstring& keyspace, - ::shared_ptr bound_names) override { - auto as_multi_column_raw = dynamic_pointer_cast(raw); - auto t = as_multi_column_raw->prepare(db, keyspace, receivers); - t->collect_marker_specification(bound_names); - return t; - } - - std::vector receivers(database& db, schema_ptr schema) { - using namespace statements::request_validations; - - int previous_position = -1; - std::vector names; - for (auto&& raw : get_entities()) { - const auto& def = to_column_definition(schema, raw); - check_true(def.is_clustering_key(), "Multi-column relations can only be applied to clustering columns but was applied to: %s", def.name_as_text()); - check_false(std::count(names.begin(), names.end(), &def), "Column \"%s\" appeared twice in a relation: %s", def.name_as_text(), to_string()); - - // FIXME: the following restriction should be removed (CASSANDRA-8613) - if (def.position() != unsigned(previous_position + 1)) { - check_false(previous_position == -1, "Clustering columns may not be skipped in multi-column relations. " - "They should appear in the PRIMARY KEY order. Got %s", to_string()); - throw exceptions::invalid_request_exception(sprint("Clustering columns must appear in the PRIMARY KEY order in multi-column relations: %s", to_string())); - } - names.emplace_back(&def); - previous_position = def.position(); - } - return names; - } - - virtual sstring to_string() const override { - sstring str = tuples::tuple_to_string(_entities); - if (is_IN()) { - str += " IN "; - str += !_in_marker ? "?" : tuples::tuple_to_string(_in_values); - return str; - } - str += sstring(" ") + _relation_type.to_string() + " "; - str += _values_or_marker->to_string(); - return str; - } -}; - -} diff --git a/scylla/cql3/operation.cc b/scylla/cql3/operation.cc deleted file mode 100644 index 66067a3..0000000 --- a/scylla/cql3/operation.cc +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ -#include - -#include "operation.hh" -#include "operation_impl.hh" -#include "maps.hh" -#include "sets.hh" -#include "lists.hh" - -namespace cql3 { - - -shared_ptr -operation::set_element::prepare(database& db, const sstring& keyspace, const column_definition& receiver) { - using exceptions::invalid_request_exception; - auto rtype = dynamic_pointer_cast(receiver.type); - if (!rtype) { - throw invalid_request_exception(sprint("Invalid operation (%s) for non collection column %s", receiver, receiver.name())); - } else if (!rtype->is_multi_cell()) { - throw invalid_request_exception(sprint("Invalid operation (%s) for frozen collection column %s", receiver, receiver.name())); - } - - if (&rtype->_kind == &collection_type_impl::kind::list) { - auto&& lval = _value->prepare(db, keyspace, lists::value_spec_of(receiver.column_specification)); - if (_by_uuid) { - auto&& idx = _selector->prepare(db, keyspace, lists::uuid_index_spec_of(receiver.column_specification)); - return make_shared(receiver, idx, lval); - } else { - auto&& idx = _selector->prepare(db, keyspace, lists::index_spec_of(receiver.column_specification)); - return make_shared(receiver, idx, lval); - } - } else if (&rtype->_kind == &collection_type_impl::kind::set) { - throw invalid_request_exception(sprint("Invalid operation (%s) for set column %s", receiver, receiver.name())); - } else if (&rtype->_kind == &collection_type_impl::kind::map) { - auto key = _selector->prepare(db, keyspace, maps::key_spec_of(*receiver.column_specification)); - auto mval = _value->prepare(db, keyspace, maps::value_spec_of(*receiver.column_specification)); - return make_shared(receiver, key, mval); - } - abort(); -} - -bool -operation::set_element::is_compatible_with(shared_ptr other) { - // TODO: we could check that the other operation is not setting the same element - // too (but since the index/key set may be a bind variables we can't always do it at this point) - return !dynamic_pointer_cast(std::move(other)); -} - -shared_ptr -operation::addition::prepare(database& db, const sstring& keyspace, const column_definition& receiver) { - auto v = _value->prepare(db, keyspace, receiver.column_specification); - - auto ctype = dynamic_pointer_cast(receiver.type); - if (!ctype) { - if (!receiver.is_counter()) { - throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for non counter column %s", receiver, receiver.name())); - } - return make_shared(receiver, v); - } else if (!ctype->is_multi_cell()) { - throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for frozen collection column %s", receiver, receiver.name())); - } - - if (&ctype->_kind == &collection_type_impl::kind::list) { - return make_shared(receiver, v); - } else if (&ctype->_kind == &collection_type_impl::kind::set) { - return make_shared(receiver, v); - } else if (&ctype->_kind == &collection_type_impl::kind::map) { - return make_shared(receiver, v); - } else { - abort(); - } -} - -bool -operation::addition::is_compatible_with(shared_ptr other) { - return !dynamic_pointer_cast(other); -} - -shared_ptr -operation::subtraction::prepare(database& db, const sstring& keyspace, const column_definition& receiver) { - auto ctype = dynamic_pointer_cast(receiver.type); - if (!ctype) { - if (!receiver.is_counter()) { - throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for 
non counter column %s", receiver, receiver.name())); - } - auto v = _value->prepare(db, keyspace, receiver.column_specification); - return make_shared(receiver, v); - } - if (!ctype->is_multi_cell()) { - throw exceptions::invalid_request_exception( - sprint("Invalid operation (%s) for frozen collection column %s", receiver, receiver.name())); - } - - if (&ctype->_kind == &collection_type_impl::kind::list) { - return make_shared(receiver, _value->prepare(db, keyspace, receiver.column_specification)); - } else if (&ctype->_kind == &collection_type_impl::kind::set) { - return make_shared(receiver, _value->prepare(db, keyspace, receiver.column_specification)); - } else if (&ctype->_kind == &collection_type_impl::kind::map) { - auto&& mtype = dynamic_pointer_cast(ctype); - // The value for a map subtraction is actually a set - auto&& vr = make_shared( - receiver.column_specification->ks_name, - receiver.column_specification->cf_name, - receiver.column_specification->name, - set_type_impl::get_instance(mtype->get_keys_type(), false)); - return make_shared(receiver, _value->prepare(db, keyspace, std::move(vr))); - } - abort(); -} - -bool -operation::subtraction::is_compatible_with(shared_ptr other) { - return !dynamic_pointer_cast(other); -} - -shared_ptr -operation::prepend::prepare(database& db, const sstring& keyspace, const column_definition& receiver) { - auto v = _value->prepare(db, keyspace, receiver.column_specification); - - if (!dynamic_cast(receiver.type.get())) { - throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for non list column %s", receiver, receiver.name())); - } else if (!receiver.type->is_multi_cell()) { - throw exceptions::invalid_request_exception(sprint("Invalid operation (%s) for frozen list column %s", receiver, receiver.name())); - } - - return make_shared(receiver, std::move(v)); -} - -bool -operation::prepend::is_compatible_with(shared_ptr other) { - return !dynamic_pointer_cast(other); -} - - -::shared_ptr -operation::set_value::prepare(database& db, const sstring& keyspace, const column_definition& receiver) { - auto v = _value->prepare(db, keyspace, receiver.column_specification); - - if (receiver.type->is_counter()) { - throw exceptions::invalid_request_exception(sprint("Cannot set the value of counter column %s (counters can only be incremented/decremented, not set)", receiver.name_as_text())); - } - - if (!receiver.type->is_collection()) { - return ::make_shared(receiver, v); - } - - auto& k = static_pointer_cast(receiver.type)->_kind; - if (&k == &collection_type_impl::kind::list) { - return make_shared(receiver, v); - } else if (&k == &collection_type_impl::kind::set) { - return make_shared(receiver, v); - } else if (&k == &collection_type_impl::kind::map) { - return make_shared(receiver, v); - } else { - abort(); - } -} - -::shared_ptr -operation::set_counter_value_from_tuple_list::prepare(database& db, const sstring& keyspace, const column_definition& receiver) { - static thread_local const data_type counter_tuple_type = tuple_type_impl::get_instance({int32_type, uuid_type, long_type, long_type}); - static thread_local const data_type counter_tuple_list_type = list_type_impl::get_instance(counter_tuple_type, true); - - if (!receiver.type->is_counter()) { - throw exceptions::invalid_request_exception(sprint("Column %s is not a counter", receiver.name_as_text())); - } - - // We need to fake a column of list> to prepare the value term - auto & os = receiver.column_specification; - auto spec = make_shared(os->ks_name, os->cf_name, 
os->name, counter_tuple_list_type); - auto v = _value->prepare(db, keyspace, spec); - - // Will not be used elsewhere, so make it local. - class counter_setter : public operation { - public: - using operation::operation; - - bool is_raw_counter_shard_write() const override { - return true; - } - void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override { - const auto& value = _t->bind(params._options); - auto&& list_value = dynamic_pointer_cast(value); - - if (!list_value) { - throw std::invalid_argument("Invalid input data to counter set"); - } - - counter_id last(utils::UUID(0, 0)); - counter_cell_builder ccb(list_value->_elements.size()); - for (auto& bo : list_value->_elements) { - // lexical etc cast fails should be enough type checking here. - auto tuple = value_cast(counter_tuple_type->deserialize(*bo)); - auto shard = value_cast(tuple[0]); - auto id = counter_id(value_cast(tuple[1])); - auto clock = value_cast(tuple[2]); - auto value = value_cast(tuple[3]); - - using namespace std::rel_ops; - - if (id <= last) { - throw marshal_exception( - sprint("invalid counter id order, %s <= %s", - id.to_uuid().to_sstring(), - last.to_uuid().to_sstring())); - } - last = id; - // TODO: maybe allow more than global values to propagate, - // though we don't (yet at least) in sstable::partition so... - switch (shard) { - case 'g': - ccb.add_shard(counter_shard(id, value, clock)); - break; - case 'l': - throw marshal_exception("encountered a local shard in a counter cell"); - case 'r': - throw marshal_exception("encountered remote shards in a counter cell"); - default: - throw marshal_exception(sprint("encountered unknown shard %d in a counter cell", shard)); - } - } - // Note. this is a counter value cell, not an update. - // see counters.cc, we need to detect this. - m.set_cell(prefix, column, ccb.build(params.timestamp())); - } - }; - - return make_shared(receiver, v); -}; - -bool -operation::set_value::is_compatible_with(::shared_ptr other) { - // We don't allow setting multiple time the same column, because 1) - // it's stupid and 2) the result would seem random to the user. 
- return false; -} - -shared_ptr -operation::element_deletion::affected_column() { - return _id; -} - -shared_ptr -operation::element_deletion::prepare(database& db, const sstring& keyspace, const column_definition& receiver) { - if (!receiver.type->is_collection()) { - throw exceptions::invalid_request_exception(sprint("Invalid deletion operation for non collection column %s", receiver.name())); - } else if (!receiver.type->is_multi_cell()) { - throw exceptions::invalid_request_exception(sprint("Invalid deletion operation for frozen collection column %s", receiver.name())); - } - auto ctype = static_pointer_cast(receiver.type); - if (&ctype->_kind == &collection_type_impl::kind::list) { - auto&& idx = _element->prepare(db, keyspace, lists::index_spec_of(receiver.column_specification)); - return make_shared(receiver, std::move(idx)); - } else if (&ctype->_kind == &collection_type_impl::kind::set) { - auto&& elt = _element->prepare(db, keyspace, sets::value_spec_of(receiver.column_specification)); - return make_shared(receiver, std::move(elt)); - } else if (&ctype->_kind == &collection_type_impl::kind::map) { - auto&& key = _element->prepare(db, keyspace, maps::key_spec_of(*receiver.column_specification)); - return make_shared(receiver, std::move(key)); - } - abort(); -} - -} diff --git a/scylla/cql3/operation.hh b/scylla/cql3/operation.hh deleted file mode 100644 index 7cc3627..0000000 --- a/scylla/cql3/operation.hh +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "core/shared_ptr.hh" -#include "exceptions/exceptions.hh" -#include "database_fwd.hh" -#include "term.hh" -#include "update_parameters.hh" - -#include - -namespace cql3 { - -class update_parameters; - -/** - * An UPDATE or DELETE operation. 
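The operation.hh header being removed here separates a parsed, unchecked raw_update from the well-typed operation that prepare() yields. Below is a minimal, self-contained sketch of that two-phase shape; the toy_* names are invented for illustration and are not Scylla's real types.

// Sketch only: models the raw_update -> operation "prepare" split
// described in the deleted header. All names here are hypothetical.
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <iostream>

struct toy_receiver {                 // stands in for column_definition
    std::string name;
    bool is_counter = false;
};

using cell_map = std::map<std::string, std::string>;  // stands in for mutation

// Post-preparation, well-typed operation (cf. cql3::operation).
struct toy_operation {
    virtual ~toy_operation() = default;
    virtual void execute(cell_map& m) const = 0;
};

// Parsed-but-unchecked operation (cf. cql3::operation::raw_update).
struct toy_raw_update {
    virtual ~toy_raw_update() = default;
    // Validates against the receiver, then returns a typed operation.
    virtual std::unique_ptr<toy_operation> prepare(const toy_receiver&) const = 0;
};

// "c = v": rejected for counter columns, as in set_value::prepare above.
struct toy_set_value : toy_raw_update {
    std::string column, value;
    toy_set_value(std::string c, std::string v)
        : column(std::move(c)), value(std::move(v)) {}

    std::unique_ptr<toy_operation> prepare(const toy_receiver& r) const override {
        if (r.is_counter) {
            throw std::invalid_argument("cannot set the value of a counter column");
        }
        struct setter : toy_operation {
            std::string column, value;
            void execute(cell_map& m) const override { m[column] = value; }
        };
        auto op = std::make_unique<setter>();
        op->column = column;
        op->value = value;
        return op;
    }
};

int main() {
    cell_map row;
    toy_set_value raw{"name", "alice"};               // parsed form of: SET name = 'alice'
    raw.prepare(toy_receiver{"name"})->execute(row);  // prepare, then execute
    std::cout << row["name"] << '\n';                 // prints: alice
}

The point of the split, visible in the deleted prepare() implementations, is that ill-typed updates (setting a counter, indexing a frozen collection, and so on) are rejected before any mutation is built.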
- * - * For UPDATE this includes: - * - setting a constant - * - counter operations - * - collections operations - * and for DELETE: - * - deleting a column - * - deleting an element of collection column - * - * Fine grained operation are obtained from their raw counterpart (Operation.Raw, which - * correspond to a parsed, non-checked operation) by provided the receiver for the operation. - */ -class operation { -public: - // the column the operation applies to - // We can hold a reference because all operations have life bound to their statements and - // statements pin the schema. - const column_definition& column; - -protected: - // Term involved in the operation. In theory this should not be here since some operation - // may require none of more than one term, but most need 1 so it simplify things a bit. - const ::shared_ptr _t; - -public: - operation(const column_definition& column_, ::shared_ptr t) - : column{column_} - , _t{t} - { } - - virtual ~operation() {} - - atomic_cell make_dead_cell(const update_parameters& params) const { - return params.make_dead_cell(); - } - - atomic_cell make_cell(bytes_view value, const update_parameters& params) const { - return params.make_cell(value); - } - - atomic_cell make_counter_update_cell(int64_t delta, const update_parameters& params) const { - return params.make_counter_update_cell(delta); - } - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const { - return _t && _t->uses_function(ks_name, function_name); - } - - virtual bool is_raw_counter_shard_write() const { - return false; - } - - /** - * @return whether the operation requires a read of the previous value to be executed - * (only lists setterByIdx, discard and discardByIdx requires that). - */ - virtual bool requires_read() { - return false; - } - - /** - * Collects the column specification for the bind variables of this operation. - * - * @param bound_names the list of column specification where to collect the - * bind variables of this term in. - */ - virtual void collect_marker_specification(::shared_ptr bound_names) { - if (_t) { - _t->collect_marker_specification(bound_names); - } - } - - /** - * Execute the operation. - */ - virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) = 0; - - /** - * A parsed raw UPDATE operation. - * - * This can be one of: - * - Setting a value: c = v - * - Setting an element of a collection: c[x] = v - * - An addition/subtraction to a variable: c = c +/- v (where v can be a collection literal) - * - An prepend operation: c = v + c - */ - class raw_update { - public: - virtual ~raw_update() {} - - /** - * This method validates the operation (i.e. validate it is well typed) - * based on the specification of the receiver of the operation. - * - * It returns an Operation which can be though as post-preparation well-typed - * Operation. - * - * @param receiver the "column" this operation applies to. Note that - * contrarly to the method of same name in Term.Raw, the receiver should always - * be a true column. - * @return the prepared update operation. - */ - virtual ::shared_ptr prepare(database& db, const sstring& keyspace, const column_definition& receiver) = 0; - - /** - * @return whether this operation can be applied alongside the {@code - * other} update (in the same UPDATE statement for the same column). - */ - virtual bool is_compatible_with(::shared_ptr other) = 0; - }; - - /** - * A parsed raw DELETE operation. 
- * - * This can be one of: - * - Deleting a column - * - Deleting an element of a collection - */ - class raw_deletion { - public: - ~raw_deletion() {} - - /** - * The name of the column affected by this delete operation. - */ - virtual ::shared_ptr affected_column() = 0; - - /** - * This method validates the operation (i.e. validate it is well typed) - * based on the specification of the column affected by the operation (i.e the - * one returned by affectedColumn()). - * - * It returns an Operation which can be though as post-preparation well-typed - * Operation. - * - * @param receiver the "column" this operation applies to. - * @return the prepared delete operation. - */ - virtual ::shared_ptr prepare(database& db, const sstring& keyspace, const column_definition& receiver) = 0; - }; - - class set_value; - class set_counter_value_from_tuple_list; - - class set_element : public raw_update { - const shared_ptr _selector; - const shared_ptr _value; - const bool _by_uuid; - public: - set_element(shared_ptr selector, shared_ptr value, bool by_uuid = false) - : _selector(std::move(selector)), _value(std::move(value)), _by_uuid(by_uuid) { - } - - virtual shared_ptr prepare(database& db, const sstring& keyspace, const column_definition& receiver); - - virtual bool is_compatible_with(shared_ptr other) override; - }; - - class addition : public raw_update { - const shared_ptr _value; - public: - addition(shared_ptr value) - : _value(value) { - } - - virtual shared_ptr prepare(database& db, const sstring& keyspace, const column_definition& receiver) override; - - virtual bool is_compatible_with(shared_ptr other) override; - }; - - class subtraction : public raw_update { - const shared_ptr _value; - public: - subtraction(shared_ptr value) - : _value(value) { - } - - virtual shared_ptr prepare(database& db, const sstring& keyspace, const column_definition& receiver) override; - - virtual bool is_compatible_with(shared_ptr other) override; - }; - - class prepend : public raw_update { - shared_ptr _value; - public: - prepend(shared_ptr value) - : _value(std::move(value)) { - } - - virtual shared_ptr prepare(database& db, const sstring& keyspace, const column_definition& receiver) override; - - virtual bool is_compatible_with(shared_ptr other) override; - }; - - class column_deletion; - - class element_deletion : public raw_deletion { - shared_ptr _id; - shared_ptr _element; - public: - element_deletion(shared_ptr id, shared_ptr element) - : _id(std::move(id)), _element(std::move(element)) { - } - virtual shared_ptr affected_column() override; - virtual shared_ptr prepare(database& db, const sstring& keyspace, const column_definition& receiver) override; - }; -}; - -} diff --git a/scylla/cql3/operation_impl.hh b/scylla/cql3/operation_impl.hh deleted file mode 100644 index 19de292..0000000 --- a/scylla/cql3/operation_impl.hh +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "operation.hh" -#include "constants.hh" -#include "maps.hh" -#include "sets.hh" -#include "lists.hh" - -namespace cql3 { - -class operation::set_value : public raw_update { -protected: - ::shared_ptr _value; -public: - set_value(::shared_ptr value) : _value(std::move(value)) {} - - virtual ::shared_ptr prepare(database& db, const sstring& keyspace, const column_definition& receiver) override; - -#if 0 - protected String toString(ColumnSpecification column) - { - return String.format("%s = %s", column, value); - } -#endif - - virtual bool is_compatible_with(::shared_ptr other) override; -}; - -class operation::set_counter_value_from_tuple_list : public set_value { -public: - using set_value::set_value; - ::shared_ptr prepare(database& db, const sstring& keyspace, const column_definition& receiver) override; -}; - -class operation::column_deletion : public raw_deletion { -private: - ::shared_ptr _id; -public: - column_deletion(::shared_ptr id) - : _id(std::move(id)) - { } - - virtual ::shared_ptr affected_column() override { - return _id; - } - - ::shared_ptr prepare(database& db, const sstring& keyspace, const column_definition& receiver) { - // No validation, deleting a column is always "well typed" - return ::make_shared(receiver); - } -}; - -} diff --git a/scylla/cql3/operator.cc b/scylla/cql3/operator.cc deleted file mode 100644 index ddec3b6..0000000 --- a/scylla/cql3/operator.cc +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. 
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "operator.hh" - -namespace cql3 { - -const operator_type operator_type::EQ(0, operator_type::EQ, "="); -const operator_type operator_type::LT(4, operator_type::GT, "<"); -const operator_type operator_type::LTE(3, operator_type::GTE, "<="); -const operator_type operator_type::GTE(1, operator_type::LTE, ">="); -const operator_type operator_type::GT(2, operator_type::LT, ">"); -const operator_type operator_type::IN(7, operator_type::IN, "IN"); -const operator_type operator_type::CONTAINS(5, operator_type::CONTAINS, "CONTAINS"); -const operator_type operator_type::CONTAINS_KEY(6, operator_type::CONTAINS_KEY, "CONTAINS_KEY"); -const operator_type operator_type::NEQ(8, operator_type::NEQ, "!="); -const operator_type operator_type::IS_NOT(9, operator_type::IS_NOT, "IS NOT"); - -} diff --git a/scylla/cql3/operator.hh b/scylla/cql3/operator.hh deleted file mode 100644 index d9af7ec..0000000 --- a/scylla/cql3/operator.hh +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include -#include -#include "core/sstring.hh" -#include "seastarx.hh" - -namespace cql3 { - -class operator_type { -public: - static const operator_type EQ; - static const operator_type LT; - static const operator_type LTE; - static const operator_type GTE; - static const operator_type GT; - static const operator_type IN; - static const operator_type CONTAINS; - static const operator_type CONTAINS_KEY; - static const operator_type NEQ; - static const operator_type IS_NOT; -private: - int32_t _b; - const operator_type& _reverse; - sstring _text; -private: - operator_type(int32_t b, const operator_type& reverse, sstring text) - : _b(b) - , _reverse(reverse) - , _text(std::move(text)) - {} -public: - const operator_type& reverse() const { return _reverse; } - sstring to_string() const { return _text; } - bool operator==(const operator_type& other) const { return this == &other; } - bool operator!=(const operator_type& other) const { return this != &other; } -#if 0 - - /** - * Write the serialized version of this Operator to the specified output. - * - * @param output the output to write to - * @throws IOException if an I/O problem occurs while writing to the specified output - */ - public void writeTo(DataOutput output) throws IOException - { - output.writeInt(b); - } - - /** - * Deserializes a Operator instance from the specified input. - * - * @param input the input to read from - * @return the Operator instance deserialized - * @throws IOException if a problem occurs while deserializing the Type instance. - */ - public static Operator readFrom(DataInput input) throws IOException - { - int b = input.readInt(); - for (Operator operator : values()) - if (operator.b == b) - return operator; - - throw new IOException(String.format("Cannot resolve Relation.Type from binary representation: %s", b)); - } -#endif -}; - -static inline -std::ostream& operator<<(std::ostream& out, const operator_type& op) { - return out << op.to_string(); -} - -} diff --git a/scylla/cql3/query_options.cc b/scylla/cql3/query_options.cc deleted file mode 100644 index 4977e46..0000000 --- a/scylla/cql3/query_options.cc +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "query_options.hh" -#include "version.hh" - -namespace cql3 { - -thread_local const query_options::specific_options query_options::specific_options::DEFAULT{-1, {}, {}, api::missing_timestamp}; - -thread_local query_options query_options::DEFAULT{db::consistency_level::ONE, std::experimental::nullopt, - std::vector(), false, query_options::specific_options::DEFAULT, cql_serialization_format::latest()}; - -query_options::query_options(db::consistency_level consistency, - std::experimental::optional> names, - std::vector values, - bool skip_metadata, - specific_options options, - cql_serialization_format sf) - : _consistency(consistency) - , _names(std::move(names)) - , _values(std::move(values)) - , _value_views() - , _skip_metadata(skip_metadata) - , _options(std::move(options)) - , _cql_serialization_format(sf) -{ - fill_value_views(); -} - -query_options::query_options(db::consistency_level consistency, - std::experimental::optional> names, - std::vector value_views, - bool skip_metadata, - specific_options options, - cql_serialization_format sf) - : _consistency(consistency) - , _names(std::move(names)) - , _values() - , _value_views(std::move(value_views)) - , _skip_metadata(skip_metadata) - , _options(std::move(options)) - , _cql_serialization_format(sf) -{ -} - -query_options::query_options(db::consistency_level cl, std::vector values) - : query_options( - cl, - {}, - std::move(values), - false, - query_options::specific_options::DEFAULT, - cql_serialization_format::latest() - ) -{ -} - -query_options::query_options(std::vector values) - : query_options( - db::consistency_level::ONE, std::move(values)) -{} - -db::consistency_level query_options::get_consistency() const -{ - return _consistency; -} - -cql3::raw_value_view query_options::get_value_at(size_t idx) const -{ - return _value_views.at(idx); -} - -size_t query_options::get_values_count() const -{ - return _value_views.size(); -} - -cql3::raw_value_view query_options::make_temporary(cql3::raw_value value) const -{ - if (value) { - _temporaries.emplace_back(value->begin(), value->end()); - auto& temporary = _temporaries.back(); - return cql3::raw_value_view::make_value(bytes_view{temporary.data(), temporary.size()}); - } - return cql3::raw_value_view::make_null(); -} - -bool query_options::skip_metadata() const -{ - return _skip_metadata; -} - -int32_t query_options::get_page_size() const -{ - return get_specific_options().page_size; -} - -::shared_ptr query_options::get_paging_state() const -{ - return get_specific_options().state; -} - -std::experimental::optional query_options::get_serial_consistency() const -{ - return get_specific_options().serial_consistency; -} - -api::timestamp_type query_options::get_timestamp(service::query_state& state) const -{ - auto tstamp = get_specific_options().timestamp; - return tstamp != api::missing_timestamp ? 
tstamp : state.get_timestamp(); -} - -int query_options::get_protocol_version() const -{ - return _cql_serialization_format.protocol_version(); -} - -cql_serialization_format query_options::get_cql_serialization_format() const -{ - return _cql_serialization_format; -} - -const query_options::specific_options& query_options::get_specific_options() const -{ - return _options; -} - -const query_options& query_options::for_statement(size_t i) const -{ - if (!_batch_options) { - // No per-statement options supplied, so use the "global" options - return *this; - } - return _batch_options->at(i); -} - -void query_options::prepare(const std::vector<::shared_ptr>& specs) -{ - if (!_names) { - return; - } - - auto& names = *_names; - std::vector ordered_values; - ordered_values.reserve(specs.size()); - for (auto&& spec : specs) { - auto& spec_name = spec->name->text(); - for (size_t j = 0; j < names.size(); j++) { - if (names[j] == spec_name) { - ordered_values.emplace_back(_values[j]); - break; - } - } - } - _values = std::move(ordered_values); - fill_value_views(); -} - -void query_options::fill_value_views() -{ - for (auto&& value : _values) { - if (value) { - _value_views.emplace_back(cql3::raw_value_view::make_value(bytes_view{*value})); - } else { - _value_views.emplace_back(cql3::raw_value_view::make_null()); - } - } -} - -} diff --git a/scylla/cql3/query_options.hh b/scylla/cql3/query_options.hh deleted file mode 100644 index 828df57..0000000 --- a/scylla/cql3/query_options.hh +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2014 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include "timestamp.hh" -#include "bytes.hh" -#include "db/consistency_level.hh" -#include "service/query_state.hh" -#include "service/pager/paging_state.hh" -#include "cql3/column_specification.hh" -#include "cql3/column_identifier.hh" -#include "cql3/values.hh" -#include "cql_serialization_format.hh" - -namespace cql3 { - -/** - * Options for a query. 
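Just above, query_options::get_timestamp() prefers a client-supplied timestamp and only falls back to one taken from the query state. The same fallback pattern, reduced to a standalone sketch; missing_timestamp and now_micros() are stand-ins for api::missing_timestamp and the query-state clock, not the real APIs.

// Sketch only: the "client timestamp wins, else server generates one"
// fallback used by get_timestamp() in the deleted query_options.cc.
#include <chrono>
#include <cstdint>
#include <iostream>

using timestamp_type = int64_t;
constexpr timestamp_type missing_timestamp = INT64_MIN;  // sentinel: client sent none

timestamp_type now_micros() {
    using namespace std::chrono;
    return duration_cast<microseconds>(
        system_clock::now().time_since_epoch()).count();
}

// A client-supplied "USING TIMESTAMP ..." wins; otherwise the server picks.
timestamp_type effective_timestamp(timestamp_type client_supplied) {
    return client_supplied != missing_timestamp ? client_supplied : now_micros();
}

int main() {
    std::cout << effective_timestamp(missing_timestamp) << '\n';  // server-generated
    std::cout << effective_timestamp(42) << '\n';                 // prints: 42
}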
- */ -class query_options { -public: - // Options that are likely to not be present in most queries - struct specific_options final { - static thread_local const specific_options DEFAULT; - - const int32_t page_size; - const ::shared_ptr state; - const std::experimental::optional serial_consistency; - const api::timestamp_type timestamp; - }; -private: - const db::consistency_level _consistency; - const std::experimental::optional> _names; - std::vector _values; - std::vector _value_views; - mutable std::vector> _temporaries; - const bool _skip_metadata; - const specific_options _options; - cql_serialization_format _cql_serialization_format; - std::experimental::optional> _batch_options; - -private: - /** - * @brief Batch query_options constructor. - * - * Requirements: - * - @tparam OneMutationDataRange has a begin() and end() iterators. - * - The values of @tparam OneMutationDataRange are of either raw_value_view or raw_value types. - * - * @param o Base query_options object. query_options objects for each statement in the batch will derive the values from it. - * @param values_ranges a vector of values ranges for each statement in the batch. - */ - template - GCC6_CONCEPT( requires requires (OneMutationDataRange range) { - std::begin(range); - std::end(range); - } && ( requires (OneMutationDataRange range) { { *range.begin() } -> raw_value_view; } || - requires (OneMutationDataRange range) { { *range.begin() } -> raw_value; } ) ) - explicit query_options(query_options&& o, std::vector values_ranges); - -public: - query_options(query_options&&) = default; - query_options(const query_options&) = delete; - - explicit query_options(db::consistency_level consistency, - std::experimental::optional> names, - std::vector values, - bool skip_metadata, - specific_options options, - cql_serialization_format sf); - explicit query_options(db::consistency_level consistency, - std::experimental::optional> names, - std::vector value_views, - bool skip_metadata, - specific_options options, - cql_serialization_format sf); - - /** - * @brief Batch query_options factory. - * - * Requirements: - * - @tparam OneMutationDataRange has a begin() and end() iterators. - * - The values of @tparam OneMutationDataRange are of either raw_value_view or raw_value types. - * - * @param o Base query_options object. query_options objects for each statement in the batch will derive the values from it. - * @param values_ranges a vector of values ranges for each statement in the batch. - */ - template - GCC6_CONCEPT( requires requires (OneMutationDataRange range) { - std::begin(range); - std::end(range); - } && ( requires (OneMutationDataRange range) { { *range.begin() } -> raw_value_view; } || - requires (OneMutationDataRange range) { { *range.begin() } -> raw_value; } ) ) - static query_options make_batch_options(query_options&& o, std::vector values_ranges) { - return query_options(std::move(o), std::move(values_ranges)); - } - - // It can't be const because of prepare() - static thread_local query_options DEFAULT; - - // forInternalUse - explicit query_options(std::vector values); - explicit query_options(db::consistency_level, std::vector values); - - db::consistency_level get_consistency() const; - cql3::raw_value_view get_value_at(size_t idx) const; - cql3::raw_value_view make_temporary(cql3::raw_value value) const; - size_t get_values_count() const; - bool skip_metadata() const; - /** The pageSize for this query. Will be <= 0 if not relevant for the query. 
*/ - int32_t get_page_size() const; - /** The paging state for this query, or null if not relevant. */ - ::shared_ptr get_paging_state() const; - /** Serial consistency for conditional updates. */ - std::experimental::optional get_serial_consistency() const; - api::timestamp_type get_timestamp(service::query_state& state) const; - /** - * The protocol version for the query. Will be 3 if the object don't come from - * a native protocol request (i.e. it's been allocated locally or by CQL-over-thrift). - */ - int get_protocol_version() const; - cql_serialization_format get_cql_serialization_format() const; - // Mainly for the sake of BatchQueryOptions - const specific_options& get_specific_options() const; - const query_options& for_statement(size_t i) const; - void prepare(const std::vector<::shared_ptr>& specs); -private: - void fill_value_views(); -}; - -template -GCC6_CONCEPT( requires requires (OneMutationDataRange range) { - std::begin(range); - std::end(range); -} && ( requires (OneMutationDataRange range) { { *range.begin() } -> raw_value_view; } || - requires (OneMutationDataRange range) { { *range.begin() } -> raw_value; } ) ) -query_options::query_options(query_options&& o, std::vector values_ranges) - : query_options(std::move(o)) -{ - std::vector tmp; - tmp.reserve(values_ranges.size()); - std::transform(values_ranges.begin(), values_ranges.end(), std::back_inserter(tmp), [this](auto& values_range) { - return query_options(_consistency, {}, std::move(values_range), _skip_metadata, _options, _cql_serialization_format); - }); - _batch_options = std::move(tmp); -} - -} diff --git a/scylla/cql3/query_options_fwd.hh b/scylla/cql3/query_options_fwd.hh deleted file mode 100644 index 5e63a76..0000000 --- a/scylla/cql3/query_options_fwd.hh +++ /dev/null @@ -1,29 +0,0 @@ - -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -namespace cql3 { - -class query_options; - -} diff --git a/scylla/cql3/query_processor.cc b/scylla/cql3/query_processor.cc deleted file mode 100644 index e15f5cf..0000000 --- a/scylla/cql3/query_processor.cc +++ /dev/null @@ -1,540 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ -#include - -#include "cql3/query_processor.hh" -#include "cql3/CqlParser.hpp" -#include "cql3/error_collector.hh" -#include "cql3/statements/batch_statement.hh" -#include "cql3/util.hh" - -#include "transport/messages/result_message.hh" - -#define CRYPTOPP_ENABLE_NAMESPACE_WEAK 1 -#include - -namespace cql3 { - -using namespace statements; -using namespace cql_transport::messages; - -logging::logger log("query_processor"); - -distributed _the_query_processor; - -const sstring query_processor::CQL_VERSION = "3.3.1"; - -class query_processor::internal_state { - service::query_state _qs; -public: - internal_state() - : _qs(service::client_state{service::client_state::internal_tag()}) - { } - operator service::query_state&() { - return _qs; - } - operator const service::query_state&() const { - return _qs; - } - operator service::client_state&() { - return _qs.get_client_state(); - } - operator const service::client_state&() const { - return _qs.get_client_state(); - } - api::timestamp_type next_timestamp() { - return _qs.get_client_state().get_timestamp(); - } -}; - -api::timestamp_type query_processor::next_timestamp() { - return _internal_state->next_timestamp(); -} - -query_processor::query_processor(distributed& proxy, - distributed& db) - : _migration_subscriber{std::make_unique(this)} - , _proxy(proxy) - , _db(db) - , _internal_state(new internal_state()) -{ - namespace sm = seastar::metrics; - - _metrics.add_group("query_processor", { - sm::make_derive("statements_prepared", _stats.prepare_invocations, - sm::description("Counts a total number of parsed CQL requests.")), - }); - - _metrics.add_group("cql", { - sm::make_derive("reads", _cql_stats.reads, - sm::description("Counts a total number of CQL read requests.")), - - sm::make_derive("inserts", _cql_stats.inserts, - sm::description("Counts a total number of CQL INSERT requests.")), - - sm::make_derive("updates", _cql_stats.updates, - sm::description("Counts a total number of CQL UPDATE requests.")), - - sm::make_derive("deletes", _cql_stats.deletes, - sm::description("Counts a total number of CQL DELETE requests.")), - - sm::make_derive("batches", _cql_stats.batches, - sm::description("Counts a total number of CQL BATCH requests.")), - - sm::make_derive("statements_in_batches", _cql_stats.statements_in_batches, - sm::description("Counts a total number of sub-statements in CQL BATCH requests.")), - - sm::make_derive("batches_pure_logged", _cql_stats.batches_pure_logged, - sm::description("Counts a total number of LOGGED batches that were executed as LOGGED batches.")), - - sm::make_derive("batches_pure_unlogged", _cql_stats.batches_pure_unlogged, - sm::description("Counts a total number of UNLOGGED 
batches that were executed as UNLOGGED batches.")), - - sm::make_derive("batches_unlogged_from_logged", _cql_stats.batches_unlogged_from_logged, - sm::description("Counts a total number of LOGGED batches that were executed as UNLOGGED batches.")), - }); - - service::get_local_migration_manager().register_listener(_migration_subscriber.get()); -} - -query_processor::~query_processor() -{} - -future<> query_processor::stop() -{ - service::get_local_migration_manager().unregister_listener(_migration_subscriber.get()); - return make_ready_future<>(); -} - -future<::shared_ptr> -query_processor::process(const sstring_view& query_string, service::query_state& query_state, query_options& options) -{ - log.trace("process: \"{}\"", query_string); - tracing::trace(query_state.get_trace_state(), "Parsing a statement"); - auto p = get_statement(query_string, query_state.get_client_state()); - options.prepare(p->bound_names); - auto cql_statement = p->statement; - if (cql_statement->get_bound_terms() != options.get_values_count()) { - throw exceptions::invalid_request_exception("Invalid amount of bind variables"); - } - - warn(unimplemented::cause::METRICS); -#if 0 - if (!queryState.getClientState().isInternal) - metrics.regularStatementsExecuted.inc(); -#endif - tracing::trace(query_state.get_trace_state(), "Processing a statement"); - return process_statement(std::move(cql_statement), query_state, options); -} - -future<::shared_ptr> -query_processor::process_statement(::shared_ptr statement, - service::query_state& query_state, - const query_options& options) -{ -#if 0 - logger.trace("Process {} @CL.{}", statement, options.getConsistency()); -#endif - - return statement->check_access(query_state.get_client_state()).then([this, statement, &query_state, &options]() { - auto& client_state = query_state.get_client_state(); - - statement->validate(_proxy, client_state); - - auto fut = make_ready_future<::shared_ptr>(); - if (client_state.is_internal()) { - fut = statement->execute_internal(_proxy, query_state, options); - } else { - fut = statement->execute(_proxy, query_state, options); - } - - return fut.then([statement] (auto msg) { - if (msg) { - return make_ready_future<::shared_ptr>(std::move(msg)); - } - return make_ready_future<::shared_ptr>( - ::make_shared()); - }); - }); -} - -future<::shared_ptr> -query_processor::prepare(const std::experimental::string_view& query_string, service::query_state& query_state) -{ - auto& client_state = query_state.get_client_state(); - return prepare(query_string, client_state, client_state.is_thrift()); -} - -future<::shared_ptr> -query_processor::prepare(const std::experimental::string_view& query_string, - const service::client_state& client_state, - bool for_thrift) -{ - auto existing = get_stored_prepared_statement(query_string, client_state.get_raw_keyspace(), for_thrift); - if (existing) { - return make_ready_future<::shared_ptr>(existing); - } - - return futurize<::shared_ptr>::apply([this, &query_string, &client_state, for_thrift] { - auto prepared = get_statement(query_string, client_state); - auto bound_terms = prepared->statement->get_bound_terms(); - if (bound_terms > std::numeric_limits::max()) { - throw exceptions::invalid_request_exception(sprint("Too many markers(?). 
%d markers exceed the allowed maximum of %d", bound_terms, std::numeric_limits::max())); - } - assert(bound_terms == prepared->bound_names.size()); - return store_prepared_statement(query_string, client_state.get_raw_keyspace(), std::move(prepared), for_thrift); - }); -} - -::shared_ptr -query_processor::get_stored_prepared_statement(const std::experimental::string_view& query_string, - const sstring& keyspace, - bool for_thrift) -{ - if (for_thrift) { - auto statement_id = compute_thrift_id(query_string, keyspace); - auto it = _thrift_prepared_statements.find(statement_id); - if (it == _thrift_prepared_statements.end()) { - return ::shared_ptr(); - } - return ::make_shared(statement_id, it->second->checked_weak_from_this()); - } else { - auto statement_id = compute_id(query_string, keyspace); - auto it = _prepared_statements.find(statement_id); - if (it == _prepared_statements.end()) { - return ::shared_ptr(); - } - return ::make_shared(statement_id, it->second->checked_weak_from_this()); - } -} - -future<::shared_ptr> -query_processor::store_prepared_statement(const std::experimental::string_view& query_string, - const sstring& keyspace, - std::unique_ptr prepared, - bool for_thrift) -{ -#if 0 - // Concatenate the current keyspace so we don't mix prepared statements between keyspace (#5352). - // (if the keyspace is null, queryString has to have a fully-qualified keyspace so it's fine. - long statementSize = measure(prepared.statement); - // don't execute the statement if it's bigger than the allowed threshold - if (statementSize > MAX_CACHE_PREPARED_MEMORY) - throw new InvalidRequestException(String.format("Prepared statement of size %d bytes is larger than allowed maximum of %d bytes.", - statementSize, - MAX_CACHE_PREPARED_MEMORY)); -#endif - prepared->raw_cql_statement = query_string.data(); - if (for_thrift) { - auto statement_id = compute_thrift_id(query_string, keyspace); - auto msg = ::make_shared(statement_id, prepared->checked_weak_from_this()); - _thrift_prepared_statements.emplace(statement_id, std::move(prepared)); - return make_ready_future<::shared_ptr>(std::move(msg)); - } else { - auto statement_id = compute_id(query_string, keyspace); - auto msg = ::make_shared(statement_id, prepared->checked_weak_from_this()); - _prepared_statements.emplace(statement_id, std::move(prepared)); - return make_ready_future<::shared_ptr>(std::move(msg)); - } -} - -static bytes md5_calculate(const std::experimental::string_view& s) -{ - constexpr size_t size = CryptoPP::Weak1::MD5::DIGESTSIZE; - CryptoPP::Weak::MD5 hash; - unsigned char digest[size]; - hash.CalculateDigest(digest, reinterpret_cast(s.data()), s.size()); - return std::move(bytes{reinterpret_cast(digest), size}); -} - -static sstring hash_target(const std::experimental::string_view& query_string, const sstring& keyspace) { - return keyspace + query_string.to_string(); -} - -bytes query_processor::compute_id(const std::experimental::string_view& query_string, const sstring& keyspace) -{ - return md5_calculate(hash_target(query_string, keyspace)); -} - -int32_t query_processor::compute_thrift_id(const std::experimental::string_view& query_string, const sstring& keyspace) -{ - auto target = hash_target(query_string, keyspace); - uint32_t h = 0; - for (auto&& c : hash_target(query_string, keyspace)) { - h = 31*h + c; - } - return static_cast(h); -} - -std::unique_ptr -query_processor::get_statement(const sstring_view& query, const service::client_state& client_state) -{ -#if 0 - Tracing.trace("Parsing {}", queryStr); -#endif - 
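The deleted compute_thrift_id above keeps thrift prepared-statement IDs compatible with Cassandra by reproducing Java's String.hashCode: h = 31*h + c with 32-bit wrap-around. (It also computes hash_target twice, once into an unused local; harmless, but redundant.) A self-contained sketch of the same accumulator (thrift_statement_id is an illustrative name):

#include <cstdint>
#include <string>

// Java String.hashCode-style accumulator over keyspace + query, then
// reinterpreted as a signed 32-bit id.
int32_t thrift_statement_id(const std::string& keyspace, const std::string& query) {
    const std::string target = keyspace + query;   // the "hash target"
    uint32_t h = 0;
    for (char c : target) {
        h = 31u * h + static_cast<unsigned char>(c);   // unsigned wrap-around
    }
    return static_cast<int32_t>(h);
}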
::shared_ptr statement = parse_statement(query); - - // Set keyspace for statement that require login - auto cf_stmt = dynamic_pointer_cast(statement); - if (cf_stmt) { - cf_stmt->prepare_keyspace(client_state); - } -#if 0 - Tracing.trace("Preparing statement"); -#endif - ++_stats.prepare_invocations; - return statement->prepare(_db.local(), _cql_stats); -} - -::shared_ptr -query_processor::parse_statement(const sstring_view& query) -{ - try { - auto statement = util::do_with_parser(query, std::mem_fn(&cql3_parser::CqlParser::query)); - if (!statement) { - throw exceptions::syntax_exception("Parsing failed"); - } - return statement; - } catch (const exceptions::recognition_exception& e) { - throw exceptions::syntax_exception(sprint("Invalid or malformed CQL query string: %s", e.what())); - } catch (const exceptions::cassandra_exception& e) { - throw; - } catch (const std::exception& e) { - log.error("The statement: {} could not be parsed: {}", query, e.what()); - throw exceptions::syntax_exception(sprint("Failed parsing statement: [%s] reason: %s", query, e.what())); - } -} - -query_options query_processor::make_internal_options(const statements::prepared_statement::checked_weak_ptr& p, - const std::initializer_list& values, - db::consistency_level cl) -{ - if (p->bound_names.size() != values.size()) { - throw std::invalid_argument(sprint("Invalid number of values. Expecting %d but got %d", p->bound_names.size(), values.size())); - } - auto ni = p->bound_names.begin(); - std::vector bound_values; - for (auto& v : values) { - auto& n = *ni++; - if (v.type() == bytes_type) { - bound_values.push_back(cql3::raw_value::make_value(value_cast(v))); - } else if (v.is_null()) { - bound_values.push_back(cql3::raw_value::make_null()); - } else { - bound_values.push_back(cql3::raw_value::make_value(n->type->decompose(v))); - } - } - return query_options(cl, bound_values); -} - -statements::prepared_statement::checked_weak_ptr query_processor::prepare_internal(const sstring& query_string) -{ - auto& p = _internal_statements[query_string]; - if (p == nullptr) { - auto np = parse_statement(query_string)->prepare(_db.local(), _cql_stats); - np->statement->validate(_proxy, *_internal_state); - p = std::move(np); // inserts it into map - } - return p->checked_weak_from_this(); -} - -future<::shared_ptr> -query_processor::execute_internal(const sstring& query_string, - const std::initializer_list& values) -{ - if (log.is_enabled(logging::log_level::trace)) { - log.trace("execute_internal: \"{}\" ({})", query_string, ::join(", ", values)); - } - return execute_internal(prepare_internal(query_string), values); -} - -future<::shared_ptr> -query_processor::execute_internal(statements::prepared_statement::checked_weak_ptr p, - const std::initializer_list& values) -{ - auto opts = make_internal_options(p, values); - return do_with(std::move(opts), [this, p = std::move(p)](auto& opts) { - return p->statement->execute_internal(_proxy, *_internal_state, opts).then([stmt = p->statement](auto msg) { - return make_ready_future<::shared_ptr>(::make_shared(msg)); - }); - }); -} - -future<::shared_ptr> -query_processor::process(const sstring& query_string, - db::consistency_level cl, - const std::initializer_list& values, - bool cache) -{ - if (cache) { - return process(prepare_internal(query_string), cl, values); - } else { - auto p = parse_statement(query_string)->prepare(_db.local(), _cql_stats); - p->statement->validate(_proxy, *_internal_state); - auto checked_weak_ptr = p->checked_weak_from_this(); - return 
process(std::move(checked_weak_ptr), cl, values).finally([p = std::move(p)] {}); - } -} - -future<::shared_ptr> -query_processor::process(statements::prepared_statement::checked_weak_ptr p, - db::consistency_level cl, - const std::initializer_list& values) -{ - auto opts = make_internal_options(p, values, cl); - return do_with(std::move(opts), [this, p = std::move(p)](auto & opts) { - return p->statement->execute(_proxy, *_internal_state, opts).then([](auto msg) { - return make_ready_future<::shared_ptr>(::make_shared(msg)); - }); - }); -} - -future<::shared_ptr> -query_processor::process_batch(::shared_ptr batch, - service::query_state& query_state, - query_options& options) -{ - return batch->check_access(query_state.get_client_state()).then([this, &query_state, &options, batch] { - batch->validate(); - batch->validate(_proxy, query_state.get_client_state()); - return batch->execute(_proxy, query_state, options); - }); -} - -query_processor::migration_subscriber::migration_subscriber(query_processor* qp) - : _qp{qp} -{ -} - -void query_processor::migration_subscriber::on_create_keyspace(const sstring& ks_name) -{ -} - -void query_processor::migration_subscriber::on_create_column_family(const sstring& ks_name, const sstring& cf_name) -{ -} - -void query_processor::migration_subscriber::on_create_user_type(const sstring& ks_name, const sstring& type_name) -{ -} - -void query_processor::migration_subscriber::on_create_function(const sstring& ks_name, const sstring& function_name) -{ - log.warn("{} event ignored", __func__); -} - -void query_processor::migration_subscriber::on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) -{ - log.warn("{} event ignored", __func__); -} - -void query_processor::migration_subscriber::on_create_view(const sstring& ks_name, const sstring& view_name) -{ -} - -void query_processor::migration_subscriber::on_update_keyspace(const sstring& ks_name) -{ -} - -void query_processor::migration_subscriber::on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool columns_changed) -{ - // #1255: Ignoring columns_changed deliberately. 
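prepare_internal above memoizes parsed internal statements in _internal_statements, keyed by the raw query string, so repeated internal queries skip the parser. The cache shape, reduced to a sketch (prepared and internal_cache are illustrative stand-ins, not the real types):

#include <memory>
#include <string>
#include <unordered_map>

struct prepared {};   // stand-in for a parsed, validated statement

class internal_cache {
    std::unordered_map<std::string, std::unique_ptr<prepared>> _statements;
public:
    // Parse and validate on first use, then hand back the cached entry.
    prepared& get(const std::string& query) {
        auto& slot = _statements[query];   // default-constructs an empty slot
        if (!slot) {
            slot = std::make_unique<prepared>();   // parse + validate would go here
        }
        return *slot;
    }
};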
-    log.info("Column definitions for {}.{} changed, invalidating related prepared statements", ks_name, cf_name);
-    remove_invalid_prepared_statements(ks_name, cf_name);
-}
-
-void query_processor::migration_subscriber::on_update_user_type(const sstring& ks_name, const sstring& type_name)
-{
-}
-
-void query_processor::migration_subscriber::on_update_function(const sstring& ks_name, const sstring& function_name)
-{
-}
-
-void query_processor::migration_subscriber::on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name)
-{
-}
-
-void query_processor::migration_subscriber::on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed)
-{
-}
-
-void query_processor::migration_subscriber::on_drop_keyspace(const sstring& ks_name)
-{
-    remove_invalid_prepared_statements(ks_name, std::experimental::nullopt);
-}
-
-void query_processor::migration_subscriber::on_drop_column_family(const sstring& ks_name, const sstring& cf_name)
-{
-    remove_invalid_prepared_statements(ks_name, cf_name);
-}
-
-void query_processor::migration_subscriber::on_drop_user_type(const sstring& ks_name, const sstring& type_name)
-{
-}
-
-void query_processor::migration_subscriber::on_drop_function(const sstring& ks_name, const sstring& function_name)
-{
-    log.warn("{} event ignored", __func__);
-}
-
-void query_processor::migration_subscriber::on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name)
-{
-    log.warn("{} event ignored", __func__);
-}
-
-void query_processor::migration_subscriber::on_drop_view(const sstring& ks_name, const sstring& view_name)
-{
-}
-
-void query_processor::migration_subscriber::remove_invalid_prepared_statements(sstring ks_name, std::experimental::optional<sstring> cf_name)
-{
-    _qp->invalidate_prepared_statements([&] (::shared_ptr<cql_statement> stmt) {
-        return this->should_invalidate(ks_name, cf_name, stmt);
-    });
-}
-
-bool query_processor::migration_subscriber::should_invalidate(sstring ks_name, std::experimental::optional<sstring> cf_name, ::shared_ptr<cql_statement> statement)
-{
-    return statement->depends_on_keyspace(ks_name) && (!cf_name || statement->depends_on_column_family(*cf_name));
-}
-
-}
diff --git a/scylla/cql3/query_processor.hh b/scylla/cql3/query_processor.hh
deleted file mode 100644
index 0e4ed60..0000000
--- a/scylla/cql3/query_processor.hh
+++ /dev/null
@@ -1,559 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
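All the migration_subscriber callbacks above funnel into remove_invalid_prepared_statements, which drops any cached statement that depends on the altered or dropped keyspace or table. The erase-while-iterating loop it relies on (invalidate_prepared_statements, defined later in this header) looks like this in isolation (the map and statement types are illustrative):

#include <memory>
#include <string>
#include <unordered_map>

struct statement {               // illustrative stand-in for cql_statement
    std::string keyspace;
    std::string table;
};

// Erase every cached entry the predicate marks as invalid -- the same
// loop the deleted header runs over both the CQL and the thrift maps.
template <typename Map, typename Pred>
void invalidate(Map& cache, Pred should_invalidate) {
    for (auto it = cache.begin(); it != cache.end(); ) {
        if (should_invalidate(*it->second)) {
            it = cache.erase(it);   // erase() returns the next iterator
        } else {
            ++it;
        }
    }
}

// Usage: drop everything touching a dropped table.
//   std::unordered_map<std::string, std::unique_ptr<statement>> cache;
//   invalidate(cache, [&](const statement& s) {
//       return s.keyspace == "ks1" && s.table == "cf1";
//   });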
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include -#include - -#include "core/shared_ptr.hh" -#include "exceptions/exceptions.hh" -#include "cql3/query_options.hh" -#include "cql3/statements/raw/parsed_statement.hh" -#include "cql3/statements/raw/cf_statement.hh" -#include "service/migration_manager.hh" -#include "service/query_state.hh" -#include "log.hh" -#include "core/distributed.hh" -#include "statements/prepared_statement.hh" -#include "transport/messages/result_message.hh" -#include "untyped_result_set.hh" - -namespace cql3 { - -namespace statements { -class batch_statement; -} - -class query_processor { -public: - class migration_subscriber; -private: - std::unique_ptr _migration_subscriber; - distributed& _proxy; - distributed& _db; - - struct stats { - uint64_t prepare_invocations = 0; - } _stats; - - cql_stats _cql_stats; - - seastar::metrics::metric_groups _metrics; - - class internal_state; - std::unique_ptr _internal_state; - -public: - query_processor(distributed& proxy, distributed& db); - ~query_processor(); - - static const sstring CQL_VERSION; - - distributed& db() { - return _db; - } - distributed& proxy() { - return _proxy; - } - - cql_stats& get_cql_stats() { - return _cql_stats; - } - -#if 0 - public static final QueryProcessor instance = new QueryProcessor(); -#endif -private: -#if 0 - private static final Logger logger = LoggerFactory.getLogger(QueryProcessor.class); - private static final MemoryMeter meter = new MemoryMeter().withGuessing(MemoryMeter.Guess.FALLBACK_BEST).ignoreKnownSingletons(); - private static final long MAX_CACHE_PREPARED_MEMORY = Runtime.getRuntime().maxMemory() / 256; - - private static EntryWeigher cqlMemoryUsageWeigher = new EntryWeigher() - { - @Override - public int weightOf(MD5Digest key, ParsedStatement.Prepared value) - { - return Ints.checkedCast(measure(key) + measure(value.statement) + measure(value.boundNames)); - } - }; - - private static EntryWeigher thriftMemoryUsageWeigher = new EntryWeigher() - { - @Override - public int weightOf(Integer key, ParsedStatement.Prepared value) - { - return Ints.checkedCast(measure(key) + measure(value.statement) + measure(value.boundNames)); - } - }; -#endif - - std::unordered_map> _prepared_statements; - std::unordered_map> _thrift_prepared_statements; - std::unordered_map> _internal_statements; -#if 0 - - // A map for prepared statements used internally (which we don't want to mix with user statement, in particular we don't - // bother with expiration on those. - private static final ConcurrentMap internalStatements = new ConcurrentHashMap<>(); - - // Direct calls to processStatement do not increment the preparedStatementsExecuted/regularStatementsExecuted - // counters. 
Callers of processStatement are responsible for correctly notifying metrics - public static final CQLMetrics metrics = new CQLMetrics(); - - private static final AtomicInteger lastMinuteEvictionsCount = new AtomicInteger(0); - - static - { - preparedStatements = new ConcurrentLinkedHashMap.Builder() - .maximumWeightedCapacity(MAX_CACHE_PREPARED_MEMORY) - .weigher(cqlMemoryUsageWeigher) - .listener(new EvictionListener() - { - public void onEviction(MD5Digest md5Digest, ParsedStatement.Prepared prepared) - { - metrics.preparedStatementsEvicted.inc(); - lastMinuteEvictionsCount.incrementAndGet(); - } - }).build(); - - thriftPreparedStatements = new ConcurrentLinkedHashMap.Builder() - .maximumWeightedCapacity(MAX_CACHE_PREPARED_MEMORY) - .weigher(thriftMemoryUsageWeigher) - .listener(new EvictionListener() - { - public void onEviction(Integer integer, ParsedStatement.Prepared prepared) - { - metrics.preparedStatementsEvicted.inc(); - lastMinuteEvictionsCount.incrementAndGet(); - } - }) - .build(); - - ScheduledExecutors.scheduledTasks.scheduleAtFixedRate(new Runnable() - { - public void run() - { - long count = lastMinuteEvictionsCount.getAndSet(0); - if (count > 0) - logger.info("{} prepared statements discarded in the last minute because cache limit reached ({} bytes)", - count, - MAX_CACHE_PREPARED_MEMORY); - } - }, 1, 1, TimeUnit.MINUTES); - } - - public static int preparedStatementsCount() - { - return preparedStatements.size() + thriftPreparedStatements.size(); - } - - // Work around initialization dependency - private static enum InternalStateInstance - { - INSTANCE; - - private final QueryState queryState; - - InternalStateInstance() - { - ClientState state = ClientState.forInternalCalls(); - try - { - state.setKeyspace(SystemKeyspace.NAME); - } - catch (InvalidRequestException e) - { - throw new RuntimeException(); - } - this.queryState = new QueryState(state); - } - } - - private static QueryState internalQueryState() - { - return InternalStateInstance.INSTANCE.queryState; - } - - private QueryProcessor() - { - MigrationManager.instance.register(new MigrationSubscriber()); - } -#endif -public: - statements::prepared_statement::checked_weak_ptr get_prepared(const bytes& id) { - auto it = _prepared_statements.find(id); - if (it == _prepared_statements.end()) { - return statements::prepared_statement::checked_weak_ptr(); - } - return it->second->checked_weak_from_this(); - } - - statements::prepared_statement::checked_weak_ptr get_prepared_for_thrift(int32_t id) { - auto it = _thrift_prepared_statements.find(id); - if (it == _thrift_prepared_statements.end()) { - return statements::prepared_statement::checked_weak_ptr(); - } - return it->second->checked_weak_from_this(); - } -#if 0 - public static void validateKey(ByteBuffer key) throws InvalidRequestException - { - if (key == null || key.remaining() == 0) - { - throw new InvalidRequestException("Key may not be empty"); - } - - // check that key can be handled by FBUtilities.writeShortByteArray - if (key.remaining() > FBUtilities.MAX_UNSIGNED_SHORT) - { - throw new InvalidRequestException("Key length of " + key.remaining() + - " is longer than maximum of " + FBUtilities.MAX_UNSIGNED_SHORT); - } - } - - public static void validateCellNames(Iterable cellNames, CellNameType type) throws InvalidRequestException - { - for (CellName name : cellNames) - validateCellName(name, type); - } - - public static void validateCellName(CellName name, CellNameType type) throws InvalidRequestException - { - validateComposite(name, type); - if 
(name.isEmpty()) - throw new InvalidRequestException("Invalid empty value for clustering column of COMPACT TABLE"); - } - - public static void validateComposite(Composite name, CType type) throws InvalidRequestException - { - long serializedSize = type.serializer().serializedSize(name, TypeSizes.NATIVE); - if (serializedSize > Cell.MAX_NAME_LENGTH) - throw new InvalidRequestException(String.format("The sum of all clustering columns is too long (%s > %s)", - serializedSize, - Cell.MAX_NAME_LENGTH)); - } -#endif -public: - future<::shared_ptr> process_statement(::shared_ptr statement, - service::query_state& query_state, const query_options& options); - -#if 0 - public static ResultMessage process(String queryString, ConsistencyLevel cl, QueryState queryState) - throws RequestExecutionException, RequestValidationException - { - return instance.process(queryString, queryState, QueryOptions.forInternalCalls(cl, Collections.emptyList())); - } -#endif - - future<::shared_ptr> process(const std::experimental::string_view& query_string, - service::query_state& query_state, query_options& options); - -#if 0 - public static ParsedStatement.Prepared parseStatement(String queryStr, QueryState queryState) throws RequestValidationException - { - return getStatement(queryStr, queryState.getClientState()); - } - - public static UntypedResultSet process(String query, ConsistencyLevel cl) throws RequestExecutionException - { - try - { - ResultMessage result = instance.process(query, QueryState.forInternalCalls(), QueryOptions.forInternalCalls(cl, Collections.emptyList())); - if (result instanceof ResultMessage.Rows) - return UntypedResultSet.create(((ResultMessage.Rows)result).result); - else - return null; - } - catch (RequestValidationException e) - { - throw new RuntimeException(e); - } - } - - private static QueryOptions makeInternalOptions(ParsedStatement.Prepared prepared, Object[] values) - { - if (prepared.boundNames.size() != values.length) - throw new IllegalArgumentException(String.format("Invalid number of values. Expecting %d but got %d", prepared.boundNames.size(), values.length)); - - List boundValues = new ArrayList(values.length); - for (int i = 0; i < values.length; i++) - { - Object value = values[i]; - AbstractType type = prepared.boundNames.get(i).type; - boundValues.add(value instanceof ByteBuffer || value == null ? 
(ByteBuffer)value : type.decompose(value)); - } - return QueryOptions.forInternalCalls(boundValues); - } - - private static ParsedStatement.Prepared prepareInternal(String query) throws RequestValidationException - { - ParsedStatement.Prepared prepared = internalStatements.get(query); - if (prepared != null) - return prepared; - - // Note: if 2 threads prepare the same query, we'll live so don't bother synchronizing - prepared = parseStatement(query, internalQueryState()); - prepared.statement.validate(internalQueryState().getClientState()); - internalStatements.putIfAbsent(query, prepared); - return prepared; - } -#endif -private: - query_options make_internal_options(const statements::prepared_statement::checked_weak_ptr& p, const std::initializer_list&, db::consistency_level = db::consistency_level::ONE); -public: - future<::shared_ptr> execute_internal( - const sstring& query_string, - const std::initializer_list& = { }); - - statements::prepared_statement::checked_weak_ptr prepare_internal(const sstring& query); - - future<::shared_ptr> execute_internal( - statements::prepared_statement::checked_weak_ptr p, - const std::initializer_list& = { }); - - future<::shared_ptr> process( - const sstring& query_string, - db::consistency_level, const std::initializer_list& = { }, bool cache = false); - future<::shared_ptr> process( - statements::prepared_statement::checked_weak_ptr p, - db::consistency_level, const std::initializer_list& = { }); - - /* - * This function provides a timestamp that is guaranteed to be higher than any timestamp - * previously used in internal queries. - * - * This is useful because the client_state have a built-in mechanism to guarantee monotonicity. - * Bypassing that mechanism by the use of some other clock may yield times in the past, even if the operation - * was done in the future. - */ - api::timestamp_type next_timestamp(); - -#if 0 - public static UntypedResultSet executeInternalWithPaging(String query, int pageSize, Object... values) - { - try - { - ParsedStatement.Prepared prepared = prepareInternal(query); - if (!(prepared.statement instanceof SelectStatement)) - throw new IllegalArgumentException("Only SELECTs can be paged"); - - SelectStatement select = (SelectStatement)prepared.statement; - QueryPager pager = QueryPagers.localPager(select.getPageableCommand(makeInternalOptions(prepared, values))); - return UntypedResultSet.create(select, pager, pageSize); - } - catch (RequestValidationException e) - { - throw new RuntimeException("Error validating query" + e); - } - } - - /** - * Same than executeInternal, but to use for queries we know are only executed once so that the - * created statement object is not cached. - */ - public static UntypedResultSet executeOnceInternal(String query, Object... 
values) - { - try - { - ParsedStatement.Prepared prepared = parseStatement(query, internalQueryState()); - prepared.statement.validate(internalQueryState().getClientState()); - ResultMessage result = prepared.statement.executeInternal(internalQueryState(), makeInternalOptions(prepared, values)); - if (result instanceof ResultMessage.Rows) - return UntypedResultSet.create(((ResultMessage.Rows)result).result); - else - return null; - } - catch (RequestExecutionException e) - { - throw new RuntimeException(e); - } - catch (RequestValidationException e) - { - throw new RuntimeException("Error validating query " + query, e); - } - } - - public static UntypedResultSet resultify(String query, Row row) - { - return resultify(query, Collections.singletonList(row)); - } - - public static UntypedResultSet resultify(String query, List rows) - { - try - { - SelectStatement ss = (SelectStatement) getStatement(query, null).statement; - ResultSet cqlRows = ss.process(rows); - return UntypedResultSet.create(cqlRows); - } - catch (RequestValidationException e) - { - throw new AssertionError(e); - } - } -#endif - - future<::shared_ptr> - prepare(const std::experimental::string_view& query_string, service::query_state& query_state); - - future<::shared_ptr> - prepare(const std::experimental::string_view& query_string, const service::client_state& client_state, bool for_thrift); - - static bytes compute_id(const std::experimental::string_view& query_string, const sstring& keyspace); - static int32_t compute_thrift_id(const std::experimental::string_view& query_string, const sstring& keyspace); - -private: - ::shared_ptr - get_stored_prepared_statement(const std::experimental::string_view& query_string, const sstring& keyspace, bool for_thrift); - - future<::shared_ptr> - store_prepared_statement(const std::experimental::string_view& query_string, const sstring& keyspace, std::unique_ptr prepared, bool for_thrift); - - // Erases the statements for which filter returns true. - template - void invalidate_prepared_statements(Pred filter) { - static_assert(std::is_same)>>::value, - "bad Pred signature"); - for (auto it = _prepared_statements.begin(); it != _prepared_statements.end(); ) { - if (filter(it->second->statement)) { - it = _prepared_statements.erase(it); - } else { - ++it; - } - } - for (auto it = _thrift_prepared_statements.begin(); it != _thrift_prepared_statements.end(); ) { - if (filter(it->second->statement)) { - it = _thrift_prepared_statements.erase(it); - } else { - ++it; - } - } - } - -#if 0 - public ResultMessage processPrepared(CQLStatement statement, QueryState queryState, QueryOptions options) - throws RequestExecutionException, RequestValidationException - { - List variables = options.getValues(); - // Check to see if there are any bound variables to verify - if (!(variables.isEmpty() && (statement.getBoundTerms() == 0))) - { - if (variables.size() != statement.getBoundTerms()) - throw new InvalidRequestException(String.format("there were %d markers(?) 
in CQL but %d bound variables", - statement.getBoundTerms(), - variables.size())); - - // at this point there is a match in count between markers and variables that is non-zero - - if (logger.isTraceEnabled()) - for (int i = 0; i < variables.size(); i++) - logger.trace("[{}] '{}'", i+1, variables.get(i)); - } - - metrics.preparedStatementsExecuted.inc(); - return processStatement(statement, queryState, options); - } -#endif - -public: - future<::shared_ptr> process_batch(::shared_ptr, - service::query_state& query_state, query_options& options); - - std::unique_ptr get_statement(const std::experimental::string_view& query, - const service::client_state& client_state); - static ::shared_ptr parse_statement(const std::experimental::string_view& query); - -#if 0 - private static long measure(Object key) - { - return meter.measureDeep(key); - } -#endif -public: - future<> stop(); - - friend class migration_subscriber; -}; - -class query_processor::migration_subscriber : public service::migration_listener { - query_processor* _qp; -public: - migration_subscriber(query_processor* qp); - - virtual void on_create_keyspace(const sstring& ks_name) override; - virtual void on_create_column_family(const sstring& ks_name, const sstring& cf_name) override; - virtual void on_create_user_type(const sstring& ks_name, const sstring& type_name) override; - virtual void on_create_function(const sstring& ks_name, const sstring& function_name) override; - virtual void on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) override; - virtual void on_create_view(const sstring& ks_name, const sstring& view_name) override; - - virtual void on_update_keyspace(const sstring& ks_name) override; - virtual void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool columns_changed) override; - virtual void on_update_user_type(const sstring& ks_name, const sstring& type_name) override; - virtual void on_update_function(const sstring& ks_name, const sstring& function_name) override; - virtual void on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) override; - virtual void on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) override; - - virtual void on_drop_keyspace(const sstring& ks_name) override; - virtual void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override; - virtual void on_drop_user_type(const sstring& ks_name, const sstring& type_name) override; - virtual void on_drop_function(const sstring& ks_name, const sstring& function_name) override; - virtual void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) override; - virtual void on_drop_view(const sstring& ks_name, const sstring& view_name) override; -private: - void remove_invalid_prepared_statements(sstring ks_name, std::experimental::optional cf_name); - bool should_invalidate(sstring ks_name, std::experimental::optional cf_name, ::shared_ptr statement); -}; - -extern distributed _the_query_processor; - -inline distributed& get_query_processor() { - return _the_query_processor; -} - -inline query_processor& get_local_query_processor() { - return _the_query_processor.local(); -} - -} diff --git a/scylla/cql3/relation.cc b/scylla/cql3/relation.cc deleted file mode 100644 index 51dd409..0000000 --- a/scylla/cql3/relation.cc +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
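The extern distributed object and the two inline accessors just above are the stock seastar per-shard service pattern: start() constructs one instance per core, local() resolves the copy belonging to the current shard. A hedged sketch of the same wiring for a toy service, assuming the 2017-era seastar distributed<> API this codebase uses (echo_service is illustrative):

#include "core/distributed.hh"   // include style matching this codebase

class echo_service {
public:
    future<> stop() {            // distributed<> requires a stop() member
        return make_ready_future<>();
    }
};

distributed<echo_service> _the_echo_service;

inline distributed<echo_service>& get_echo_service() {
    return _the_echo_service;
}

inline echo_service& get_local_echo_service() {
    return _the_echo_service.local();   // this shard's instance
}

// Startup/shutdown (e.g. in main): _the_echo_service.start() constructs one
// instance per shard; _the_echo_service.stop() calls stop() on each of them.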
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "relation.hh" -#include "exceptions/unrecognized_entity_exception.hh" - -namespace cql3 { - -const column_definition& -relation::to_column_definition(schema_ptr schema, ::shared_ptr entity) { - auto id = entity->prepare_column_identifier(schema); - auto def = get_column_definition(schema, *id); - if (!def) { - throw exceptions::unrecognized_entity_exception(id, shared_from_this()); - } - return *def; -} - -} diff --git a/scylla/cql3/relation.hh b/scylla/cql3/relation.hh deleted file mode 100644 index e9c16b4..0000000 --- a/scylla/cql3/relation.hh +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
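relation::to_column_definition above is a lookup-or-throw: resolve the identifier against the schema, and fail the request with an error naming the unknown entity. Reduced to a sketch over std types (column_def and the map-based schema are illustrative):

#include <map>
#include <stdexcept>
#include <string>

struct column_def { std::string name; /* type, kind, ... */ };

// Resolve an identifier to a column definition, or reject the query.
const column_def& to_column_definition(const std::map<std::string, column_def>& schema,
                                       const std::string& id) {
    auto it = schema.find(id);
    if (it == schema.end()) {
        throw std::invalid_argument("Undefined name " + id + " in where clause");
    }
    return it->second;
}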
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "operator.hh"
-#include "schema.hh"
-#include "column_identifier.hh"
-#include "variable_specifications.hh"
-#include "restrictions/restriction.hh"
-#include "statements/bound.hh"
-#include "term.hh"
-
-namespace cql3 {
-
-class relation : public enable_shared_from_this<relation> {
-protected:
-    const operator_type& _relation_type;
-public:
-    relation(const operator_type& relation_type)
-        : _relation_type(relation_type) {
-    }
-    virtual ~relation() {}
-
-    virtual const operator_type& get_operator() const {
-        return _relation_type;
-    }
-
-    /**
-     * Checks if this relation applies to multiple columns.
-     *
-     * @return true if this relation applies to multiple columns, false otherwise.
-     */
-    virtual bool is_multi_column() const {
-        return false;
-    }
-
-    /**
-     * Checks if this relation is a token relation (e.g. token(a) = token(1)).
-     *
-     * @return true if this relation is a token relation, false otherwise.
-     */
-    virtual bool on_token() const {
-        return false;
-    }
-
-    /**
-     * Checks if the operator of this relation is a CONTAINS.
-     * @return true if the operator of this relation is a CONTAINS, false
-     * otherwise.
-     */
-    virtual bool is_contains() const final {
-        return _relation_type == operator_type::CONTAINS;
-    }
-
-    /**
-     * Checks if the operator of this relation is a CONTAINS_KEY.
-     * @return true if the operator of this relation is a CONTAINS_KEY, false
-     * otherwise.
-     */
-    virtual bool is_contains_key() const final {
-        return _relation_type == operator_type::CONTAINS_KEY;
-    }
-
-    /**
-     * Checks if the operator of this relation is an IN.
-     * @return true if the operator of this relation is an IN, false
-     * otherwise.
-     */
-    virtual bool is_IN() const final {
-        return _relation_type == operator_type::IN;
-    }
-
-    /**
-     * Checks if the operator of this relation is an EQ.
-     * @return true if the operator of this relation is an EQ, false
-     * otherwise.
-     */
-    virtual bool is_EQ() const final {
-        return _relation_type == operator_type::EQ;
-    }
-
-    /**
-     * Checks if the operator of this relation is a Slice (GT, GTE, LTE, LT).
-     *
-     * @return true if the operator of this relation is a Slice, false otherwise.
-     */
-    virtual bool is_slice() const final {
-        return _relation_type == operator_type::GT
-            || _relation_type == operator_type::GTE
-            || _relation_type == operator_type::LTE
-            || _relation_type == operator_type::LT;
-    }
-
-    /**
-     * Converts this Relation into a Restriction.
-     *
-     * @param cfm the Column Family meta data
-     * @param boundNames the variables specification where to collect the bind variables
-     * @return the Restriction corresponding to this Relation
-     * @throws InvalidRequestException if this Relation is not valid
-     */
-    virtual ::shared_ptr<restrictions::restriction> to_restriction(database& db, schema_ptr schema, ::shared_ptr<variable_specifications> bound_names) final {
-        if (_relation_type == operator_type::EQ) {
-            return new_EQ_restriction(db, schema, bound_names);
-        } else if (_relation_type == operator_type::LT) {
-            return new_slice_restriction(db, schema, bound_names, statements::bound::END, false);
-        } else if (_relation_type == operator_type::LTE) {
-            return new_slice_restriction(db, schema, bound_names, statements::bound::END, true);
-        } else if (_relation_type == operator_type::GTE) {
-            return new_slice_restriction(db, schema, bound_names, statements::bound::START, true);
-        } else if (_relation_type == operator_type::GT) {
-            return new_slice_restriction(db, schema, bound_names, statements::bound::START, false);
-        } else if (_relation_type == operator_type::IN) {
-            return new_IN_restriction(db, schema, bound_names);
-        } else if (_relation_type == operator_type::CONTAINS) {
-            return new_contains_restriction(db, schema, bound_names, false);
-        } else if (_relation_type == operator_type::CONTAINS_KEY) {
-            return new_contains_restriction(db, schema, bound_names, true);
-        } else if (_relation_type == operator_type::IS_NOT) {
-            // This case is not supposed to happen: statement_restrictions
-            // constructor does not call this function for views' IS_NOT.
- throw exceptions::invalid_request_exception(sprint("Unsupported \"IS NOT\" relation: %s", to_string())); - } else { - throw exceptions::invalid_request_exception(sprint("Unsupported \"!=\" relation: %s", to_string())); - } - } - - virtual sstring to_string() const = 0; - - friend std::ostream& operator<<(std::ostream& out, const relation& r) { - return out << r.to_string(); - } - - /** - * Creates a new EQ restriction instance. - * - * @param cfm the Column Family meta data - * @param boundNames the variables specification where to collect the bind variables - * @return a new EQ restriction instance. - * @throws InvalidRequestException if the relation cannot be converted into an EQ restriction. - */ - virtual ::shared_ptr new_EQ_restriction(database& db, schema_ptr schema, - ::shared_ptr bound_names) = 0; - - /** - * Creates a new IN restriction instance. - * - * @param cfm the Column Family meta data - * @param bound_names the variables specification where to collect the bind variables - * @return a new IN restriction instance - * @throws InvalidRequestException if the relation cannot be converted into an IN restriction. - */ - virtual ::shared_ptr new_IN_restriction(database& db, schema_ptr schema, - ::shared_ptr bound_names) = 0; - - /** - * Creates a new Slice restriction instance. - * - * @param cfm the Column Family meta data - * @param bound_names the variables specification where to collect the bind variables - * @param bound the slice bound - * @param inclusive true if the bound is included. - * @return a new slice restriction instance - * @throws InvalidRequestException if the Relation is not valid - */ - virtual ::shared_ptr new_slice_restriction(database& db, schema_ptr schema, - ::shared_ptr bound_names, - statements::bound bound, - bool inclusive) = 0; - - /** - * Creates a new Contains restriction instance. - * - * @param cfm the Column Family meta data - * @param bound_names the variables specification where to collect the bind variables - * @param isKey true if the restriction to create is a CONTAINS KEY - * @return a new Contains ::shared_ptr instance - * @throws InvalidRequestException if the Relation is not valid - */ - virtual ::shared_ptr new_contains_restriction(database& db, schema_ptr schema, - ::shared_ptr bound_names, bool isKey) = 0; - - /** - * Renames an identifier in this Relation, if applicable. - * @param from the old identifier - * @param to the new identifier - * @return a pointer object, if the old identifier is not in the set of entities that this relation covers; - * otherwise a new Relation with "from" replaced by "to" is returned. - */ - virtual ::shared_ptr maybe_rename_identifier(const column_identifier::raw& from, column_identifier::raw to) = 0; - -protected: - - /** - * Converts the specified Raw into a Term. - * @param receivers the columns to which the values must be associated at - * @param raw the raw term to convert - * @param keyspace the keyspace name - * @param boundNames the variables specification where to collect the bind variables - * - * @return the Term corresponding to the specified Raw - * @throws InvalidRequestException if the Raw term is not valid - */ - virtual ::shared_ptr to_term(const std::vector<::shared_ptr>& receivers, - ::shared_ptr raw, - database& db, - const sstring& keyspace, - ::shared_ptr boundNames) = 0; - - /** - * Converts the specified Raw terms into a Terms. 
- * @param receivers the columns to which the values must be associated at - * @param raws the raw terms to convert - * @param keyspace the keyspace name - * @param boundNames the variables specification where to collect the bind variables - * - * @return the Terms corresponding to the specified Raw terms - * @throws InvalidRequestException if the Raw terms are not valid - */ - std::vector<::shared_ptr> to_terms(const std::vector<::shared_ptr>& receivers, - const std::vector<::shared_ptr>& raws, - database& db, - const sstring& keyspace, - ::shared_ptr boundNames) { - std::vector<::shared_ptr> terms; - for (auto&& r : raws) { - terms.emplace_back(to_term(receivers, r, db, keyspace, boundNames)); - } - return terms; - } - - /** - * Converts the specified entity into a column definition. - * - * @param cfm the column family meta data - * @param entity the entity to convert - * @return the column definition corresponding to the specified entity - * @throws InvalidRequestException if the entity cannot be recognized - */ - virtual const column_definition& to_column_definition(schema_ptr schema, ::shared_ptr entity) final; -}; - -using relation_ptr = ::shared_ptr; - -} diff --git a/scylla/cql3/restrictions/abstract_restriction.hh b/scylla/cql3/restrictions/abstract_restriction.hh deleted file mode 100644 index cd2255e..0000000 --- a/scylla/cql3/restrictions/abstract_restriction.hh +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
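The slice half of the to_restriction() dispatch in relation.hh above encodes a small table: LT and LTE restrict the END bound, GT and GTE restrict the START bound, and the inclusive flag is set for the *E variants. The same mapping as a standalone lookup (op, bound and slice_spec are illustrative names):

#include <stdexcept>

enum class op { eq, lt, lte, gt, gte };
enum class bound { start, end };

struct slice_spec { bound b; bool inclusive; };

// Operator -> slice bound, matching the dispatch in to_restriction().
slice_spec to_slice(op o) {
    switch (o) {
    case op::lt:  return {bound::end,   false};
    case op::lte: return {bound::end,   true};
    case op::gt:  return {bound::start, false};
    case op::gte: return {bound::start, true};
    default: throw std::invalid_argument("not a slice operator");
    }
}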
- */ - -#pragma once - -#include - -#include "core/shared_ptr.hh" -#include "core/sstring.hh" -#include "cql3/restrictions/restriction.hh" -#include "cql3/term.hh" -#include "types.hh" - -namespace cql3 { - -namespace restrictions { - -/** - * Base class for Restrictions - */ -class abstract_restriction : public restriction { -public: - virtual bool is_on_token() const override { - return false; - } - - virtual bool is_multi_column() const override { - return false; - } - - virtual bool is_slice() const override { - return false; - } - - virtual bool is_EQ() const override { - return false; - } - - virtual bool is_IN() const override { - return false; - } - - virtual bool is_contains() const override { - return false; - } - - virtual bool has_bound(statements::bound b) const override { - return true; - } - - virtual std::vector bounds(statements::bound b, const query_options& options) const override { - return values(options); - } - - virtual bool is_inclusive(statements::bound b) const override { - return true; - } - - /** - * Whether the specified row satisfied this restriction. - * Assumes the row is live, but not all cells. If a cell - * isn't live and there's a restriction on its column, - * then the function returns false. - * - * @param schema the schema the row belongs to - * @param key the partition key - * @param ckey the clustering key - * @param cells the remaining row columns - * @return the restriction resulting of the merge - * @throws InvalidRequestException if the restrictions cannot be merged - */ - virtual bool is_satisfied_by(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - const query_options& options, - gc_clock::time_point now) const = 0; - -protected: -#if 0 - protected static ByteBuffer validateIndexedValue(ColumnSpecification columnSpec, - ByteBuffer value) - throws InvalidRequestException - { - checkNotNull(value, "Unsupported null value for indexed column %s", columnSpec.name); - checkFalse(value.remaining() > 0xFFFF, "Index expression values may not be larger than 64K"); - return value; - } -#endif - /** - * Checks if the specified term is using the specified function. - * - * @param term the term to check - * @param ks_name the function keyspace name - * @param function_name the function name - * @return true if the specified term is using the specified function, false otherwise. - */ - static bool term_uses_function(::shared_ptr term, const sstring& ks_name, const sstring& function_name) { - return bool(term) && term->uses_function(ks_name, function_name); - } - - /** - * Checks if one of the specified term is using the specified function. - * - * @param terms the terms to check - * @param ks_name the function keyspace name - * @param function_name the function name - * @return true if one of the specified term is using the specified function, false otherwise. 
- */ - static bool term_uses_function(const std::vector<::shared_ptr>& terms, const sstring& ks_name, const sstring& function_name) { - for (auto&& value : terms) { - if (term_uses_function(value, ks_name, function_name)) { - return true; - } - } - return false; - } -}; - -} - -} diff --git a/scylla/cql3/restrictions/forwarding_primary_key_restrictions.hh b/scylla/cql3/restrictions/forwarding_primary_key_restrictions.hh deleted file mode 100644 index d3c4aa3..0000000 --- a/scylla/cql3/restrictions/forwarding_primary_key_restrictions.hh +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/restrictions/primary_key_restrictions.hh" - -namespace cql3 { - -namespace restrictions { - -/** - * A primary_key_restrictions which forwards all its method calls to another - * primary_key_restrictions. Subclasses should override one or more methods to modify the behavior - * of the backing primary_key_restrictions as desired per the decorator pattern. - */ -template -class forwarding_primary_key_restrictions : public primary_key_restrictions { - using bounds_range_type = typename primary_key_restrictions::bounds_range_type; -protected: - /** - * Returns the backing delegate instance that methods are forwarded to. 
- */ - virtual ::shared_ptr> get_delegate() const = 0; - -public: - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override { - return get_delegate()->uses_function(ks_name, function_name); - } - - virtual std::vector get_column_defs() const override { - return get_delegate()->get_column_defs(); - } - - virtual void merge_with(::shared_ptr restriction) override { - get_delegate()->merge_with(restriction); - } - -#if 0 - virtual bool has_supporting_index(::shared_ptr index_manager) override { - return get_delegate()->has_supporting_index(index_manager); - } -#endif - - virtual std::vector values(const query_options& options) const override { - return get_delegate()->values(options); - } - - virtual std::vector bounds(statements::bound b, const query_options& options) const override { - return get_delegate()->bounds(b, options); - } - - virtual std::vector values_as_keys(const query_options& options) const override { - return get_delegate()->values_as_keys(options); - } - - virtual std::vector bounds_ranges(const query_options& options) const override { - return get_delegate()->bounds_ranges(options); - } - - virtual bool is_on_token() const override { - return get_delegate()->is_on_token(); - } - - virtual bool is_multi_column() const override { - return get_delegate()->is_multi_column(); - } - - virtual bool is_slice() const override { - return get_delegate()->is_slice(); - } - - virtual bool is_contains() const override { - return get_delegate()->is_contains(); - } - - virtual bool is_IN() const override { - return get_delegate()->is_IN(); - } - - virtual bool empty() const override { - return get_delegate()->empty(); - } - - virtual uint32_t size() const override { - return get_delegate()->size(); - } - -#if 0 - virtual void addIndexExpressionTo(List expressions, QueryOptions options) { - get_delegate()->addIndexExpressionTo(expressions, options); - } -#endif - - sstring to_string() const override { - return get_delegate()->to_string(); - } -}; - -} -} diff --git a/scylla/cql3/restrictions/multi_column_restriction.hh b/scylla/cql3/restrictions/multi_column_restriction.hh deleted file mode 100644 index d4d014c..0000000 --- a/scylla/cql3/restrictions/multi_column_restriction.hh +++ /dev/null @@ -1,494 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
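forwarding_primary_key_restrictions above is a textbook decorator: every override forwards to get_delegate(), so a concrete subclass can intercept just the calls it cares about. The skeleton minus the template machinery (restrictions and forwarding_restrictions here are illustrative):

#include <string>

struct restrictions {
    virtual ~restrictions() = default;
    virtual bool is_slice() const = 0;
    virtual std::string to_string() const = 0;
};

// Forward everything to a delegate; subclasses override selectively and
// supply delegate() to name the backing instance.
class forwarding_restrictions : public restrictions {
protected:
    virtual const restrictions& delegate() const = 0;
public:
    bool is_slice() const override { return delegate().is_slice(); }
    std::string to_string() const override { return delegate().to_string(); }
};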
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include "cql3/tuples.hh" -#include "cql3/statements/request_validations.hh" -#include "cql3/restrictions/primary_key_restrictions.hh" -#include "cql3/statements/request_validations.hh" - -namespace cql3 { - -namespace restrictions { - -class multi_column_restriction : public primary_key_restrictions<clustering_key_prefix> { -protected: - schema_ptr _schema; - std::vector<const column_definition*> _column_defs; -public: - multi_column_restriction(schema_ptr schema, std::vector<const column_definition*>&& defs) - : _schema(schema) - , _column_defs(std::move(defs)) - { } - - virtual bool is_multi_column() const override { - return true; - } - - virtual std::vector<const column_definition*> get_column_defs() const override { - return _column_defs; - } - - virtual std::vector<bytes_opt> values(const query_options& options) const override { - auto src = values_as_keys(options); - std::vector<bytes_opt> res; - std::transform(src.begin(), src.end(), std::back_inserter(res), [this] (auto&& r) { - auto view = r.representation(); - return bytes(view.begin(), view.end()); - }); - return res; - } - - virtual void merge_with(::shared_ptr<restriction> other) override { - statements::request_validations::check_true(other->is_multi_column(), - "Mixing single column relations and multi column relations on clustering columns is not allowed"); - auto as_pkr = static_pointer_cast<primary_key_restrictions<clustering_key_prefix>>(other); - do_merge_with(as_pkr); - } - - bool is_satisfied_by(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - const query_options& options, - gc_clock::time_point now) const override { - for (auto&& range : bounds_ranges(options)) { - if (!range.contains(ckey, clustering_key_prefix::prefix_equal_tri_compare(schema))) { - return false; - } - } - return true; - } - -protected: - virtual void do_merge_with(::shared_ptr<primary_key_restrictions<clustering_key_prefix>> other) = 0; - - /** - * Returns the names of the columns that are specified within this Restrictions and the other one - * as a comma separated String. - * - * @param otherRestrictions the other restrictions - * @return the names of the columns that are specified within this Restrictions and the other one - * as a comma separated String. - */ - sstring get_columns_in_commons(::shared_ptr<restrictions> other) const { - auto ours = get_column_defs(); - auto theirs = other->get_column_defs(); - - std::sort(ours.begin(), ours.end()); - std::sort(theirs.begin(), theirs.end()); - std::vector<const column_definition*> common; - std::set_intersection(ours.begin(), ours.end(), theirs.begin(), theirs.end(), std::back_inserter(common)); - - sstring str; - for (auto&& c : common) { - if (!str.empty()) { - str += " ,"; - } - str += c->name_as_text(); - } - return str; - } -#if 0 - @Override - public final boolean hasSupportingIndex(SecondaryIndexManager indexManager) - { - for (ColumnDefinition columnDef : columnDefs) - { - SecondaryIndex index = indexManager.getIndexForColumn(columnDef.name.bytes); - if (index != null && isSupportedBy(index)) - return true; - } - return false; - } - - /** - * Check if this type of restriction is supported for the specified column by the specified index. - * @param index the Secondary index - * - * @return true this type of restriction is supported by the specified index, - * false otherwise.
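get_columns_in_commons above is a direct use of std::sort plus std::set_intersection over the two column lists; the joined names only feed the error messages thrown by the do_merge_with implementations. The same idiom in isolation, with strings standing in for column_definition pointers (a sketch, not the tree's code):

    #include <algorithm>
    #include <iostream>
    #include <iterator>
    #include <string>
    #include <vector>

    int main() {
        std::vector<std::string> ours   = {"c", "a", "b"};
        std::vector<std::string> theirs = {"b", "d", "c"};
        // set_intersection requires both ranges to be sorted.
        std::sort(ours.begin(), ours.end());
        std::sort(theirs.begin(), theirs.end());
        std::vector<std::string> common;
        std::set_intersection(ours.begin(), ours.end(),
                              theirs.begin(), theirs.end(),
                              std::back_inserter(common));
        for (auto& c : common) std::cout << c << ' ';   // prints: b c
    }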
- */ - protected abstract boolean isSupportedBy(SecondaryIndex index); -#endif -public: - class EQ; - class IN; - class IN_with_values; - class IN_with_marker; - - class slice; -}; - -class multi_column_restriction::EQ final : public multi_column_restriction { -private: - ::shared_ptr _value; -public: - EQ(schema_ptr schema, std::vector defs, ::shared_ptr value) - : multi_column_restriction(schema, std::move(defs)) - , _value(std::move(value)) - { } - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override { - return abstract_restriction::term_uses_function(_value, ks_name, function_name); - } - - virtual sstring to_string() const override { - return sprint("EQ(%s)", _value->to_string()); - } - - virtual void do_merge_with(::shared_ptr> other) override { - throw exceptions::invalid_request_exception(sprint( - "%s cannot be restricted by more than one relation if it includes an Equal", - get_columns_in_commons(other))); - } - - virtual std::vector values_as_keys(const query_options& options) const override { - return { composite_value(options) }; - }; - - virtual std::vector bounds_ranges(const query_options& options) const override { - return { bounds_range_type::make_singular(composite_value(options)) }; - } - -#if 0 - @Override - protected boolean isSupportedBy(SecondaryIndex index) - { - return index.supportsOperator(Operator.EQ); - } -#endif - - clustering_key_prefix composite_value(const query_options& options) const { - auto t = static_pointer_cast(_value->bind(options)); - auto values = t->get_elements(); - std::vector components; - for (unsigned i = 0; i < values.size(); i++) { - auto component = statements::request_validations::check_not_null(values[i], - "Invalid null value in condition for column %s", - _column_defs.at(i)->name_as_text()); - components.emplace_back(*component); - } - return clustering_key_prefix::from_exploded(*_schema, std::move(components)); - } - -#if 0 - @Override - public final void addIndexExpressionTo(List expressions, - QueryOptions options) throws InvalidRequestException - { - Tuples.Value t = ((Tuples.Value) value.bind(options)); - List values = t.getElements(); - for (int i = 0; i < values.size(); i++) - { - ColumnDefinition columnDef = columnDefs.get(i); - ByteBuffer component = validateIndexedValue(columnDef, values.get(i)); - expressions.add(new IndexExpression(columnDef.name.bytes, Operator.EQ, component)); - } - } -#endif -}; - -class multi_column_restriction::IN : public multi_column_restriction { -public: - using multi_column_restriction::multi_column_restriction; - - virtual bool is_IN() const override { - return true; - } - - virtual std::vector values_as_keys(const query_options& options) const override { - auto split_in_values = split_values(options); - std::vector keys; - for (auto&& components : split_in_values) { - for (unsigned i = 0; i < components.size(); i++) { - statements::request_validations::check_not_null(components[i], "Invalid null value in condition for column %s", _column_defs.at(i)->name_as_text()); - } - keys.emplace_back(clustering_key_prefix::from_optional_exploded(*_schema, components)); - } - std::sort(keys.begin(), keys.end(), clustering_key_prefix::less_compare(*_schema)); - keys.erase(std::unique(keys.begin(), keys.end(), clustering_key_prefix::equality(*_schema)), keys.end()); - return keys; - } - - virtual std::vector bounds_ranges(const query_options& options) const override { - auto split_in_values = split_values(options); - std::vector bounds; - for (auto&& components : 
split_in_values) { - for (unsigned i = 0; i < components.size(); i++) { - statements::request_validations::check_not_null(components[i], "Invalid null value in condition for column %s", _column_defs.at(i)->name_as_text()); - } - auto prefix = clustering_key_prefix::from_optional_exploded(*_schema, components); - bounds.emplace_back(bounds_range_type::make_singular(prefix)); - } - auto less_cmp = clustering_key_prefix::less_compare(*_schema); - std::sort(bounds.begin(), bounds.end(), [&] (bounds_range_type& x, bounds_range_type& y) { - return less_cmp(x.start()->value(), y.start()->value()); - }); - auto eq_cmp = clustering_key_prefix::equality(*_schema); - bounds.erase(std::unique(bounds.begin(), bounds.end(), [&] (bounds_range_type& x, bounds_range_type& y) { - return eq_cmp(x.start()->value(), y.start()->value()); - }), bounds.end()); - return bounds; - } - -#if 0 - @Override - public void addIndexExpressionTo(List expressions, - QueryOptions options) throws InvalidRequestException - { - List> splitInValues = splitValues(options); - checkTrue(splitInValues.size() == 1, "IN restrictions are not supported on indexed columns"); - - List values = splitInValues.get(0); - checkTrue(values.size() == 1, "IN restrictions are not supported on indexed columns"); - - ColumnDefinition columnDef = columnDefs.get(0); - ByteBuffer component = validateIndexedValue(columnDef, values.get(0)); - expressions.add(new IndexExpression(columnDef.name.bytes, Operator.EQ, component)); - } -#endif - - virtual void do_merge_with(::shared_ptr> other) override { - throw exceptions::invalid_request_exception(sprint("%s cannot be restricted by more than one relation if it includes a IN", - get_columns_in_commons(other))); - } - -#if 0 - @Override - protected boolean isSupportedBy(SecondaryIndex index) - { - return index.supportsOperator(Operator.IN); - } -#endif -protected: - virtual std::vector> split_values(const query_options& options) const = 0; -}; - -/** - * An IN restriction that has a set of terms for in values. - * For example: "SELECT ... WHERE (a, b, c) IN ((1, 2, 3), (4, 5, 6))" or "WHERE (a, b, c) IN (?, ?)" - */ -class multi_column_restriction::IN_with_values final : public multi_column_restriction::IN { -private: - std::vector<::shared_ptr> _values; -public: - IN_with_values(schema_ptr schema, std::vector defs, std::vector<::shared_ptr> value) - : multi_column_restriction::IN(schema, std::move(defs)) - , _values(std::move(value)) - { } - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override { - return abstract_restriction::term_uses_function(_values, ks_name, function_name); - } - - virtual sstring to_string() const override { - return sprint("IN(%s)", std::to_string(_values)); - } - -protected: - virtual std::vector> split_values(const query_options& options) const override { - std::vector> buffers(_values.size()); - std::transform(_values.begin(), _values.end(), buffers.begin(), [&] (const ::shared_ptr& value) { - auto term = static_pointer_cast(value->bind(options)); - return term->get_elements(); - }); - return buffers; - } -}; - - -/** - * An IN restriction that uses a single marker for a set of IN values that are tuples. - * For example: "SELECT ... WHERE (a, b, c) IN ?" 
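values_as_keys and bounds_ranges in the IN base class above both finish with the same idiom: sort with the schema's less-comparator, then drop duplicates with std::unique and the matching equality comparator. Scylla's comparators are schema-aware; reduced to plain ints the pattern is just (illustrative sketch):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
        // Each entry stands in for one tuple from an IN list.
        std::vector<int> keys = {3, 1, 3, 2, 1};
        std::sort(keys.begin(), keys.end());               // less_compare
        keys.erase(std::unique(keys.begin(), keys.end()),  // equality
                   keys.end());
        for (int k : keys) std::printf("%d ", k);          // prints: 1 2 3
    }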
- */ -class multi_column_restriction::IN_with_marker final : public multi_column_restriction::IN { -private: - shared_ptr _marker; -public: - IN_with_marker(schema_ptr schema, std::vector defs, shared_ptr marker) - : IN(schema, std::move(defs)), _marker(marker) { - } - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override { - return false; - } - - virtual sstring to_string() const override { - return "IN ?"; - } - -protected: - virtual std::vector> split_values(const query_options& options) const override { - auto in_marker = static_pointer_cast(_marker); - auto in_value = static_pointer_cast(in_marker->bind(options)); - statements::request_validations::check_not_null(in_value, "Invalid null value for IN restriction"); - return in_value->get_split_values(); - } -}; - -class multi_column_restriction::slice final : public multi_column_restriction { -private: - term_slice _slice; - - slice(schema_ptr schema, std::vector defs, term_slice slice) - : multi_column_restriction(schema, std::move(defs)) - , _slice(slice) - { } -public: - slice(schema_ptr schema, std::vector defs, statements::bound bound, bool inclusive, shared_ptr term) - : slice(schema, defs, term_slice::new_instance(bound, inclusive, term)) - { } - - virtual bool is_slice() const override { - return true; - } - - virtual std::vector values_as_keys(const query_options&) const override { - throw exceptions::unsupported_operation_exception(); - } - - virtual std::vector bounds(statements::bound b, const query_options& options) const override { - throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__)); -#if 0 - return Composites.toByteBuffers(boundsAsComposites(b, options)); -#endif - } - - virtual std::vector bounds_ranges(const query_options& options) const override { - // FIXME: doesn't work properly with mixed CLUSTERING ORDER (CASSANDRA-7281) - auto read_bound = [&] (statements::bound b) -> std::experimental::optional { - if (!has_bound(b)) { - return {}; - } - auto vals = component_bounds(b, options); - for (unsigned i = 0; i < vals.size(); i++) { - statements::request_validations::check_not_null(vals[i], "Invalid null value in condition for column %s", _column_defs.at(i)->name_as_text()); - } - auto prefix = clustering_key_prefix::from_optional_exploded(*_schema, vals); - return bounds_range_type::bound(prefix, is_inclusive(b)); - }; - auto range = wrapping_range(read_bound(statements::bound::START), read_bound(statements::bound::END)); - auto bounds = bound_view::from_range(range); - if (bound_view::compare(*_schema)(bounds.second, bounds.first)) { - return { }; - } - return { bounds_range_type(std::move(range)) }; - } -#if 0 - @Override - public void addIndexExpressionTo(List expressions, - QueryOptions options) throws InvalidRequestException - { - throw invalidRequest("Slice restrictions are not supported on indexed columns which are part of a multi column relation"); - } - - @Override - protected boolean isSupportedBy(SecondaryIndex index) - { - return slice.isSupportedBy(index); - } - - private static Composite.EOC eocFor(Restriction r, Bound eocBound, Bound inclusiveBound) - { - if (eocBound.isStart()) - return r.isInclusive(inclusiveBound) ? Composite.EOC.NONE : Composite.EOC.END; - - return r.isInclusive(inclusiveBound) ? 
Composite.EOC.END : Composite.EOC.START; - } -#endif -public: - virtual bool has_bound(statements::bound b) const override { - return _slice.has_bound(b); - } - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override { - return (_slice.has_bound(statements::bound::START) && abstract_restriction::term_uses_function(_slice.bound(statements::bound::START), ks_name, function_name)) - || (_slice.has_bound(statements::bound::END) && abstract_restriction::term_uses_function(_slice.bound(statements::bound::END), ks_name, function_name)); - } - - virtual bool is_inclusive(statements::bound b) const override { - return _slice.is_inclusive(b); - } - - virtual void do_merge_with(::shared_ptr> other) override { - using namespace statements::request_validations; - check_true(other->is_slice(), - "Column \"%s\" cannot be restricted by both an equality and an inequality relation", - get_columns_in_commons(other)); - auto other_slice = static_pointer_cast(other); - - check_false(has_bound(statements::bound::START) && other_slice->has_bound(statements::bound::START), - "More than one restriction was found for the start bound on %s", - get_columns_in_commons(other)); - check_false(has_bound(statements::bound::END) && other_slice->has_bound(statements::bound::END), - "More than one restriction was found for the end bound on %s", - get_columns_in_commons(other)); - - if (_column_defs.size() < other_slice->_column_defs.size()) { - _column_defs = other_slice->_column_defs; - } - _slice.merge(other_slice->_slice); - } - - virtual sstring to_string() const override { - return sstring("SLICE") + _slice.to_string(); - } - -private: - /** - * Similar to bounds(), but returns one ByteBuffer per-component in the bound instead of a single - * ByteBuffer to represent the entire bound. - * @param b the bound type - * @param options the query options - * @return one ByteBuffer per-component in the bound - * @throws InvalidRequestException if the components cannot be retrieved - */ - std::vector component_bounds(statements::bound b, const query_options& options) const { - auto value = static_pointer_cast(_slice.bound(b)->bind(options)); - return value->get_elements(); - } -}; - -} - -} diff --git a/scylla/cql3/restrictions/primary_key_restrictions.hh b/scylla/cql3/restrictions/primary_key_restrictions.hh deleted file mode 100644 index 127d237..0000000 --- a/scylla/cql3/restrictions/primary_key_restrictions.hh +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. 
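slice::do_merge_with above first rejects a duplicate start or end bound, then hands off to term_slice::merge, which simply adopts whichever bound the other side carries. A reduced model of that rule (std::optional bounds; the real term_slice also tracks per-bound terms and inclusiveness merging):

    #include <optional>
    #include <stdexcept>

    struct bound_t { int value; bool inclusive; };

    struct slice_t {
        std::optional<bound_t> start, end;

        // Each side may contribute at most one start and one end;
        // a duplicate is the "more than one restriction" user error.
        void merge(const slice_t& other) {
            if (other.start) {
                if (start) throw std::logic_error("two start bounds");
                start = other.start;
            }
            if (other.end) {
                if (end) throw std::logic_error("two end bounds");
                end = other.end;
            }
        }
    };

    int main() {
        slice_t a{bound_t{0, true}, std::nullopt};    // (a, b, c) >= (0, ...)
        slice_t b{std::nullopt, bound_t{10, false}};  // (a, b, c) <  (10, ...)
        a.merge(b);                                   // combined two-sided slice
        return (a.start && a.end) ? 0 : 1;
    }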
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -#include "cql3/query_options.hh" -#include "cql3/statements/bound.hh" -#include "cql3/restrictions/restrictions.hh" -#include "cql3/restrictions/restriction.hh" -#include "cql3/restrictions/abstract_restriction.hh" -#include "types.hh" -#include "query-request.hh" -#include "core/shared_ptr.hh" - -namespace cql3 { -namespace restrictions { - -/** - * A set of restrictions on a primary key part (partition key or clustering key). - * - * What was in AbstractPrimaryKeyRestrictions was moved here (In pre 1.8 Java interfaces could not have default - * implementations of methods). - */ - -template -struct range_type_for; - -template<> -struct range_type_for : public std::remove_reference {}; -template<> -struct range_type_for : public std::remove_reference {}; - -template -class primary_key_restrictions: public abstract_restriction, - public restrictions, - public enable_shared_from_this> { -public: - typedef typename range_type_for::type bounds_range_type; - - virtual ::shared_ptr> merge_to(schema_ptr, ::shared_ptr restriction) { - merge_with(restriction); - return this->shared_from_this(); - } - - virtual std::vector values_as_keys(const query_options& options) const = 0; - virtual std::vector bounds_ranges(const query_options& options) const = 0; - - using restrictions::uses_function; - - bool empty() const override { - return get_column_defs().empty(); - } - uint32_t size() const override { - return uint32_t(get_column_defs().size()); - } -}; - -} -} diff --git a/scylla/cql3/restrictions/restriction.hh b/scylla/cql3/restrictions/restriction.hh deleted file mode 100644 index d755339..0000000 --- a/scylla/cql3/restrictions/restriction.hh +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
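range_type_for in the file above is a two-specialization trait: it maps partition_key to dht::partition_range and clustering_key_prefix to query::clustering_range, and primary_key_restrictions pulls its bounds_range_type out of it. The pattern in miniature, with empty toy types (illustrative names only):

    #include <type_traits>

    struct partition_key {};
    struct clustering_prefix {};
    struct partition_range {};
    struct clustering_range {};

    // Primary template left undefined: instantiating the trait with an
    // unmapped key type is a compile error, which is exactly the point.
    template <typename T> struct range_type_for;
    template <> struct range_type_for<partition_key>     { using type = partition_range; };
    template <> struct range_type_for<clustering_prefix> { using type = clustering_range; };

    static_assert(std::is_same<range_type_for<partition_key>::type,
                               partition_range>::value,
                  "the trait selects the range type for the key type");

    int main() {}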
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -#include "cql3/query_options.hh" -#include "cql3/statements/bound.hh" -#include "types.hh" - -namespace cql3 { - -namespace restrictions { - -/** - * A restriction/clause on a column. - * The goal of this class being to group all conditions for a column in a SELECT. - */ -class restriction { -public: - virtual ~restriction() {} - virtual bool is_on_token() const = 0; - virtual bool is_slice() const = 0; - virtual bool is_EQ() const = 0; - virtual bool is_IN() const = 0; - virtual bool is_contains() const = 0; - virtual bool is_multi_column() const = 0; - - virtual std::vector values(const query_options& options) const = 0; - - virtual bytes_opt value(const query_options& options) const { - auto vec = values(options); - assert(vec.size() == 1); - return std::move(vec[0]); - } - - /** - * Returns true if one of the restrictions use the specified function. - * - * @param ks_name the keyspace name - * @param function_name the function name - * @return true if one of the restrictions use the specified function, false otherwise. - */ - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const = 0; - - /** - * Checks if the specified bound is set or not. - * @param b the bound type - * @return true if the specified bound is set, false otherwise - */ - virtual bool has_bound(statements::bound b) const = 0; - - virtual std::vector bounds(statements::bound b, const query_options& options) const = 0; - - /** - * Checks if the specified bound is inclusive or not. - * @param b the bound type - * @return true if the specified bound is inclusive, false otherwise - */ - virtual bool is_inclusive(statements::bound b) const = 0; - - /** - * Merges this restriction with the specified one. - * - * @param otherRestriction the restriction to merge into this one - * @return the restriction resulting of the merge - * @throws InvalidRequestException if the restrictions cannot be merged - */ - virtual void merge_with(::shared_ptr other) = 0; - -#if 0 - /** - * Check if the restriction is on indexed columns. - * - * @param indexManager the index manager - * @return true if the restriction is on indexed columns, false - */ - public boolean hasSupportingIndex(SecondaryIndexManager indexManager); - - /** - * Adds to the specified list the IndexExpressions corresponding to this Restriction. - * - * @param expressions the list to add the IndexExpressions to - * @param options the query options - * @throws InvalidRequestException if this Restriction cannot be converted into - * IndexExpressions - */ - public void addIndexExpressionTo(List expressions, - QueryOptions options) - throws InvalidRequestException; -#endif - - virtual sstring to_string() const = 0; -}; - -} - -} diff --git a/scylla/cql3/restrictions/restrictions.hh b/scylla/cql3/restrictions/restrictions.hh deleted file mode 100644 index 1fb3bc4..0000000 --- a/scylla/cql3/restrictions/restrictions.hh +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -#include "cql3/query_options.hh" -#include "types.hh" -#include "schema.hh" - -namespace cql3 { - -namespace restrictions { - -/** - * Sets of restrictions - */ -class restrictions { -public: - virtual ~restrictions() {} - - /** - * Returns the column definitions in position order. - * @return the column definitions in position order. - */ - virtual std::vector get_column_defs() const = 0; - - /** - * Returns true if one of the restrictions use the specified function. - * - * @param ks_name the keyspace name - * @param function_name the function name - * @return true if one of the restrictions use the specified function, false otherwise. - */ - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const = 0; - -#if 0 - /** - * Check if the restriction is on indexed columns. - * - * @param index_manager the index manager - * @return true if the restriction is on indexed columns, false - */ - virtual bool has_supporting_index(::shared_ptr index_manager) const = 0; - - /** - * Adds to the specified list the index_expressions corresponding to this Restriction. - * - * @param expressions the list to add the index_expressions to - * @param options the query options - * @throws InvalidRequestException if this Restriction cannot be converted into - * index_expressions - */ - virtual void add_index_expression_to(std::vector<::shared_ptr>& expressions, - const query_options& options) = 0; -#endif - - /** - * Checks if this SingleColumnprimary_key_restrictions is empty or not. - * - * @return true if this SingleColumnprimary_key_restrictions is empty, false otherwise. - */ - virtual bool empty() const = 0; - - /** - * Returns the number of columns that have a restriction. - * - * @return the number of columns that have a restriction. 
- */ - virtual uint32_t size() const = 0; -}; - -} - -} diff --git a/scylla/cql3/restrictions/single_column_primary_key_restrictions.hh b/scylla/cql3/restrictions/single_column_primary_key_restrictions.hh deleted file mode 100644 index 887e08e..0000000 --- a/scylla/cql3/restrictions/single_column_primary_key_restrictions.hh +++ /dev/null @@ -1,411 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include "schema.hh" -#include "cartesian_product.hh" -#include "cql3/restrictions/primary_key_restrictions.hh" -#include "cql3/restrictions/single_column_restrictions.hh" -#include -#include -#include - -namespace cql3 { - -namespace restrictions { - -/** - * A set of single column restrictions on a primary key part (partition key or clustering key). 
- */ -template -class single_column_primary_key_restrictions : public primary_key_restrictions { - using range_type = query::range; - using range_bound = typename range_type::bound; - using bounds_range_type = typename primary_key_restrictions::bounds_range_type; -private: - schema_ptr _schema; - ::shared_ptr _restrictions; - bool _slice; - bool _contains; - bool _in; -public: - single_column_primary_key_restrictions(schema_ptr schema) - : _schema(schema) - , _restrictions(::make_shared(schema)) - , _slice(false) - , _contains(false) - , _in(false) - { } - - virtual bool is_on_token() const override { - return false; - } - - virtual bool is_multi_column() const override { - return false; - } - - virtual bool is_slice() const override { - return _slice; - } - - virtual bool is_contains() const override { - return _contains; - } - - virtual bool is_IN() const override { - return _in; - } - - virtual bool has_bound(statements::bound b) const override { - return boost::algorithm::all_of(_restrictions->restrictions(), [b] (auto&& r) { return r.second->has_bound(b); }); - } - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override { - return _restrictions->uses_function(ks_name, function_name); - } - - void do_merge_with(::shared_ptr restriction) { - if (!_restrictions->empty()) { - auto last_column = *_restrictions->last_column(); - auto new_column = restriction->get_column_def(); - - if (_slice && _schema->position(new_column) > _schema->position(last_column)) { - throw exceptions::invalid_request_exception(sprint( - "Clustering column \"%s\" cannot be restricted (preceding column \"%s\" is restricted by a non-EQ relation)", - new_column.name_as_text(), last_column.name_as_text())); - } - - if (_schema->position(new_column) < _schema->position(last_column)) { - if (restriction->is_slice()) { - throw exceptions::invalid_request_exception(sprint( - "PRIMARY KEY column \"%s\" cannot be restricted (preceding column \"%s\" is restricted by a non-EQ relation)", - last_column.name_as_text(), new_column.name_as_text())); - } - } - - if (_in && _schema->position(new_column) > _schema->position(last_column)) { - throw exceptions::invalid_request_exception(sprint("Clustering column \"%s\" cannot be restricted by an IN relation", - new_column.name_as_text())); - } - } - - _slice |= restriction->is_slice(); - _in |= restriction->is_IN(); - _contains |= restriction->is_contains(); - _restrictions->add_restriction(restriction); - } - - virtual void merge_with(::shared_ptr restriction) override { - if (restriction->is_multi_column()) { - throw exceptions::invalid_request_exception( - "Mixing single column relations and multi column relations on clustering columns is not allowed"); - } - if (restriction->is_on_token()) { - throw exceptions::invalid_request_exception( - sprint( - "Columns \"%s\" cannot be restricted by both a normal relation and a token relation", - join(", ", get_column_defs()))); - } - do_merge_with(::static_pointer_cast(restriction)); - } - - virtual std::vector values_as_keys(const query_options& options) const override { - std::vector> value_vector; - value_vector.reserve(_restrictions->size()); - for (auto&& e : _restrictions->restrictions()) { - const column_definition* def = e.first; - auto&& r = e.second; - assert(!r->is_slice()); - - std::vector values = r->values(options); - for (auto&& val : values) { - if (!val) { - throw exceptions::invalid_request_exception(sprint("Invalid null value for column %s", def->name_as_text())); - } - } - if 
(values.empty()) { - return {}; - } - value_vector.emplace_back(std::move(values)); - } - - std::vector result; - result.reserve(cartesian_product_size(value_vector)); - for (auto&& v : make_cartesian_product(value_vector)) { - result.emplace_back(ValueType::from_optional_exploded(*_schema, std::move(v))); - } - return result; - } - -private: - std::vector compute_bounds(const query_options& options) const { - std::vector ranges; - - static constexpr auto invalid_null_msg = std::is_same::value - ? "Invalid null value for partition key part %s" : "Invalid null value for clustering key part %s"; - - if (_restrictions->is_all_eq()) { - ranges.reserve(1); - if (_restrictions->size() == 1) { - auto&& e = *_restrictions->restrictions().begin(); - const column_definition* def = e.first; - auto&& r = e.second; - auto&& val = r->value(options); - if (!val) { - throw exceptions::invalid_request_exception(sprint(invalid_null_msg, def->name_as_text())); - } - ranges.emplace_back(range_type::make_singular(ValueType::from_single_value(*_schema, std::move(*val)))); - return ranges; - } - std::vector components; - components.reserve(_restrictions->size()); - for (auto&& e : _restrictions->restrictions()) { - const column_definition* def = e.first; - auto&& r = e.second; - assert(components.size() == _schema->position(*def)); - auto&& val = r->value(options); - if (!val) { - throw exceptions::invalid_request_exception(sprint(invalid_null_msg, def->name_as_text())); - } - components.emplace_back(std::move(*val)); - } - ranges.emplace_back(range_type::make_singular(ValueType::from_exploded(*_schema, std::move(components)))); - return ranges; - } - - std::vector> vec_of_values; - for (auto&& e : _restrictions->restrictions()) { - const column_definition* def = e.first; - auto&& r = e.second; - - if (vec_of_values.size() != _schema->position(*def) || r->is_contains()) { - // The prefixes built so far are the longest we can build, - // the rest of the constraints will have to be applied using filtering. 
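values_as_keys and compute_bounds above both expand one list of candidate values per key column into every full-key combination through make_cartesian_product, pre-sizing the result with cartesian_product_size. A minimal recursive version of the idea, with ints standing in for key components (the tree's helper is iterator-based; this eager sketch is for illustration only):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Expand {{1,2},{10,20}} into (1,10) (1,20) (2,10) (2,20).
    static void product(const std::vector<std::vector<int>>& in, std::size_t i,
                        std::vector<int>& cur,
                        std::vector<std::vector<int>>& out) {
        if (i == in.size()) { out.push_back(cur); return; }
        for (int v : in[i]) {
            cur.push_back(v);
            product(in, i + 1, cur, out);
            cur.pop_back();
        }
    }

    int main() {
        std::vector<std::vector<int>> per_column = {{1, 2}, {10, 20}};
        std::vector<std::vector<int>> keys;
        std::vector<int> cur;
        product(per_column, 0, cur, keys);
        std::printf("%zu keys\n", keys.size());   // 4 = 2 * 2
    }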
- break; - } - - if (r->is_slice()) { - if (cartesian_product_is_empty(vec_of_values)) { - auto read_bound = [r, &options, this] (statements::bound b) -> std::experimental::optional { - if (!r->has_bound(b)) { - return {}; - } - auto value = r->bounds(b, options)[0]; - if (!value) { - throw exceptions::invalid_request_exception(sprint(invalid_null_msg, r->to_string())); - } - return {range_bound(ValueType::from_single_value(*_schema, *value), r->is_inclusive(b))}; - }; - ranges.emplace_back(range_type( - read_bound(statements::bound::START), - read_bound(statements::bound::END))); - if (def->type->is_reversed()) { - ranges.back().reverse(); - } - return std::move(ranges); - } - - ranges.reserve(cartesian_product_size(vec_of_values)); - for (auto&& prefix : make_cartesian_product(vec_of_values)) { - auto read_bound = [r, &prefix, &options, this](statements::bound bound) -> range_bound { - if (r->has_bound(bound)) { - auto value = std::move(r->bounds(bound, options)[0]); - if (!value) { - throw exceptions::invalid_request_exception(sprint(invalid_null_msg, r->to_string())); - } - prefix.emplace_back(std::move(value)); - auto val = ValueType::from_optional_exploded(*_schema, prefix); - prefix.pop_back(); - return range_bound(std::move(val), r->is_inclusive(bound)); - } else { - return range_bound(ValueType::from_optional_exploded(*_schema, prefix)); - } - }; - - ranges.emplace_back(range_type( - read_bound(statements::bound::START), - read_bound(statements::bound::END))); - - if (def->type->is_reversed()) { - ranges.back().reverse(); - } - } - - return std::move(ranges); - } - - auto values = r->values(options); - for (auto&& val : values) { - if (!val) { - throw exceptions::invalid_request_exception(sprint(invalid_null_msg, def->name_as_text())); - } - } - if (values.empty()) { - return {}; - } - vec_of_values.emplace_back(std::move(values)); - } - - ranges.reserve(cartesian_product_size(vec_of_values)); - for (auto&& prefix : make_cartesian_product(vec_of_values)) { - ranges.emplace_back(range_type::make_singular(ValueType::from_optional_exploded(*_schema, std::move(prefix)))); - } - - return std::move(ranges); - } - -public: - std::vector bounds_ranges(const query_options& options) const override; - - std::vector values(const query_options& options) const override { - auto src = values_as_keys(options); - std::vector res; - std::transform(src.begin(), src.end(), std::back_inserter(res), [this](const ValueType & r) { - auto view = r.representation(); - return bytes(view.begin(), view.end()); - }); - return res; - } - std::vector bounds(statements::bound b, const query_options& options) const override { - // TODO: if this proved to be required. - fail(unimplemented::cause::LEGACY_COMPOSITE_KEYS); // not 100% correct... 
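Note the ranges.back().reverse() calls in compute_bounds above: bounds in a query always speak the forward order, so when the column's type is reversed the start and end swap at the storage level. The flip in miniature (toy_range is an invented stand-in; sketch only):

    #include <cstdio>
    #include <optional>
    #include <utility>

    struct toy_range {
        std::optional<int> start, end;
        // On a reversed-order column a user-level [start, end] range
        // is scanned as [end, start] in storage order.
        void reverse() { std::swap(start, end); }
    };

    int main() {
        toy_range r{1, 9};    // user asks for 1 <= c <= 9
        r.reverse();          // column stored descending: scan 9 down to 1
        std::printf("%d..%d\n", *r.start, *r.end);   // prints: 9..1
    }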
- } - -#if 0 - virtual bool hasSupportingIndex(SecondaryIndexManager indexManager) override { - return restrictions.hasSupportingIndex(indexManager); - } - - virtual void addIndexExpressionTo(List expressions, QueryOptions options) override { - restrictions.addIndexExpressionTo(expressions, options); - } -#endif - - virtual std::vector get_column_defs() const override { - return _restrictions->get_column_defs(); - } - - virtual bool empty() const override { - return _restrictions->empty(); - } - - virtual uint32_t size() const override { - return _restrictions->size(); - } - sstring to_string() const override { - return sprint("Restrictions(%s)", join(", ", get_column_defs())); - } - - virtual bool is_satisfied_by(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - const query_options& options, - gc_clock::time_point now) const override { - return boost::algorithm::all_of( - _restrictions->restrictions() | boost::adaptors::map_values, - [&] (auto&& r) { return r->is_satisfied_by(schema, key, ckey, cells, options, now); }); - } -}; - -template<> -dht::partition_range_vector -single_column_primary_key_restrictions::bounds_ranges(const query_options& options) const { - dht::partition_range_vector ranges; - ranges.reserve(size()); - for (query::range& r : compute_bounds(options)) { - if (!r.is_singular()) { - throw exceptions::invalid_request_exception("Range queries on partition key values not supported."); - } - ranges.emplace_back(std::move(r).transform( - [this] (partition_key&& k) -> query::ring_position { - auto token = dht::global_partitioner().get_token(*_schema, k); - return { std::move(token), std::move(k) }; - })); - } - return ranges; -} - -template<> -std::vector -single_column_primary_key_restrictions::bounds_ranges(const query_options& options) const { - auto wrapping_bounds = compute_bounds(options); - auto bounds = boost::copy_range(wrapping_bounds - | boost::adaptors::filtered([&](auto&& r) { - auto bounds = bound_view::from_range(r); - return !bound_view::compare(*_schema)(bounds.second, bounds.first); - }) - | boost::adaptors::transformed([&](auto&& r) { return query::clustering_range(std::move(r)); - })); - auto less_cmp = clustering_key_prefix::less_compare(*_schema); - std::sort(bounds.begin(), bounds.end(), [&] (query::clustering_range& x, query::clustering_range& y) { - if (!x.start() && !y.start()) { - return false; - } - if (!x.start()) { - return true; - } - if (!y.start()) { - return false; - } - return less_cmp(x.start()->value(), y.start()->value()); - }); - auto eq_cmp = clustering_key_prefix::equality(*_schema); - bounds.erase(std::unique(bounds.begin(), bounds.end(), [&] (query::clustering_range& x, query::clustering_range& y) { - if (!x.start() && !y.start()) { - return true; - } - if (!x.start() || !y.start()) { - return false; - } - return eq_cmp(x.start()->value(), y.start()->value()); - }), bounds.end()); - return bounds; -} - -} -} - - - diff --git a/scylla/cql3/restrictions/single_column_restriction.hh b/scylla/cql3/restrictions/single_column_restriction.hh deleted file mode 100644 index 0a3fd51..0000000 --- a/scylla/cql3/restrictions/single_column_restriction.hh +++ /dev/null @@ -1,508 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
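The partition-key specialization of bounds_ranges just above accepts only singular ranges (point lookups) and turns each key into a ring_position by asking the partitioner for the key's token. That transform step in miniature, with std::hash standing in for dht::global_partitioner() and toy_ring_position as an invented stand-in (sketch only):

    #include <cstddef>
    #include <cstdio>
    #include <functional>
    #include <vector>

    struct toy_ring_position { std::size_t token; int key; };

    int main() {
        std::vector<int> keys = {42, 7};
        std::vector<toy_ring_position> positions;
        positions.reserve(keys.size());
        for (int k : keys) {
            // Mirrors: token = global_partitioner().get_token(schema, key)
            positions.push_back({std::hash<int>{}(k), k});
        }
        std::printf("%zu singular ranges\n", positions.size());
    }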
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/restrictions/abstract_restriction.hh" -#include "cql3/restrictions/term_slice.hh" -#include "cql3/term.hh" -#include "cql3/abstract_marker.hh" -#include "core/shared_ptr.hh" -#include "schema.hh" -#include "to_string.hh" -#include "exceptions/exceptions.hh" -#include "keys.hh" -#include "mutation_partition.hh" - -namespace cql3 { - -namespace restrictions { - -class single_column_restriction : public abstract_restriction { -protected: - /** - * The definition of the column to which apply the restriction. - */ - const column_definition& _column_def; -public: - single_column_restriction(const column_definition& column_def) - : _column_def(column_def) - { } - - const column_definition& get_column_def() const { - return _column_def; - } - -#if 0 - @Override - public void addIndexExpressionTo(List expressions, - QueryOptions options) throws InvalidRequestException - { - List values = values(options); - checkTrue(values.size() == 1, "IN restrictions are not supported on indexed columns"); - - ByteBuffer value = validateIndexedValue(columnDef, values.get(0)); - expressions.add(new IndexExpression(columnDef.name.bytes, Operator.EQ, value)); - } - - @Override - public boolean hasSupportingIndex(SecondaryIndexManager indexManager) - { - SecondaryIndex index = indexManager.getIndexForColumn(columnDef.name.bytes); - return index != null && isSupportedBy(index); - } - - /** - * Check if this type of restriction is supported by the specified index. - * - * @param index the Secondary index - * @return true this type of restriction is supported by the specified index, - * false otherwise. 
- */ - protected abstract boolean isSupportedBy(SecondaryIndex index); -#endif - - class EQ; - class IN; - class IN_with_values; - class IN_with_marker; - - class slice; - class contains; - -protected: - bytes_view_opt get_value(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - gc_clock::time_point now) const; -}; - -class single_column_restriction::EQ final : public single_column_restriction { -private: - ::shared_ptr _value; -public: - EQ(const column_definition& column_def, ::shared_ptr value) - : single_column_restriction(column_def) - , _value(std::move(value)) - { } - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override { - return abstract_restriction::term_uses_function(_value, ks_name, function_name); - } - - virtual bool is_EQ() const override { - return true; - } - - virtual std::vector values(const query_options& options) const override { - std::vector v; - v.push_back(to_bytes_opt(_value->bind_and_get(options))); - return v; - } - - virtual bytes_opt value(const query_options& options) const override { - return to_bytes_opt(_value->bind_and_get(options)); - } - - virtual sstring to_string() const override { - return sprint("EQ(%s)", _value->to_string()); - } - - virtual void merge_with(::shared_ptr other) { - throw exceptions::invalid_request_exception(sprint( - "%s cannot be restricted by more than one relation if it includes an Equal", _column_def.name_as_text())); - } - - virtual bool is_satisfied_by(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - const query_options& options, - gc_clock::time_point now) const override; - -#if 0 - @Override - protected boolean isSupportedBy(SecondaryIndex index) - { - return index.supportsOperator(Operator.EQ); - } -#endif -}; - -class single_column_restriction::IN : public single_column_restriction { -public: - IN(const column_definition& column_def) - : single_column_restriction(column_def) - { } - - virtual bool is_IN() const override { - return true; - } - - virtual void merge_with(::shared_ptr r) override { - throw exceptions::invalid_request_exception(sprint( - "%s cannot be restricted by more than one relation if it includes a IN", _column_def.name_as_text())); - } - - virtual bool is_satisfied_by(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - const query_options& options, - gc_clock::time_point now) const override; - -#if 0 - @Override - protected final boolean isSupportedBy(SecondaryIndex index) - { - return index.supportsOperator(Operator.IN); - } -#endif -}; - -class single_column_restriction::IN_with_values : public single_column_restriction::IN { -protected: - std::vector<::shared_ptr> _values; -public: - IN_with_values(const column_definition& column_def, std::vector<::shared_ptr> values) - : single_column_restriction::IN(column_def) - , _values(std::move(values)) - { } - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override { - return abstract_restriction::term_uses_function(_values, ks_name, function_name); - } - - virtual std::vector values(const query_options& options) const override { - std::vector ret; - for (auto&& v : _values) { - ret.emplace_back(to_bytes_opt(v->bind_and_get(options))); - } - return ret; - } - - virtual sstring to_string() const override { - return sprint("IN(%s)", std::to_string(_values)); - } -}; - -class 
single_column_restriction::IN_with_marker : public IN { -public: - shared_ptr _marker; -public: - IN_with_marker(const column_definition& column_def, shared_ptr marker) - : IN(column_def), _marker(std::move(marker)) { - } - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override { - return false; - } - - virtual std::vector values(const query_options& options) const override { - auto&& lval = dynamic_pointer_cast(_marker->bind(options)); - if (!lval) { - throw exceptions::invalid_request_exception("Invalid null value for IN restriction"); - } - return lval->get_elements(); - } - - virtual sstring to_string() const override { - return "IN ?"; - } -}; - -class single_column_restriction::slice : public single_column_restriction { -private: - term_slice _slice; -public: - slice(const column_definition& column_def, statements::bound bound, bool inclusive, ::shared_ptr term) - : single_column_restriction(column_def) - , _slice(term_slice::new_instance(bound, inclusive, std::move(term))) - { } - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override { - return (_slice.has_bound(statements::bound::START) && abstract_restriction::term_uses_function(_slice.bound(statements::bound::START), ks_name, function_name)) - || (_slice.has_bound(statements::bound::END) && abstract_restriction::term_uses_function(_slice.bound(statements::bound::END), ks_name, function_name)); - } - - virtual bool is_slice() const override { - return true; - } - - virtual std::vector values(const query_options& options) const override { - throw exceptions::unsupported_operation_exception(); - } - - virtual bool has_bound(statements::bound b) const override { - return _slice.has_bound(b); - } - - virtual std::vector bounds(statements::bound b, const query_options& options) const override { - return {to_bytes_opt(_slice.bound(b)->bind_and_get(options))}; - } - - virtual bool is_inclusive(statements::bound b) const override { - return _slice.is_inclusive(b); - } - - virtual void merge_with(::shared_ptr r) override { - if (!r->is_slice()) { - throw exceptions::invalid_request_exception(sprint( - "Column \"%s\" cannot be restricted by both an equality and an inequality relation", _column_def.name_as_text())); - } - - auto other_slice = static_pointer_cast(r); - - if (has_bound(statements::bound::START) && other_slice->has_bound(statements::bound::START)) { - throw exceptions::invalid_request_exception(sprint( - "More than one restriction was found for the start bound on %s", _column_def.name_as_text())); - } - - if (has_bound(statements::bound::END) && other_slice->has_bound(statements::bound::END)) { - throw exceptions::invalid_request_exception(sprint( - "More than one restriction was found for the end bound on %s", _column_def.name_as_text())); - } - - _slice.merge(other_slice->_slice); - } - -#if 0 - virtual void addIndexExpressionTo(List expressions, override - QueryOptions options) throws InvalidRequestException - { - for (statements::bound b : {statements::bound::START, statements::bound::END}) - { - if (has_bound(b)) - { - ByteBuffer value = validateIndexedValue(columnDef, _slice.bound(b).bindAndGet(options)); - Operator op = _slice.getIndexOperator(b); - // If the underlying comparator for name is reversed, we need to reverse the IndexOperator: user operation - // always refer to the "forward" sorting even if the clustering order is reversed, but the 2ndary code does - // use the underlying comparator as is. 
- op = columnDef.isReversedType() ? op.reverse() : op; - expressions.add(new IndexExpression(columnDef.name.bytes, op, value)); - } - } - } - - virtual bool isSupportedBy(SecondaryIndex index) override - { - return _slice.isSupportedBy(index); - } -#endif - - virtual sstring to_string() const override { - return sprint("SLICE%s", _slice); - } - - virtual bool is_satisfied_by(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - const query_options& options, - gc_clock::time_point now) const override; -}; - -// This holds CONTAINS, CONTAINS_KEY, and map[key] = value restrictions because we might want to have any combination of them. -class single_column_restriction::contains final : public single_column_restriction { -private: - std::vector<::shared_ptr> _values; - std::vector<::shared_ptr> _keys; - std::vector<::shared_ptr> _entry_keys; - std::vector<::shared_ptr> _entry_values; -public: - contains(const column_definition& column_def, ::shared_ptr t, bool is_key) - : single_column_restriction(column_def) { - if (is_key) { - _keys.emplace_back(std::move(t)); - } else { - _values.emplace_back(std::move(t)); - } - } - - contains(const column_definition& column_def, ::shared_ptr map_key, ::shared_ptr map_value) - : single_column_restriction(column_def) { - _entry_keys.emplace_back(std::move(map_key)); - _entry_values.emplace_back(std::move(map_value)); - } - - virtual std::vector values(const query_options& options) const override { - return bind_and_get(_values, options); - } - - virtual bool is_contains() const override { - return true; - } - - virtual void merge_with(::shared_ptr other_restriction) override { - if (!other_restriction->is_contains()) { - throw exceptions::invalid_request_exception(sprint( - "Collection column %s can only be restricted by CONTAINS, CONTAINS KEY, or map-entry equality", - get_column_def().name_as_text())); - } - - auto other = static_pointer_cast(other_restriction); - std::copy(other->_values.begin(), other->_values.end(), std::back_inserter(_values)); - std::copy(other->_keys.begin(), other->_keys.end(), std::back_inserter(_keys)); - std::copy(other->_entry_keys.begin(), other->_entry_keys.end(), std::back_inserter(_entry_keys)); - std::copy(other->_entry_values.begin(), other->_entry_values.end(), std::back_inserter(_entry_values)); - } - -#if 0 - virtual void add_index_expression_to(std::vector<::shared_ptr>& expressions, - const query_options& options) override { - add_expressions_for(expressions, values(options), operator_type::CONTAINS); - add_expressions_for(expressions, keys(options), operator_type::CONTAINS_KEY); - add_expressions_for(expressions, entries(options), operator_type::EQ); - } - - private void add_expressions_for(std::vector<::shared_ptr>& target, std::vector values, - const operator_type& op) { - for (ByteBuffer value : values) - { - validateIndexedValue(columnDef, value); - target.add(new IndexExpression(columnDef.name.bytes, op, value)); - } - } - - virtual bool is_supported_by(SecondaryIndex index) override { - bool supported = false; - - if (numberOfValues() > 0) - supported |= index.supportsOperator(Operator.CONTAINS); - - if (numberOfKeys() > 0) - supported |= index.supportsOperator(Operator.CONTAINS_KEY); - - if (numberOfEntries() > 0) - supported |= index.supportsOperator(Operator.EQ); - - return supported; - } -#endif - - uint32_t number_of_values() const { - return _values.size(); - } - - uint32_t number_of_keys() const { - return _keys.size(); - } - - uint32_t 
number_of_entries() const { - return _entry_keys.size(); - } - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override { - return abstract_restriction::term_uses_function(_values, ks_name, function_name) - || abstract_restriction::term_uses_function(_keys, ks_name, function_name) - || abstract_restriction::term_uses_function(_entry_keys, ks_name, function_name) - || abstract_restriction::term_uses_function(_entry_values, ks_name, function_name); - } - - virtual sstring to_string() const override { - return sprint("CONTAINS(values=%s, keys=%s, entryKeys=%s, entryValues=%s)", - std::to_string(_values), std::to_string(_keys), std::to_string(_entry_keys), std::to_string(_entry_values)); - } - - virtual bool has_bound(statements::bound b) const override { - throw exceptions::unsupported_operation_exception(); - } - - virtual std::vector bounds(statements::bound b, const query_options& options) const override { - throw exceptions::unsupported_operation_exception(); - } - - virtual bool is_inclusive(statements::bound b) const override { - throw exceptions::unsupported_operation_exception(); - } - - virtual bool is_satisfied_by(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - const query_options& options, - gc_clock::time_point now) const override; - -#if 0 - private List keys(const query_options& options) { - return bindAndGet(keys, options); - } - - private List entries(QueryOptions options) throws InvalidRequestException - { - List entryBuffers = new ArrayList<>(_entry_keys.size()); - List keyBuffers = bindAndGet(_entry_keys, options); - List valueBuffers = bindAndGet(_entry_values, options); - for (int i = 0; i < _entry_keys.size(); i++) - { - if (valueBuffers.get(i) == null) - throw new InvalidRequestException("Unsupported null value for map-entry equality"); - entryBuffers.add(CompositeType.build(keyBuffers.get(i), valueBuffers.get(i))); - } - return entryBuffers; - } -#endif - -private: - /** - * Binds the query options to the specified terms and returns the resulting values. - * - * @param terms the terms - * @param options the query options - * @return the value resulting from binding the query options to the specified terms - * @throws invalid_request_exception if a problem occurs while binding the query options - */ - static std::vector bind_and_get(std::vector<::shared_ptr> terms, const query_options& options) { - std::vector values; - values.reserve(terms.size()); - for (auto&& term : terms) { - values.emplace_back(to_bytes_opt(term->bind_and_get(options))); - } - return values; - } -}; - - -} - -} diff --git a/scylla/cql3/restrictions/single_column_restrictions.hh b/scylla/cql3/restrictions/single_column_restrictions.hh deleted file mode 100644 index 9dd5a64..0000000 --- a/scylla/cql3/restrictions/single_column_restrictions.hh +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
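Unlike EQ and IN, contains merges by accumulation: its merge_with appends the other side's value, key, and entry term vectors rather than rejecting the duplicate, because one column may legitimately carry several CONTAINS / CONTAINS KEY / map-entry conditions at once. The shape of that merge, reduced to standard types (strings standing in for bound terms; sketch only):

    #include <iostream>
    #include <string>
    #include <vector>

    struct contains_restriction {
        std::vector<std::string> values, keys;
        // CONTAINS-style merge: append, never replace.
        void merge_with(const contains_restriction& other) {
            values.insert(values.end(), other.values.begin(), other.values.end());
            keys.insert(keys.end(), other.keys.begin(), other.keys.end());
        }
    };

    int main() {
        contains_restriction a{{"x"}, {}};        // list CONTAINS 'x'
        contains_restriction b{{"y"}, {"k"}};     // CONTAINS 'y', CONTAINS KEY 'k'
        a.merge_with(b);
        std::cout << a.values.size() << " values, "
                  << a.keys.size() << " keys\n";  // 2 values, 1 keys
    }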
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/restrictions/restrictions.hh" -#include "cql3/restrictions/single_column_restriction.hh" -#include "schema.hh" -#include "types.hh" - -namespace cql3 { - -namespace restrictions { - -/** - * Sets of single column _restrictions. - */ -class single_column_restrictions : public restrictions { -private: - /** - * The comparator used to sort the restrictions. - */ - struct column_definition_comparator { - schema_ptr _schema; - bool operator()(const column_definition* def1, const column_definition* def2) const { - auto pos1 = _schema->position(*def1); - auto pos2 = _schema->position(*def2); - if (pos1 != pos2) { - return pos1 < pos2; - } - // FIXME: shouldn't we use regular column name comparator here? Origin does not... - return less_unsigned(def1->name(), def2->name()); - } - }; - - /** - * The _restrictions per column. - */ -public: - using restrictions_map = std::map, column_definition_comparator>; -private: - restrictions_map _restrictions; - bool _is_all_eq = true; -public: - single_column_restrictions(schema_ptr schema) - : _restrictions(column_definition_comparator{std::move(schema)}) - { } - -#if 0 - @Override - public final void addIndexExpressionTo(List expressions, - QueryOptions options) throws InvalidRequestException - { - for (Restriction restriction : _restrictions.values()) - restriction.addIndexExpressionTo(expressions, options); - } -#endif - - virtual std::vector get_column_defs() const override { - std::vector r; - for (auto&& e : _restrictions) { - r.push_back(e.first); - } - return r; - } - - /** - * Returns the restriction associated to the specified column. 
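Editor's sketch: the column_definition_comparator above orders the restrictions map by each column's position in the schema, falling back to a byte-wise name comparison when positions tie. A minimal, self-contained illustration of that ordering, using hypothetical stand-in types (fake_column, by_position) rather than Scylla's schema and column_definition:

    #include <cassert>
    #include <map>
    #include <string>

    struct fake_column {          // hypothetical stand-in for column_definition
        std::string name;
        unsigned position;        // what schema::position() would return
    };

    struct by_position {          // mirrors column_definition_comparator
        bool operator()(const fake_column* a, const fake_column* b) const {
            if (a->position != b->position) {
                return a->position < b->position;
            }
            return a->name < b->name;   // tie-break by name, as less_unsigned() does
        }
    };

    int main() {
        fake_column ck2{"ck2", 2}, ck1{"ck1", 1};
        std::map<const fake_column*, int, by_position> restrictions;
        restrictions[&ck2] = 0;
        restrictions[&ck1] = 0;
        // Iteration order follows schema position, so ck1 comes first.
        assert(restrictions.begin()->first == &ck1);
    }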
- * - * @param column_def the column definition - * @return the restriction associated to the specified column - */ - ::shared_ptr get_restriction(const column_definition& column_def) const { - auto i = _restrictions.find(&column_def); - if (i == _restrictions.end()) { - return {}; - } - return i->second; - } - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override { - for (auto&& e : _restrictions) { - if (e.second->uses_function(ks_name, function_name)) { - return true; - } - } - return false; - } - - virtual bool empty() const override { - return _restrictions.empty(); - } - - virtual uint32_t size() const override { - return _restrictions.size(); - } - - /** - * Adds the specified restriction to this set of _restrictions. - * - * @param restriction the restriction to add - * @throws InvalidRequestException if the new restriction cannot be added - */ - void add_restriction(::shared_ptr restriction) { - _is_all_eq &= restriction->is_EQ(); - - auto i = _restrictions.find(&restriction->get_column_def()); - if (i == _restrictions.end()) { - _restrictions.emplace_hint(i, &restriction->get_column_def(), std::move(restriction)); - } else { - i->second->merge_with(restriction); - } - } - -#if 0 - virtual bool has_supporting_index(::shared_ptr index_manager) const override { - for (auto&& e : _restrictions) { - if (e.second->has_supporting_index(index_manager)) { - return true; - } - } - return false; - } -#endif - - /** - * Returns the column after the specified one. - * - * @param column_def the column for which the next one need to be found - * @return the column after the specified one. - */ - const column_definition* next_column(const column_definition& column_def) const { - auto i = _restrictions.find(&column_def); - if (i == _restrictions.end()) { - return nullptr; - } - ++i; - if (i == _restrictions.end()) { - return nullptr; - } - return i->first; - } - - /** - * Returns the definition of the last column. - * - * @return the definition of the last column. - */ - const column_definition* last_column() const { - if (_restrictions.empty()) { - return nullptr; - } - auto i = _restrictions.end(); - --i; - return i->first; - } - - /** - * Returns the last restriction. - * - * @return the last restriction. - */ - ::shared_ptr last_restriction() const { - if (_restrictions.empty()) { - return {}; - } - auto i = _restrictions.end(); - --i; - return i->second; - } - - const restrictions_map& restrictions() const { - return _restrictions; - } - - /** - * Checks if the _restrictions contains multiple contains, contains key, or map[key] = value. 
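Editor's sketch: add_restriction() above keeps one entry per column; the first restriction on a column is inserted, later ones are merged into it via merge_with(), and _is_all_eq only stays true while every added restriction is an equality. A simplified version of that bookkeeping, with a hypothetical restr type standing in for single_column_restriction:

    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    struct restr {                      // hypothetical stand-in for single_column_restriction
        bool is_eq;
        std::vector<int> values;        // merged operands
        void merge_with(const restr& other) {
            values.insert(values.end(), other.values.begin(), other.values.end());
        }
    };

    struct restrictions_set {
        std::map<std::string, restr> by_column;
        bool is_all_eq = true;
        void add(const std::string& col, restr r) {
            is_all_eq &= r.is_eq;       // any non-EQ restriction flips the flag for good
            auto i = by_column.find(col);
            if (i == by_column.end()) {
                by_column.emplace(col, std::move(r));
            } else {
                i->second.merge_with(r);   // second restriction on the same column
            }
        }
    };

    int main() {
        restrictions_set s;
        s.add("a", {true, {1}});
        s.add("a", {true, {2}});        // merged into the existing "a" entry
        assert(s.by_column.size() == 1 && s.by_column.at("a").values.size() == 2);
        s.add("b", {false, {3}});
        assert(!s.is_all_eq);
    }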
- * - * @return true if the _restrictions contains multiple contains, contains key, or , - * map[key] = value; false otherwise - */ - bool has_multiple_contains() const { - uint32_t number_of_contains = 0; - for (auto&& e : _restrictions) { - if (e.second->is_contains()) { - auto contains_ = static_pointer_cast(e.second); - number_of_contains += contains_->number_of_values(); - number_of_contains += contains_->number_of_keys(); - number_of_contains += contains_->number_of_entries(); - } - } - return number_of_contains > 1; - } - - bool is_all_eq() const { - return _is_all_eq; - } -}; - -} -} diff --git a/scylla/cql3/restrictions/statement_restrictions.cc b/scylla/cql3/restrictions/statement_restrictions.cc deleted file mode 100644 index ecd928b..0000000 --- a/scylla/cql3/restrictions/statement_restrictions.cc +++ /dev/null @@ -1,687 +0,0 @@ - -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include -#include -#include - -#include "statement_restrictions.hh" -#include "single_column_primary_key_restrictions.hh" -#include "token_restriction.hh" - -#include "cql3/single_column_relation.hh" -#include "cql3/constants.hh" - -#include "stdx.hh" - -namespace cql3 { -namespace restrictions { - -using boost::adaptors::filtered; -using boost::adaptors::transformed; - -template -class statement_restrictions::initial_key_restrictions : public primary_key_restrictions { -public: - using bounds_range_type = typename primary_key_restrictions::bounds_range_type; - - ::shared_ptr> do_merge_to(schema_ptr schema, ::shared_ptr restriction) const { - if (restriction->is_multi_column()) { - throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__)); - } - return ::make_shared>(schema)->merge_to(schema, restriction); - } - ::shared_ptr> merge_to(schema_ptr schema, ::shared_ptr restriction) override { - if (restriction->is_multi_column()) { - throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__)); - } - if (restriction->is_on_token()) { - return static_pointer_cast(restriction); - } - return ::make_shared>(schema)->merge_to(restriction); - } - void merge_with(::shared_ptr restriction) override { - throw exceptions::unsupported_operation_exception(); - } - std::vector values(const query_options& options) const override { - // throw? should not reach? - return {}; - } - std::vector values_as_keys(const query_options& options) const override { - // throw? should not reach? - return {}; - } - std::vector bounds_ranges(const query_options&) const override { - // throw? should not reach? - return {}; - } - std::vector get_column_defs() const override { - // throw? should not reach? 
- return {}; - } - bool uses_function(const sstring&, const sstring&) const override { - return false; - } - bool empty() const override { - return true; - } - uint32_t size() const override { - return 0; - } - sstring to_string() const override { - return "Initial restrictions"; - } - virtual bool is_satisfied_by(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - const query_options& options, - gc_clock::time_point now) const override { - return true; - } -}; - -template<> -::shared_ptr> -statement_restrictions::initial_key_restrictions::merge_to(schema_ptr schema, ::shared_ptr restriction) { - if (restriction->is_on_token()) { - return static_pointer_cast(restriction); - } - return do_merge_to(std::move(schema), std::move(restriction)); -} - -template<> -::shared_ptr> -statement_restrictions::initial_key_restrictions::merge_to(schema_ptr schema, ::shared_ptr restriction) { - if (restriction->is_multi_column()) { - return static_pointer_cast>(restriction); - } - return do_merge_to(std::move(schema), std::move(restriction)); -} - -template -::shared_ptr> statement_restrictions::get_initial_key_restrictions() { - static thread_local ::shared_ptr> initial_kr = ::make_shared>(); - return initial_kr; -} - -std::vector<::shared_ptr> -statement_restrictions::get_partition_key_unrestricted_components() const { - std::vector<::shared_ptr> r; - - auto restricted = _partition_key_restrictions->get_column_defs(); - auto is_not_restricted = [&restricted] (const column_definition& def) { - return !boost::count(restricted, &def); - }; - - boost::copy(_schema->partition_key_columns() | filtered(is_not_restricted) | transformed(to_identifier), - std::back_inserter(r)); - return r; -} - -statement_restrictions::statement_restrictions(schema_ptr schema) - : _schema(schema) - , _partition_key_restrictions(get_initial_key_restrictions()) - , _clustering_columns_restrictions(get_initial_key_restrictions()) - , _nonprimary_key_restrictions(::make_shared(schema)) -{ } -#if 0 -static const column_definition* -to_column_definition(const schema_ptr& schema, const ::shared_ptr& entity) { - return get_column_definition(schema, - *entity->prepare_column_identifier(schema)); -} -#endif - -statement_restrictions::statement_restrictions(database& db, - schema_ptr schema, - statements::statement_type type, - const std::vector<::shared_ptr>& where_clause, - ::shared_ptr bound_names, - bool selects_only_static_columns, - bool select_a_collection, - bool for_view) - : statement_restrictions(schema) -{ - /* - * WHERE clause. For a given entity, rules are: - EQ relation conflicts with anything else (including a 2nd EQ) - * - Can't have more than one LT(E) relation (resp. 
GT(E) relation) - IN relation are restricted to row keys - * (for now) and conflicts with anything else (we could allow two IN for the same entity but that doesn't seem - * very useful) - The value_alias cannot be restricted in any way (we don't support wide rows with indexed value - * in CQL so far) - */ - if (!where_clause.empty()) { - for (auto&& relation : where_clause) { - if (relation->get_operator() == cql3::operator_type::IS_NOT) { - single_column_relation* r = - dynamic_cast(relation.get()); - // The "IS NOT NULL" restriction is only supported (and - // mandatory) for materialized view creation: - if (!r) { - throw exceptions::invalid_request_exception("IS NOT only supports single column"); - } - // currently, the grammar only allows the NULL argument to be - // "IS NOT", so this assertion should not be able to fail - assert(r->get_value() == cql3::constants::NULL_LITERAL); - - auto col_id = r->get_entity()->prepare_column_identifier(schema); - const auto *cd = get_column_definition(schema, *col_id); - if (!cd) { - throw exceptions::invalid_request_exception(sprint("restriction '%s' unknown column %s", relation->to_string(), r->get_entity()->to_string())); - } - _not_null_columns.insert(cd); - - if (!for_view) { - throw exceptions::invalid_request_exception(sprint("restriction '%s' is only supported in materialized view creation", relation->to_string())); - } - } else { - add_restriction(relation->to_restriction(db, schema, bound_names)); - } - } - } - - warn(unimplemented::cause::INDEXES); -#if 0 - ColumnFamilyStore cfs = Keyspace.open(cfm.ks_name).getColumnFamilyStore(cfm.cfName); - secondary_index_manager secondaryIndexManager = cfs.index_manager; -#endif - bool has_queriable_clustering_column_index = false; /*_clustering_columns_restrictions->has_supporting_index(secondaryIndexManager);*/ - bool has_queriable_index = false; /*has_queriable_clustering_column_index - || _partition_key_restrictions->has_supporting_index(secondaryIndexManager) - || nonprimary_key_restrictions->has_supporting_index(secondaryIndexManager);*/ - - // At this point, the select statement if fully constructed, but we still have a few things to validate - process_partition_key_restrictions(has_queriable_index, for_view); - - // Some but not all of the partition key columns have been specified; - // hence we need turn these restrictions into index expressions. - if (_uses_secondary_indexing) { - _index_restrictions.push_back(_partition_key_restrictions); - } - - if (selects_only_static_columns && has_clustering_columns_restriction()) { - if (type.is_update() || type.is_delete()) { - throw exceptions::invalid_request_exception(sprint( - "Invalid restrictions on clustering columns since the %s statement modifies only static columns", type)); - } - - if (type.is_select()) { - throw exceptions::invalid_request_exception( - "Cannot restrict clustering columns when selecting only static columns"); - } - } - - process_clustering_columns_restrictions(has_queriable_index, select_a_collection, for_view); - - // Covers indexes on the first clustering column (among others). 
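Editor's sketch: per the WHERE-clause rules stated above, an EQ relation conflicts with any further relation on the same column, and merge_with() is where such conflicts surface as invalid_request_exception. An illustrative toy version of that rejection; std::invalid_argument stands in for the real exception type and the message paraphrases the wording used by the token EQ restriction later in this patch:

    #include <iostream>
    #include <stdexcept>

    struct eq_restriction {
        int value;
        void merge_with(const eq_restriction&) {
            // e.g. WHERE a = 1 AND a = 2 — rejected outright
            throw std::invalid_argument(
                "column cannot be restricted by more than one relation if it includes an Equal");
        }
    };

    int main() {
        eq_restriction a{1};
        try {
            a.merge_with(eq_restriction{2});
        } catch (const std::invalid_argument& e) {
            std::cout << "rejected: " << e.what() << '\n';
        }
    }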
- if (_is_key_range && has_queriable_clustering_column_index) - _uses_secondary_indexing = true; - - if (_uses_secondary_indexing) { - _index_restrictions.push_back(_clustering_columns_restrictions); - } else if (_clustering_columns_restrictions->is_contains()) { - fail(unimplemented::cause::INDEXES); -#if 0 - _index_restrictions.push_back(new Forwardingprimary_key_restrictions() { - - @Override - protected primary_key_restrictions getDelegate() - { - return _clustering_columns_restrictions; - } - - @Override - public void add_index_expression_to(List<::shared_ptr> expressions, const query_options& options) throws InvalidRequestException - { - List<::shared_ptr> list = new ArrayList<>(); - super.add_index_expression_to(list, options); - - for (::shared_ptr expression : list) - { - if (expression.is_contains() || expression.is_containsKey()) - expressions.add(expression); - } - } - }); - uses_secondary_indexing = true; -#endif - } - // Even if uses_secondary_indexing is false at this point, we'll still have to use one if - // there is restrictions not covered by the PK. - if (!_nonprimary_key_restrictions->empty()) { - _uses_secondary_indexing = true; - _index_restrictions.push_back(_nonprimary_key_restrictions); - } - - if (_uses_secondary_indexing && !for_view) { - fail(unimplemented::cause::INDEXES); -#if 0 - validate_secondary_index_selections(selects_only_static_columns); -#endif - } -} - -void statement_restrictions::add_restriction(::shared_ptr restriction) { - if (restriction->is_multi_column()) { - _clustering_columns_restrictions = _clustering_columns_restrictions->merge_to(_schema, restriction); - } else if (restriction->is_on_token()) { - _partition_key_restrictions = _partition_key_restrictions->merge_to(_schema, restriction); - } else { - add_single_column_restriction(::static_pointer_cast(restriction)); - } -} - -void statement_restrictions::add_single_column_restriction(::shared_ptr restriction) { - auto& def = restriction->get_column_def(); - if (def.is_partition_key()) { - _partition_key_restrictions = _partition_key_restrictions->merge_to(_schema, restriction); - } else if (def.is_clustering_key()) { - _clustering_columns_restrictions = _clustering_columns_restrictions->merge_to(_schema, restriction); - } else { - _nonprimary_key_restrictions->add_restriction(restriction); - } -} - -bool statement_restrictions::uses_function(const sstring& ks_name, const sstring& function_name) const { - return _partition_key_restrictions->uses_function(ks_name, function_name) - || _clustering_columns_restrictions->uses_function(ks_name, function_name) - || _nonprimary_key_restrictions->uses_function(ks_name, function_name); -} - -void statement_restrictions::process_partition_key_restrictions(bool has_queriable_index, bool for_view) { - // If there is a queriable index, no special condition are required on the other restrictions. - // But we still need to know 2 things: - // - If we don't have a queriable index, is the query ok - // - Is it queriable without 2ndary index, which is always more efficient - // If a component of the partition key is restricted by a relation, all preceding - // components must have a EQ. Only the last partition key component can be in IN relation. 
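Editor's sketch: a compact model of the rule as process_partition_key_restrictions() applies it, using simplified types: a token restriction always yields a key range, a fully restricted partition key yields a single-partition query, and a partially restricted key is rejected unless an index can serve it:

    #include <cassert>
    #include <stdexcept>

    struct pk_state {                   // simplified view of the partition key restrictions
        bool on_token;
        size_t restricted_components;
        size_t pk_size;
    };

    bool is_key_range(const pk_state& s, bool has_queriable_index) {
        if (s.on_token) {
            return true;                // WHERE token(pk) ... is always a range scan
        }
        if (s.restricted_components < s.pk_size) {
            if (s.restricted_components > 0 && !has_queriable_index) {
                throw std::invalid_argument("partition key parts must be restricted as other parts are");
            }
            return true;                // whole-ring or index-driven scan
        }
        return false;                   // fully restricted: single-partition query
    }

    int main() {
        assert(is_key_range({true, 0, 2}, false));    // token restriction
        assert(!is_key_range({false, 2, 2}, false));  // pk1 = ? AND pk2 = ?
        try {
            is_key_range({false, 1, 2}, false);       // pk1 = ? only, no index
            assert(false);
        } catch (const std::invalid_argument&) {}
    }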
- if (_partition_key_restrictions->is_on_token()) { - _is_key_range = true; - } else if (has_partition_key_unrestricted_components()) { - if (!_partition_key_restrictions->empty() && !for_view) { - if (!has_queriable_index) { - throw exceptions::invalid_request_exception(sprint("Partition key parts: %s must be restricted as other parts are", - join(", ", get_partition_key_unrestricted_components()))); - } - } - - _is_key_range = true; - _uses_secondary_indexing = has_queriable_index; - } -} - -bool statement_restrictions::has_partition_key_unrestricted_components() const { - return _partition_key_restrictions->size() < _schema->partition_key_size(); -} - -bool statement_restrictions::has_unrestricted_clustering_columns() const { - return _clustering_columns_restrictions->size() < _schema->clustering_key_size(); -} - -void statement_restrictions::process_clustering_columns_restrictions(bool has_queriable_index, bool select_a_collection, bool for_view) { - if (!has_clustering_columns_restriction()) { - return; - } - - if (_clustering_columns_restrictions->is_IN() && select_a_collection) { - throw exceptions::invalid_request_exception( - "Cannot restrict clustering columns by IN relations when a collection is selected by the query"); - } - if (_clustering_columns_restrictions->is_contains() && !has_queriable_index) { - throw exceptions::invalid_request_exception( - "Cannot restrict clustering columns by a CONTAINS relation without a secondary index"); - } - - auto clustering_columns_iter = _schema->clustering_key_columns().begin(); - - for (auto&& restricted_column : _clustering_columns_restrictions->get_column_defs()) { - const column_definition* clustering_column = &(*clustering_columns_iter); - ++clustering_columns_iter; - - if (clustering_column != restricted_column && !for_view) { - if (!has_queriable_index) { - throw exceptions::invalid_request_exception(sprint( - "PRIMARY KEY column \"%s\" cannot be restricted as preceding column \"%s\" is not restricted", - restricted_column->name_as_text(), clustering_column->name_as_text())); - } - - _uses_secondary_indexing = true; // handle gaps and non-keyrange cases. 
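Editor's sketch: the loop below walks the schema's clustering columns in order and flags a gap as soon as a restricted column is not the next expected one, which either forces secondary-index use or fails the query. A simplified, self-contained version of that prefix check (plain strings stand in for column_definition; it assumes restricted has no more entries than the clustering key):

    #include <cassert>
    #include <string>
    #include <vector>

    // Returns true when a secondary index would be needed (a gap was found).
    bool needs_index_for_gap(const std::vector<std::string>& clustering_key,
                             const std::vector<std::string>& restricted) {
        for (size_t i = 0; i < restricted.size(); ++i) {
            if (restricted[i] != clustering_key[i]) {
                return true;   // preceding column unrestricted: gap in the prefix
            }
        }
        return false;
    }

    int main() {
        std::vector<std::string> ck = {"c1", "c2", "c3"};
        assert(!needs_index_for_gap(ck, {"c1", "c2"}));  // proper prefix: fine
        assert(needs_index_for_gap(ck, {"c2"}));         // skips c1: gap
    }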
-            break;
-        }
-    }
-
-    if (_clustering_columns_restrictions->is_contains()) {
-        _uses_secondary_indexing = true;
-    }
-}
-
-dht::partition_range_vector statement_restrictions::get_partition_key_ranges(const query_options& options) const {
-    if (_partition_key_restrictions->empty()) {
-        return {dht::partition_range::make_open_ended_both_sides()};
-    }
-    return _partition_key_restrictions->bounds_ranges(options);
-}
-
-std::vector<query::clustering_range> statement_restrictions::get_clustering_bounds(const query_options& options) const {
-    if (_clustering_columns_restrictions->empty()) {
-        return {query::clustering_range::make_open_ended_both_sides()};
-    }
-    return _clustering_columns_restrictions->bounds_ranges(options);
-}
-
-bool statement_restrictions::need_filtering() {
-    uint32_t number_of_restricted_columns = 0;
-    for (auto&& restrictions : _index_restrictions) {
-        number_of_restricted_columns += restrictions->size();
-    }
-
-    return number_of_restricted_columns > 1
-            || (number_of_restricted_columns == 0 && has_clustering_columns_restriction())
-            || (number_of_restricted_columns != 0 && _nonprimary_key_restrictions->has_multiple_contains());
-}
-
-void statement_restrictions::validate_secondary_index_selections(bool selects_only_static_columns) {
-    if (key_is_in_relation()) {
-        throw exceptions::invalid_request_exception(
-            "Select on indexed columns and with IN clause for the PRIMARY KEY are not supported");
-    }
-    // When the user only selects static columns, the intent is that we don't query the whole partition but just
-    // the static parts. But 1) we don't have an easy way to do that with 2i and 2) since we don't support indexes
-    // on static columns so far, 2i means that you've restricted a non-static column, so the query is somewhat
-    // non-sensical.
-    if (selects_only_static_columns) {
-        throw exceptions::invalid_request_exception(
-            "Queries using 2ndary indexes don't support selecting only static columns");
-    }
-}
-
-static bytes_view_opt do_get_value(const schema& schema,
-                                   const column_definition& cdef,
-                                   const partition_key& key,
-                                   const clustering_key_prefix& ckey,
-                                   const row& cells,
-                                   gc_clock::time_point now) {
-    switch (cdef.kind) {
-    case column_kind::partition_key:
-        return key.get_component(schema, cdef.component_index());
-    case column_kind::clustering_key:
-        return ckey.get_component(schema, cdef.component_index());
-    default:
-        auto cell = cells.find_cell(cdef.id);
-        if (!cell) {
-            return stdx::nullopt;
-        }
-        assert(cdef.is_atomic());
-        auto c = cell->as_atomic_cell();
-        return c.is_dead(now) ?
stdx::nullopt : bytes_view_opt(c.value()); - } -} - -bytes_view_opt single_column_restriction::get_value(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - gc_clock::time_point now) const { - return do_get_value(schema, _column_def, key, ckey, cells, std::move(now)); -} - -bool single_column_restriction::EQ::is_satisfied_by(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - const query_options& options, - gc_clock::time_point now) const { - if (_column_def.type->is_counter()) { - fail(unimplemented::cause::COUNTERS); - } - auto operand = value(options); - if (operand) { - auto cell_value = get_value(schema, key, ckey, cells, now); - return cell_value && _column_def.type->compare(*operand, *cell_value) == 0; - } - return false; -} - -bool single_column_restriction::IN::is_satisfied_by(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - const query_options& options, - gc_clock::time_point now) const { - if (_column_def.type->is_counter()) { - fail(unimplemented::cause::COUNTERS); - } - auto cell_value = get_value(schema, key, ckey, cells, now); - if (!cell_value) { - return false; - } - auto operands = values(options); - return std::any_of(operands.begin(), operands.end(), [&] (auto&& operand) { - return operand && _column_def.type->compare(*operand, *cell_value) == 0; - }); -} - -static query::range to_range(const term_slice& slice, const query_options& options) { - using range_type = query::range; - auto extract_bound = [&] (statements::bound bound) -> stdx::optional { - if (!slice.has_bound(bound)) { - return { }; - } - auto value = slice.bound(bound)->bind_and_get(options); - if (!value) { - return { }; - } - return { range_type::bound(*value, slice.is_inclusive(bound)) }; - }; - return range_type( - extract_bound(statements::bound::START), - extract_bound(statements::bound::END)); -} - -bool single_column_restriction::slice::is_satisfied_by(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - const query_options& options, - gc_clock::time_point now) const { - if (_column_def.type->is_counter()) { - fail(unimplemented::cause::COUNTERS); - } - auto cell_value = get_value(schema, key, ckey, cells, now); - if (!cell_value) { - return false; - } - return to_range(_slice, options).contains(*cell_value, _column_def.type->as_tri_comparator()); -} - -bool single_column_restriction::contains::is_satisfied_by(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - const query_options& options, - gc_clock::time_point now) const { - if (_column_def.type->is_counter()) { - fail(unimplemented::cause::COUNTERS); - } - if (!_column_def.type->is_collection()) { - return false; - } - - auto col_type = static_pointer_cast(_column_def.type); - if ((!_keys.empty() || !_entry_keys.empty()) && !col_type->is_map()) { - return false; - } - assert(_entry_keys.size() == _entry_values.size()); - - auto&& map_key_type = col_type->name_comparator(); - auto&& element_type = col_type->is_set() ? 
col_type->name_comparator() : col_type->value_comparator(); - if (_column_def.type->is_multi_cell()) { - auto cell = cells.find_cell(_column_def.id); - auto&& elements = col_type->deserialize_mutation_form(cell->as_collection_mutation()).cells; - auto end = std::remove_if(elements.begin(), elements.end(), [now] (auto&& element) { - return element.second.is_dead(now); - }); - for (auto&& value : _values) { - auto val = value->bind_and_get(options); - if (!val) { - continue; - } - auto found = std::find_if(elements.begin(), end, [&] (auto&& element) { - return element_type->compare(element.second.value(), *val) == 0; - }); - if (found == end) { - return false; - } - } - for (auto&& key : _keys) { - auto k = key->bind_and_get(options); - if (!k) { - continue; - } - auto found = std::find_if(elements.begin(), end, [&] (auto&& element) { - return map_key_type->compare(element.first, *k) == 0; - }); - if (found == end) { - return false; - } - } - for (uint32_t i = 0; i < _entry_keys.size(); ++i) { - auto map_key = _entry_keys[i]->bind_and_get(options); - auto map_value = _entry_values[i]->bind_and_get(options); - if (!map_key || !map_value) { - continue; - } - auto found = std::find_if(elements.begin(), end, [&] (auto&& element) { - return map_key_type->compare(element.first, *map_key) == 0; - }); - if (found == end || element_type->compare(found->second.value(), *map_value) != 0) { - return false; - } - } - } else { - auto cell_value = get_value(schema, key, ckey, cells, now); - if (!cell_value) { - return false; - } - auto deserialized = _column_def.type->deserialize(*cell_value); - for (auto&& value : _values) { - auto val = value->bind_and_get(options); - if (!val) { - continue; - } - auto exists_in = [&](auto&& range) { - auto found = std::find_if(range.begin(), range.end(), [&] (auto&& element) { - return element_type->compare(element.serialize(), *val) == 0; - }); - return found != range.end(); - }; - if (col_type->is_list()) { - if (!exists_in(value_cast(deserialized))) { - return false; - } - } else if (col_type->is_set()) { - if (!exists_in(value_cast(deserialized))) { - return false; - } - } else { - auto data_map = value_cast(deserialized); - if (!exists_in(data_map | boost::adaptors::transformed([] (auto&& p) { return p.second; }))) { - return false; - } - } - } - if (col_type->is_map()) { - auto& data_map = value_cast(deserialized); - for (auto&& key : _keys) { - auto k = key->bind_and_get(options); - if (!k) { - continue; - } - auto found = std::find_if(data_map.begin(), data_map.end(), [&] (auto&& element) { - return map_key_type->compare(element.first.serialize(), *k) == 0; - }); - if (found == data_map.end()) { - return false; - } - } - for (uint32_t i = 0; i < _entry_keys.size(); ++i) { - auto map_key = _entry_keys[i]->bind_and_get(options); - auto map_value = _entry_values[i]->bind_and_get(options); - if (!map_key || !map_value) { - continue; - } - auto found = std::find_if(data_map.begin(), data_map.end(), [&] (auto&& element) { - return map_key_type->compare(element.first.serialize(), *map_key) == 0; - }); - if (found == data_map.end() || element_type->compare(found->second.serialize(), *map_value) != 0) { - return false; - } - } - } - } - - return true; -} - -bool token_restriction::EQ::is_satisfied_by(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - const query_options& options, - gc_clock::time_point now) const { - bool satisfied = false; - auto cdef = _column_definitions.begin(); - for (auto&& operand : 
values(options)) { - if (operand) { - auto cell_value = do_get_value(schema, **cdef, key, ckey, cells, now); - satisfied = cell_value && (*cdef)->type->compare(*operand, *cell_value) == 0; - } - if (!satisfied) { - break; - } - } - return satisfied; -} - -bool token_restriction::slice::is_satisfied_by(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - const query_options& options, - gc_clock::time_point now) const { - bool satisfied = false; - auto range = to_range(_slice, options); - for (auto* cdef : _column_definitions) { - auto cell_value = do_get_value(schema, *cdef, key, ckey, cells, now); - if (!cell_value) { - return false; - } - satisfied = range.contains(*cell_value, cdef->type->as_tri_comparator()); - if (!satisfied) { - break; - } - } - return satisfied; -} - -} -} diff --git a/scylla/cql3/restrictions/statement_restrictions.hh b/scylla/cql3/restrictions/statement_restrictions.hh deleted file mode 100644 index 9afacf4..0000000 --- a/scylla/cql3/restrictions/statement_restrictions.hh +++ /dev/null @@ -1,407 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include "to_string.hh" -#include "schema.hh" -#include "cql3/restrictions/restrictions.hh" -#include "cql3/restrictions/primary_key_restrictions.hh" -#include "cql3/restrictions/single_column_restrictions.hh" -#include "cql3/relation.hh" -#include "cql3/variable_specifications.hh" -#include "cql3/statements/statement_type.hh" - -namespace cql3 { - -namespace restrictions { - - -/** - * The restrictions corresponding to the relations specified on the where-clause of CQL query. 
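Editor's sketch: the slice evaluation above funnels through to_range(), which turns a term_slice into an optional pair of bounds checked against the cell value. A minimal model of that range test, with int values standing in for serialized bytes and a tri-comparator:

    #include <cassert>
    #include <optional>

    struct bound { int value; bool inclusive; };

    struct simple_range {
        std::optional<bound> start, end;
        bool contains(int v) const {
            if (start && (v < start->value || (v == start->value && !start->inclusive))) {
                return false;
            }
            if (end && (v > end->value || (v == end->value && !end->inclusive))) {
                return false;
            }
            return true;
        }
    };

    int main() {
        simple_range r{bound{1, false}, bound{5, true}};   // the range (1, 5]
        assert(!r.contains(1) && r.contains(2) && r.contains(5) && !r.contains(6));
    }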
- */ -class statement_restrictions { -private: - schema_ptr _schema; - - template - class initial_key_restrictions; - - template - static ::shared_ptr> get_initial_key_restrictions(); - - /** - * Restrictions on partitioning columns - */ - ::shared_ptr> _partition_key_restrictions; - - /** - * Restrictions on clustering columns - */ - ::shared_ptr> _clustering_columns_restrictions; - - /** - * Restriction on non-primary key columns (i.e. secondary index restrictions) - */ - ::shared_ptr _nonprimary_key_restrictions; - - std::unordered_set _not_null_columns; - - /** - * The restrictions used to build the index expressions - */ - std::vector<::shared_ptr> _index_restrictions; - - /** - * true if the secondary index need to be queried, false otherwise - */ - bool _uses_secondary_indexing = false; - - /** - * Specify if the query will return a range of partition keys. - */ - bool _is_key_range = false; - -public: - /** - * Creates a new empty StatementRestrictions. - * - * @param cfm the column family meta data - * @return a new empty StatementRestrictions. - */ - statement_restrictions(schema_ptr schema); - - statement_restrictions(database& db, - schema_ptr schema, - statements::statement_type type, - const std::vector<::shared_ptr>& where_clause, - ::shared_ptr bound_names, - bool selects_only_static_columns, - bool select_a_collection, - bool for_view = false); -private: - void add_restriction(::shared_ptr restriction); - void add_single_column_restriction(::shared_ptr restriction); -public: - bool uses_function(const sstring& ks_name, const sstring& function_name) const; - - /** - * Checks if the restrictions on the partition key is an IN restriction. - * - * @return true the restrictions on the partition key is an IN restriction, false - * otherwise. - */ - bool key_is_in_relation() const { - return _partition_key_restrictions->is_IN(); - } - - /** - * Checks if the query request a range of partition keys. - * - * @return true if the query request a range of partition keys, false otherwise. - */ - bool is_key_range() const { - return _is_key_range; - } - - /** - * Checks if the secondary index need to be queried. - * - * @return true if the secondary index need to be queried, false otherwise. - */ - bool uses_secondary_indexing() const { - return _uses_secondary_indexing; - } - - ::shared_ptr> get_partition_key_restrictions() const { - return _partition_key_restrictions; - } - - ::shared_ptr> get_clustering_columns_restrictions() const { - return _clustering_columns_restrictions; - } - - /** - * Checks if the partition key has some unrestricted components. - * @return true if the partition key has some unrestricted components, false otherwise. - */ - bool has_partition_key_unrestricted_components() const; - - /** - * Checks if the clustering key has some unrestricted components. - * @return true if the clustering key has some unrestricted components, false otherwise. - */ - bool has_unrestricted_clustering_columns() const; -private: - void process_partition_key_restrictions(bool has_queriable_index, bool for_view); - - /** - * Returns the partition key components that are not restricted. - * @return the partition key components that are not restricted. - */ - std::vector<::shared_ptr> get_partition_key_unrestricted_components() const; - - /** - * Processes the clustering column restrictions. 
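Editor's sketch: the class keeps one restriction set per category of column, and get_restrictions(), defined a little further down in this header, routes lookups by column kind. A toy illustration of that dispatch with a simplified column_kind enum and strings in place of restriction sets:

    #include <cassert>
    #include <string>

    enum class column_kind { partition_key, clustering_key, regular };

    const std::string& get_restrictions(column_kind kind,
                                        const std::string& pk,
                                        const std::string& ck,
                                        const std::string& other) {
        switch (kind) {
        case column_kind::partition_key:  return pk;
        case column_kind::clustering_key: return ck;
        default:                          return other;   // non-primary-key restrictions
        }
    }

    int main() {
        std::string pk = "pk-restrictions", ck = "ck-restrictions", other = "non-pk";
        assert(get_restrictions(column_kind::clustering_key, pk, ck, other) == "ck-restrictions");
    }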
- * - * @param has_queriable_index true if some of the queried data are indexed, false otherwise - * @param select_a_collection true if the query should return a collection column - * @throws InvalidRequestException if the request is invalid - */ - void process_clustering_columns_restrictions(bool has_queriable_index, bool select_a_collection, bool for_view); - - /** - * Returns the Restrictions for the specified type of columns. - * - * @param kind the column type - * @return the restrictions for the specified type of columns - */ - ::shared_ptr get_restrictions(column_kind kind) const { - switch (kind) { - case column_kind::partition_key: return _partition_key_restrictions; - case column_kind::clustering_key: return _clustering_columns_restrictions; - default: return _nonprimary_key_restrictions; - } - } - -#if 0 - std::vector<::shared_ptr> get_index_expressions(const query_options& options) { - if (!_uses_secondary_indexing || _index_restrictions.empty()) { - return {}; - } - - std::vector<::shared_ptr> expressions; - for (auto&& restrictions : _index_restrictions) { - restrictions->add_index_expression_to(expressions, options); - } - - return expressions; - } -#endif - -#if 0 - /** - * Returns the partition keys for which the data is requested. - * - * @param options the query options - * @return the partition keys for which the data is requested. - * @throws InvalidRequestException if the partition keys cannot be retrieved - */ - std::vector get_partition_keys(const query_options& options) const { - return _partition_key_restrictions->values(options); - } -#endif - -public: - /** - * Returns the specified range of the partition key. - * - * @param b the boundary type - * @param options the query options - * @return the specified bound of the partition key - * @throws InvalidRequestException if the boundary cannot be retrieved - */ - dht::partition_range_vector get_partition_key_ranges(const query_options& options) const; - -#if 0 - /** - * Returns the partition key bounds. - * - * @param options the query options - * @return the partition key bounds - * @throws InvalidRequestException if the query is invalid - */ - AbstractBounds get_partition_key_bounds(const query_options& options) { - auto p = global_partitioner(); - - if (_partition_key_restrictions->is_on_token()) { - return get_partition_key_bounds_for_token_restrictions(p, options); - } - - return get_partition_key_bounds(p, options); - } - -private: - private AbstractBounds get_partition_key_bounds(IPartitioner p, - const query_options& options) throws InvalidRequestException - { - ByteBuffer startKeyBytes = get_partition_key_bound(Bound.START, options); - ByteBuffer finishKeyBytes = get_partition_key_bound(Bound.END, options); - - RowPosition startKey = RowPosition.ForKey.get(startKeyBytes, p); - RowPosition finishKey = RowPosition.ForKey.get(finishKeyBytes, p); - - if (startKey.compareTo(finishKey) > 0 && !finishKey.isMinimum()) - return null; - - if (_partition_key_restrictions->isInclusive(Bound.START)) - { - return _partition_key_restrictions->isInclusive(Bound.END) - ? new Bounds<>(startKey, finishKey) - : new IncludingExcludingBounds<>(startKey, finishKey); - } - - return _partition_key_restrictions->isInclusive(Bound.END) - ? 
new Range<>(startKey, finishKey) - : new ExcludingBounds<>(startKey, finishKey); - } - - private AbstractBounds get_partition_key_bounds_for_token_restriction(IPartitioner p, - const query_options& options) - throws InvalidRequestException - { - Token startToken = getTokenBound(Bound.START, options, p); - Token endToken = getTokenBound(Bound.END, options, p); - - bool includeStart = _partition_key_restrictions->isInclusive(Bound.START); - bool includeEnd = _partition_key_restrictions->isInclusive(Bound.END); - - /* - * If we ask SP.getRangeSlice() for (token(200), token(200)], it will happily return the whole ring. - * However, wrapping range doesn't really make sense for CQL, and we want to return an empty result in that - * case (CASSANDRA-5573). So special case to create a range that is guaranteed to be empty. - * - * In practice, we want to return an empty result set if either startToken > endToken, or both are equal but - * one of the bound is excluded (since [a, a] can contains something, but not (a, a], [a, a) or (a, a)). - * Note though that in the case where startToken or endToken is the minimum token, then this special case - * rule should not apply. - */ - int cmp = startToken.compareTo(endToken); - if (!startToken.isMinimum() && !endToken.isMinimum() - && (cmp > 0 || (cmp == 0 && (!includeStart || !includeEnd)))) - return null; - - RowPosition start = includeStart ? startToken.minKeyBound() : startToken.maxKeyBound(); - RowPosition end = includeEnd ? endToken.maxKeyBound() : endToken.minKeyBound(); - - return new Range<>(start, end); - } - - private Token getTokenBound(Bound b, const query_options& options, IPartitioner p) throws InvalidRequestException - { - if (!_partition_key_restrictions->hasBound(b)) - return p.getMinimumToken(); - - ByteBuffer value = _partition_key_restrictions->bounds(b, options).get(0); - checkNotNull(value, "Invalid null token value"); - return p.getTokenFactory().fromByteArray(value); - } -#endif - -public: - /** - * Checks if the query does not contains any restriction on the clustering columns. - * - * @return true if the query does not contains any restriction on the clustering columns, - * false otherwise. - */ - bool has_no_clustering_columns_restriction() const { - return _clustering_columns_restrictions->empty(); - } - -#if 0 - // For non-composite slices, we don't support internally the difference between exclusive and - // inclusive bounds, so we deal with it manually. - bool is_non_composite_slice_with_exclusive_bounds() - { - return !cfm.comparator.isCompound() - && _clustering_columns_restrictions->isSlice() - && (!_clustering_columns_restrictions->isInclusive(Bound.START) || !_clustering_columns_restrictions->isInclusive(Bound.END)); - } - - /** - * Returns the requested clustering columns as Composites. - * - * @param options the query options - * @return the requested clustering columns as Composites - * @throws InvalidRequestException if the query is not valid - */ - public List getClusteringColumnsAsComposites(QueryOptions options) throws InvalidRequestException - { - return clusteringColumnsRestrictions.valuesAsComposites(options); - } -#endif - -public: - std::vector get_clustering_bounds(const query_options& options) const; - - /** - * Checks if the query need to use filtering. - * @return true if the query need to use filtering, false otherwise. 
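Editor's sketch: a distilled version of the need_filtering() predicate implemented earlier in statement_restrictions.cc, showing the three conditions that force filtering:

    #include <cassert>

    bool need_filtering(unsigned restricted_columns,
                        bool has_clustering_restriction,
                        bool multiple_contains) {
        return restricted_columns > 1
            || (restricted_columns == 0 && has_clustering_restriction)
            || (restricted_columns != 0 && multiple_contains);
    }

    int main() {
        assert(need_filtering(2, false, false));   // two indexed restrictions
        assert(need_filtering(0, true, false));    // clustering restriction, nothing indexed
        assert(!need_filtering(1, false, false));  // a single indexed restriction
    }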
- */ - bool need_filtering(); - - void validate_secondary_index_selections(bool selects_only_static_columns); - - /** - * Checks if the query has some restrictions on the clustering columns. - * - * @return true if the query has some restrictions on the clustering columns, - * false otherwise. - */ - bool has_clustering_columns_restriction() const { - return !_clustering_columns_restrictions->empty(); - } - - /** - * @return true if column is restricted by some restriction, false otherwise - */ - bool is_restricted(const column_definition* cdef) const { - if (_not_null_columns.find(cdef) != _not_null_columns.end()) { - return true; - } - - auto&& restricted = get_restrictions(cdef->kind).get()->get_column_defs(); - return std::find(restricted.begin(), restricted.end(), cdef) != restricted.end(); - } - - /** - * @return the non-primary key restrictions. - */ - const single_column_restrictions::restrictions_map& get_non_pk_restriction() const { - return _nonprimary_key_restrictions->restrictions(); - } -}; - -} - -} diff --git a/scylla/cql3/restrictions/term_slice.hh b/scylla/cql3/restrictions/term_slice.hh deleted file mode 100644 index 1db71e2..0000000 --- a/scylla/cql3/restrictions/term_slice.hh +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
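Editor's sketch: term_slice, defined just below, stores at most one bound per instance until merge() combines a start-only slice with an end-only one. A simplified model of that merge, with int bounds in place of bound terms:

    #include <cassert>
    #include <optional>

    struct half_slice {
        std::optional<int> start, end;
        bool start_inclusive = false, end_inclusive = false;
        void merge(const half_slice& other) {
            if (start) {            // we hold the start; take the end from the other slice
                end = other.end;
                end_inclusive = other.end_inclusive;
            } else {                // symmetric case
                start = other.start;
                start_inclusive = other.start_inclusive;
            }
        }
    };

    int main() {
        half_slice gt1;  gt1.start = 1;                          // c > 1
        half_slice le5;  le5.end = 5; le5.end_inclusive = true;  // c <= 5
        gt1.merge(le5);                                          // now (1, 5]
        assert(gt1.start && gt1.end && gt1.end_inclusive && !gt1.start_inclusive);
    }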
- */ - -#pragma once - -#include "cql3/restrictions/abstract_restriction.hh" -#include "cql3/term.hh" -#include "core/shared_ptr.hh" -#include "to_string.hh" -#include "exceptions/exceptions.hh" - -namespace cql3 { - -namespace restrictions { - -class term_slice final { -private: - struct bound { - bool inclusive; - ::shared_ptr t; - }; - bound _bounds[2]; -private: - term_slice(::shared_ptr start, bool include_start, ::shared_ptr end, bool include_end) - : _bounds{{include_start, std::move(start)}, {include_end, std::move(end)}} - { } -public: - static term_slice new_instance(statements::bound bound, bool include, ::shared_ptr term) { - if (is_start(bound)) { - return term_slice(std::move(term), include, {}, false); - } else { - return term_slice({}, false, std::move(term), include); - } - } - - /** - * Returns the boundary value. - * - * @param bound the boundary type - * @return the boundary value - */ - ::shared_ptr bound(statements::bound b) const { - return _bounds[get_idx(b)].t; - } - - /** - * Checks if this slice has a boundary for the specified type. - * - * @param b the boundary type - * @return true if this slice has a boundary for the specified type, false otherwise. - */ - bool has_bound(statements::bound b) const { - return bool(_bounds[get_idx(b)].t); - } - - /** - * Checks if this slice boundary is inclusive for the specified type. - * - * @param b the boundary type - * @return true if this slice boundary is inclusive for the specified type, - * false otherwise. - */ - bool is_inclusive(statements::bound b) const { - return !_bounds[get_idx(b)].t || _bounds[get_idx(b)].inclusive; - } - - /** - * Merges this slice with the specified one. - * - * @param other the slice to merge with - */ - void merge(const term_slice& other) { - if (has_bound(statements::bound::START)) { - assert(!other.has_bound(statements::bound::START)); - _bounds[get_idx(statements::bound::END)] = other._bounds[get_idx(statements::bound::END)]; - } else { - assert(!other.has_bound(statements::bound::END)); - _bounds[get_idx(statements::bound::START)] = other._bounds[get_idx(statements::bound::START)]; - } - } - - sstring to_string() const { - static auto print_term = [] (::shared_ptr t) -> sstring { - return t ? t->to_string() : "null"; - }; - return sprint("(%s %s, %s %s)", - _bounds[0].inclusive ? ">=" : ">", print_term(_bounds[0].t), - _bounds[1].inclusive ? "<=" : "<", print_term(_bounds[1].t)); - } - - friend std::ostream& operator<<(std::ostream& out, const term_slice& slice) { - return out << slice.to_string(); - } - -#if 0 - /** - * Returns the index operator corresponding to the specified boundary. - * - * @param b the boundary type - * @return the index operator corresponding to the specified boundary - */ - public Operator getIndexOperator(statements::bound b) - { - if (b.isStart()) - return boundInclusive[get_idx(b)] ? Operator.GTE : Operator.GT; - - return boundInclusive[get_idx(b)] ? Operator.LTE : Operator.LT; - } - - /** - * Check if this TermSlice is supported by the specified index. - * - * @param index the Secondary index - * @return true this type of TermSlice is supported by the specified index, - * false otherwise. - */ - public bool isSupportedBy(SecondaryIndex index) - { - bool supported = false; - - if (has_bound(statements::bound::START)) - supported |= isInclusive(statements::bound::START) ? index.supportsOperator(Operator.GTE) - : index.supportsOperator(Operator.GT); - if (has_bound(statements::bound::END)) - supported |= isInclusive(statements::bound::END) ? 
index.supportsOperator(Operator.LTE) - : index.supportsOperator(Operator.LT); - - return supported; - } -#endif -}; - -} -} diff --git a/scylla/cql3/restrictions/token_restriction.hh b/scylla/cql3/restrictions/token_restriction.hh deleted file mode 100644 index 786847b..0000000 --- a/scylla/cql3/restrictions/token_restriction.hh +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "restriction.hh" -#include "primary_key_restrictions.hh" -#include "exceptions/exceptions.hh" -#include "term_slice.hh" -#include "keys.hh" - -class column_definition; - -namespace cql3 { - -namespace restrictions { - -/** - * Restriction using the token function. - */ -class token_restriction: public primary_key_restrictions { -private: - /** - * The definition of the columns to which apply the token restriction. - */ - std::vector _column_definitions; -public: - token_restriction(std::vector c) - : _column_definitions(std::move(c)) { - } - - bool is_on_token() const override { - return true; - } - std::vector get_column_defs() const override { - return _column_definitions; - } - -#if 0 - bool has_supporting_index(::shared_ptr index_manager) const override { - return false; - } - - void add_index_expression_to(std::vector<::shared_ptr>& expressions, - const query_options& options) override { - throw exceptions::unsupported_operation_exception(); - } -#endif - - std::vector values_as_keys(const query_options& options) const override { - throw exceptions::unsupported_operation_exception(); - } - - std::vector bounds_ranges(const query_options& options) const override { - auto get_token_bound = [this, &options](statements::bound b) { - if (!has_bound(b)) { - return is_start(b) ? 
dht::minimum_token() : dht::maximum_token(); - } - auto buf= bounds(b, options).front(); - if (!buf) { - throw exceptions::invalid_request_exception("Invalid null token value"); - } - auto tk = dht::global_partitioner().from_bytes(*buf); - if (tk.is_minimum() && !is_start(b)) { - // The token was parsed as a minimum marker (token::kind::before_all_keys), but - // as it appears in the end bound position, it is actually the maximum marker - // (token::kind::after_all_keys). - return dht::maximum_token(); - } - return tk; - }; - - const auto start_token = get_token_bound(statements::bound::START); - const auto end_token = get_token_bound(statements::bound::END); - const auto include_start = this->is_inclusive(statements::bound::START); - const auto include_end = this->is_inclusive(statements::bound::END); - - /* - * If we ask SP.getRangeSlice() for (token(200), token(200)], it will happily return the whole ring. - * However, wrapping range doesn't really make sense for CQL, and we want to return an empty result in that - * case (CASSANDRA-5573). So special case to create a range that is guaranteed to be empty. - * - * In practice, we want to return an empty result set if either startToken > endToken, or both are equal but - * one of the bound is excluded (since [a, a] can contains something, but not (a, a], [a, a) or (a, a)). - */ - if (start_token > end_token - || (start_token == end_token - && (!include_start || !include_end))) { - return {}; - } - - typedef typename bounds_range_type::bound bound; - - auto start = bound(include_start - ? dht::ring_position::starting_at(start_token) - : dht::ring_position::ending_at(start_token)); - auto end = bound(include_end - ? dht::ring_position::ending_at(end_token) - : dht::ring_position::starting_at(end_token)); - - return { bounds_range_type(std::move(start), std::move(end)) }; - } - - class EQ; - class slice; -}; - - -class token_restriction::EQ final : public token_restriction { -private: - ::shared_ptr _value; -public: - EQ(std::vector column_defs, ::shared_ptr value) - : token_restriction(column_defs) - , _value(std::move(value)) - {} - - bool is_EQ() const { - return true; - } - - bool uses_function(const sstring& ks_name, const sstring& function_name) const override { - return abstract_restriction::term_uses_function(_value, ks_name, function_name); - } - - void merge_with(::shared_ptr) override { - throw exceptions::invalid_request_exception( - join(", ", get_column_defs()) - + " cannot be restricted by more than one relation if it includes an Equal"); - } - - std::vector values(const query_options& options) const override { - return { to_bytes_opt(_value->bind_and_get(options)) }; - } - - sstring to_string() const override { - return sprint("EQ(%s)", _value->to_string()); - } - - virtual bool is_satisfied_by(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - const query_options& options, - gc_clock::time_point now) const override; -}; - -class token_restriction::slice final : public token_restriction { -private: - term_slice _slice; -public: - slice(std::vector column_defs, statements::bound bound, bool inclusive, ::shared_ptr term) - : token_restriction(column_defs) - , _slice(term_slice::new_instance(bound, inclusive, std::move(term))) - {} - - bool is_slice() const override { - return true; - } - - bool has_bound(statements::bound b) const override { - return _slice.has_bound(b); - } - - std::vector values(const query_options& options) const override { - throw 
exceptions::unsupported_operation_exception(); - } - - std::vector bounds(statements::bound b, const query_options& options) const override { - return { to_bytes_opt(_slice.bound(b)->bind_and_get(options)) }; - } - - bool uses_function(const sstring& ks_name, - const sstring& function_name) const override { - return (_slice.has_bound(statements::bound::START) - && abstract_restriction::term_uses_function( - _slice.bound(statements::bound::START), ks_name, - function_name)) - || (_slice.has_bound(statements::bound::END) - && abstract_restriction::term_uses_function( - _slice.bound(statements::bound::END), - ks_name, function_name)); - } - bool is_inclusive(statements::bound b) const override { - return _slice.is_inclusive(b); - } - void merge_with(::shared_ptr restriction) override { - try { - if (!restriction->is_on_token()) { - throw exceptions::invalid_request_exception( - "Columns \"%s\" cannot be restricted by both a normal relation and a token relation"); - } - if (!restriction->is_slice()) { - throw exceptions::invalid_request_exception( - "Columns \"%s\" cannot be restricted by both an equality and an inequality relation"); - } - - auto* other_slice = static_cast(restriction.get()); - - if (has_bound(statements::bound::START) - && other_slice->has_bound(statements::bound::START)) { - throw exceptions::invalid_request_exception( - "More than one restriction was found for the start bound on %s"); - } - if (has_bound(statements::bound::END) - && other_slice->has_bound(statements::bound::END)) { - throw exceptions::invalid_request_exception( - "More than one restriction was found for the end bound on %s"); - } - _slice.merge(other_slice->_slice); - } catch (exceptions::invalid_request_exception & e) { - throw exceptions::invalid_request_exception( - sprint(e.what(), join(", ", get_column_defs()))); - } - } - sstring to_string() const override { - return sprint("SLICE%s", _slice); - } - - virtual bool is_satisfied_by(const schema& schema, - const partition_key& key, - const clustering_key_prefix& ckey, - const row& cells, - const query_options& options, - gc_clock::time_point now) const override; -}; - -} - -} diff --git a/scylla/cql3/result_set.cc b/scylla/cql3/result_set.cc deleted file mode 100644 index 5f12167..0000000 --- a/scylla/cql3/result_set.cc +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
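Editor's sketch: token_restriction::bounds_ranges(), in the token_restriction.hh hunk above, special-cases ranges like (token(200), token(200)] that would otherwise wrap the whole ring (CASSANDRA-5573). A distilled version of the emptiness test, with plain longs standing in for tokens and the minimum-token exemption omitted:

    #include <cassert>

    bool is_empty_token_range(long start, long end, bool include_start, bool include_end) {
        // start > end, or equal bounds with at least one exclusive end, select nothing.
        return start > end || (start == end && (!include_start || !include_end));
    }

    int main() {
        assert(is_empty_token_range(200, 200, false, true));   // (200, 200]
        assert(!is_empty_token_range(200, 200, true, true));   // [200, 200] keeps one token
        assert(is_empty_token_range(300, 200, true, true));    // inverted bounds
    }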
diff --git a/scylla/cql3/result_set.cc b/scylla/cql3/result_set.cc
deleted file mode 100644
index 5f12167..0000000
--- a/scylla/cql3/result_set.cc
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "cql3/result_set.hh"
-
-namespace cql3 {
-
-metadata::metadata(std::vector<::shared_ptr<column_specification>> names_)
-        : _flags(flag_enum_set())
-        , names(std::move(names_)) {
-    _column_count = names.size();
-}
-
-metadata::metadata(flag_enum_set flags, std::vector<::shared_ptr<column_specification>> names_, uint32_t column_count,
-        ::shared_ptr<const service::pager::paging_state> paging_state)
-    : _flags(flags)
-    , names(std::move(names_))
-    , _column_count(column_count)
-    , _paging_state(std::move(paging_state))
-{ }
-
-// The maximum number of values that the ResultSet can hold. This can be bigger than columnCount due to CASSANDRA-4911
-uint32_t metadata::value_count() const {
-    return _flags.contains<flag::NO_METADATA>() ? _column_count : names.size();
-}
-
-void metadata::add_non_serialized_column(::shared_ptr<column_specification> name) {
-    // See comment above. Because columnCount doesn't account for the newly added name, it
-    // won't be serialized.
-    names.emplace_back(std::move(name));
-}
-
-bool metadata::all_in_same_cf() const {
-    if (_flags.contains<flag::NO_METADATA>()) {
-        return false;
-    }
-
-    return column_specification::all_in_same_table(names);
-}
-
-void metadata::set_has_more_pages(::shared_ptr<const service::pager::paging_state> paging_state) {
-    if (!paging_state) {
-        return;
-    }
-
-    _flags.set<flag::HAS_MORE_PAGES>();
-    _paging_state = std::move(paging_state);
-}
-
-void metadata::set_skip_metadata() {
-    _flags.set<flag::NO_METADATA>();
-}
-
-metadata::flag_enum_set metadata::flags() const {
-    return _flags;
-}
-
-uint32_t metadata::column_count() const {
-    return _column_count;
-}
-
-::shared_ptr<const service::pager::paging_state> metadata::paging_state() const {
-    return _paging_state;
-}
-
-const std::vector<::shared_ptr<column_specification>>& metadata::get_names() const {
-    return names;
-}
-
-prepared_metadata::prepared_metadata(const std::vector<::shared_ptr<column_specification>>& names,
-                                     const std::vector<uint16_t>& partition_key_bind_indices)
-    : _names{names}
-    , _partition_key_bind_indices{partition_key_bind_indices}
-{
-    if (!names.empty() && column_specification::all_in_same_table(_names)) {
-        _flags.set<flag::GLOBAL_TABLES_SPEC>();
-    }
-}
-
-prepared_metadata::flag_enum_set prepared_metadata::flags() const {
-    return _flags;
-}
-
-const std::vector<::shared_ptr<column_specification>>& prepared_metadata::names() const {
-    return _names;
-}
-
-const std::vector<uint16_t>& prepared_metadata::partition_key_bind_indices() const {
-    return _partition_key_bind_indices;
-}
-
-result_set::result_set(std::vector<::shared_ptr<column_specification>> metadata_)
-    : _metadata(::make_shared<metadata>(std::move(metadata_)))
-{ }
-
-result_set::result_set(::shared_ptr<metadata> metadata)
-    : _metadata(std::move(metadata))
-{ }
-
-size_t result_set::size() const {
-    return _rows.size();
-}
-
-bool result_set::empty() const {
-    return _rows.empty();
-}
-
-void result_set::add_row(std::vector<bytes_opt> row) {
-    assert(row.size() == _metadata->value_count());
-    _rows.emplace_back(std::move(row));
-}
-
-void result_set::add_column_value(bytes_opt value) {
-    if (_rows.empty() || _rows.back().size() == _metadata->value_count()) {
-        std::vector<bytes_opt> row;
-        row.reserve(_metadata->value_count());
-        _rows.emplace_back(std::move(row));
-    }
-
-    _rows.back().emplace_back(std::move(value));
-}
-
-void result_set::reverse() {
-    std::reverse(_rows.begin(), _rows.end());
-}
-
-void result_set::trim(size_t limit) {
-    if (_rows.size() > limit) {
-        _rows.resize(limit);
-    }
-}
-
-metadata& result_set::get_metadata() {
-    return *_metadata;
-}
-
-const metadata& result_set::get_metadata() const {
-    return *_metadata;
-}
-
-const std::deque<std::vector<bytes_opt>>& result_set::rows() const {
-    return _rows;
-}
-
-shared_ptr<metadata>
-make_empty_metadata() {
-    static thread_local shared_ptr<metadata> empty_metadata_cache = [] {
-        auto result = ::make_shared<metadata>(std::vector<::shared_ptr<column_specification>>{});
-        result->set_skip_metadata();
-        return result;
-    }();
-    return empty_metadata_cache;
-}
-
-}
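result_set::add_column_value above opens a fresh row whenever the current one already holds value_count() cells. A compact sketch of that filling rule, with plain strings standing in for bytes_opt (simple_result_set is an illustrative stand-in, not the patch's type):

    #include <deque>
    #include <optional>
    #include <string>
    #include <vector>

    using cell = std::optional<std::string>;  // stand-in for bytes_opt

    struct simple_result_set {
        size_t value_count;                    // metadata->value_count()
        std::deque<std::vector<cell>> rows;

        void add_column_value(cell v) {
            // A full row triggers a fresh one, exactly like the code above.
            if (rows.empty() || rows.back().size() == value_count) {
                rows.emplace_back();
                rows.back().reserve(value_count);
            }
            rows.back().emplace_back(std::move(v));
        }
    };
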
diff --git a/scylla/cql3/result_set.hh b/scylla/cql3/result_set.hh
deleted file mode 100644
index 11cc476..0000000
--- a/scylla/cql3/result_set.hh
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include <deque>
-#include <vector>
-#include "enum_set.hh"
-#include "service/pager/paging_state.hh"
-#include "schema.hh"
-
-namespace cql3 {
-
-class metadata {
-public:
-    enum class flag : uint8_t {
-        GLOBAL_TABLES_SPEC = 0,
-        HAS_MORE_PAGES = 1,
-        NO_METADATA = 2,
-    };
-
-    using flag_enum = super_enum<flag, flag::GLOBAL_TABLES_SPEC, flag::HAS_MORE_PAGES, flag::NO_METADATA>;
-
-    using flag_enum_set = enum_set<flag_enum>;
-
-private:
-    flag_enum_set _flags;
-
-public:
-    // Please note that columnCount can actually be smaller than names, even if names is not null. This is
-    // used to include columns in the resultSet that we need to do post-query re-orderings
-    // (SelectStatement.orderResults) but that shouldn't be sent to the user as they haven't been requested
-    // (CASSANDRA-4911). So the serialization code will exclude any columns in name whose index is >= columnCount.
-    std::vector<::shared_ptr<column_specification>> names;
-
-private:
-    uint32_t _column_count;
-    ::shared_ptr<const service::pager::paging_state> _paging_state;
-
-public:
-    metadata(std::vector<::shared_ptr<column_specification>> names_);
-
-    metadata(flag_enum_set flags, std::vector<::shared_ptr<column_specification>> names_, uint32_t column_count,
-            ::shared_ptr<const service::pager::paging_state> paging_state);
-
-    // The maximum number of values that the ResultSet can hold. This can be bigger than columnCount due to CASSANDRA-4911
-    uint32_t value_count() const;
-
-    void add_non_serialized_column(::shared_ptr<column_specification> name);
-
-private:
-    bool all_in_same_cf() const;
-
-public:
-    void set_has_more_pages(::shared_ptr<const service::pager::paging_state> paging_state);
-
-    void set_skip_metadata();
-
-    flag_enum_set flags() const;
-
-    uint32_t column_count() const;
-
-    ::shared_ptr<const service::pager::paging_state> paging_state() const;
-
-    const std::vector<::shared_ptr<column_specification>>& get_names() const;
-};
-
-::shared_ptr<metadata> make_empty_metadata();
-
-class prepared_metadata {
-public:
-    enum class flag : uint8_t {
-        GLOBAL_TABLES_SPEC = 0,
-    };
-
-    using flag_enum = super_enum<flag, flag::GLOBAL_TABLES_SPEC>;
-
-    using flag_enum_set = enum_set<flag_enum>;
-private:
-    flag_enum_set _flags;
-    std::vector<::shared_ptr<column_specification>> _names;
-    std::vector<uint16_t> _partition_key_bind_indices;
-public:
-    prepared_metadata(const std::vector<::shared_ptr<column_specification>>& names,
-                      const std::vector<uint16_t>& partition_key_bind_indices);
-
-    flag_enum_set flags() const;
-    const std::vector<::shared_ptr<column_specification>>& names() const;
-    const std::vector<uint16_t>& partition_key_bind_indices() const;
-};
-
-class result_set {
-public:
-    ::shared_ptr<metadata> _metadata;
-    std::deque<std::vector<bytes_opt>> _rows;
-public:
-    result_set(std::vector<::shared_ptr<column_specification>> metadata_);
-
-    result_set(::shared_ptr<metadata> metadata);
-
-    size_t size() const;
-
-    bool empty() const;
-
-    void add_row(std::vector<bytes_opt> row);
-
-    void add_column_value(bytes_opt value);
-
-    void reverse();
-
-    void trim(size_t limit);
-
-    template <typename RowComparator>
-    void sort(const RowComparator& cmp) {
-        std::sort(_rows.begin(), _rows.end(), std::ref(cmp));
-    }
-
-    metadata& get_metadata();
-
-    const metadata& get_metadata() const;
-
-    // Returns a range of rows. A row is a range of bytes_opt.
-    const std::deque<std::vector<bytes_opt>>& rows() const;
-};
-
-}
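The metadata flags above appear to ride on Scylla's enum_set/super_enum utilities; the behaviour that set_skip_metadata() and set_has_more_pages() rely on is plain bit flipping, as in this stand-in using std::bitset (rs_flag and rs_flags are illustrative names):

    #include <bitset>
    #include <cstddef>
    #include <cstdint>

    enum class rs_flag : uint8_t { GLOBAL_TABLES_SPEC = 0, HAS_MORE_PAGES = 1, NO_METADATA = 2 };

    struct rs_flags {
        std::bitset<3> bits;
        void set(rs_flag f) { bits.set(static_cast<size_t>(f)); }
        bool contains(rs_flag f) const { return bits.test(static_cast<size_t>(f)); }
    };

    // set_skip_metadata()   -> flags.set(rs_flag::NO_METADATA)
    // set_has_more_pages(p) -> flags.set(rs_flag::HAS_MORE_PAGES) plus storing p
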
diff --git a/scylla/cql3/selection/abstract_function_selector.cc b/scylla/cql3/selection/abstract_function_selector.cc
deleted file mode 100644
index dd3b2e7..0000000
--- a/scylla/cql3/selection/abstract_function_selector.cc
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Modified by ScyllaDB
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "abstract_function_selector.hh"
-#include "aggregate_function_selector.hh"
-#include "scalar_function_selector.hh"
-#include "to_string.hh"
-
-namespace cql3 {
-
-namespace selection {
-
-shared_ptr<selector::factory>
-abstract_function_selector::new_factory(shared_ptr<functions::function> fun, shared_ptr<selector_factories> factories) {
-    if (fun->is_aggregate()) {
-        if (factories->does_aggregation()) {
-            throw exceptions::invalid_request_exception("aggregate functions cannot be used as arguments of aggregate functions");
-        }
-    } else {
-        if (factories->does_aggregation() && !factories->contains_only_aggregate_functions()) {
-            throw exceptions::invalid_request_exception(sprint("the %s function arguments must be either all aggregates or all none aggregates",
-                                                               fun->name()));
-        }
-    }
-
-    struct fun_selector_factory : public factory {
-        shared_ptr<functions::function> _fun;
-        shared_ptr<selector_factories> _factories;
-
-        fun_selector_factory(shared_ptr<functions::function> fun,
-                             shared_ptr<selector_factories> factories)
-                : _fun(std::move(fun)), _factories(std::move(factories)) {
-        }
-
-        virtual sstring column_name() override {
-            return sprint("%s(%s)", _fun->name(), join(", ", _factories->get_column_names()));
-        }
-
-        virtual data_type get_return_type() override {
-            return _fun->return_type();
-        }
-
-        virtual bool uses_function(const sstring& ks_name, const sstring& function_name) override {
-            return _fun->uses_function(ks_name, function_name);
-        }
-
-        virtual shared_ptr<selector> new_instance() override {
-            using ret_type = shared_ptr<selector>;
-            return _fun->is_aggregate() ? ret_type(::make_shared<aggregate_function_selector>(_fun, _factories->new_instances()))
-                                        : ret_type(::make_shared<scalar_function_selector>(_fun, _factories->new_instances()));
-        }
-
-        virtual bool is_write_time_selector_factory() override {
-            return _factories->contains_write_time_selector_factory();
-        }
-
-        virtual bool is_ttl_selector_factory() override {
-            return _factories->contains_ttl_selector_factory();
-        }
-
-        virtual bool is_aggregate_selector_factory() override {
-            return _fun->is_aggregate() || _factories->contains_only_aggregate_functions();
-        }
-    };
-
-    return make_shared<fun_selector_factory>(std::move(fun), std::move(factories));
-}
-
-}
-
-}
diff --git a/scylla/cql3/selection/abstract_function_selector.hh b/scylla/cql3/selection/abstract_function_selector.hh
deleted file mode 100644
index 97a7b93..0000000
--- a/scylla/cql3/selection/abstract_function_selector.hh
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Modified by ScyllaDB
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "selector.hh"
-#include "selector_factories.hh"
-#include "cql3/functions/function.hh"
-
-namespace cql3 {
-namespace selection {
-
-class abstract_function_selector : public selector {
-protected:
-    shared_ptr<functions::function> _fun;
-
-    /**
-     * The list used to pass the function arguments is recycled to avoid the cost of instantiating a new list
-     * with each function call.
-     */
-    std::vector<bytes_opt> _args;
-    std::vector<shared_ptr<selector>> _arg_selectors;
-public:
-    static shared_ptr<selector::factory> new_factory(shared_ptr<functions::function> fun, shared_ptr<selector_factories> factories);
-
-    abstract_function_selector(shared_ptr<functions::function> fun, std::vector<shared_ptr<selector>> arg_selectors)
-            : _fun(std::move(fun)), _arg_selectors(std::move(arg_selectors)) {
-        _args.resize(_arg_selectors.size());
-    }
-
-    virtual data_type get_type() override {
-        return _fun->return_type();
-    }
-
-#if 0
-    @Override
-    public String toString()
-    {
-        return new StrBuilder().append(fun.name())
-                               .append("(")
-                               .appendWithSeparators(argSelectors, ", ")
-                               .append(")")
-                               .toString();
-    }
-#endif
-};
-
-template <typename T>
-class abstract_function_selector_for : public abstract_function_selector {
-    shared_ptr<T> _tfun;  // We can't use static_pointer_cast due to virtual inheritance,
-                          // so store it locally to amortize the cost of dynamic_pointer_cast
-protected:
-    shared_ptr<T> fun() { return _tfun; }
-public:
-    abstract_function_selector_for(shared_ptr<functions::function> fun, std::vector<shared_ptr<selector>> arg_selectors)
-            : abstract_function_selector(fun, std::move(arg_selectors))
-            , _tfun(dynamic_pointer_cast<T>(fun)) {
-    }
-};
-
-}
-}
diff --git a/scylla/cql3/selection/aggregate_function_selector.hh b/scylla/cql3/selection/aggregate_function_selector.hh
deleted file mode 100644
index 28743b4..0000000
--- a/scylla/cql3/selection/aggregate_function_selector.hh
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Modified by ScyllaDB
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "abstract_function_selector.hh"
-#include "cql3/functions/aggregate_function.hh"
-
-#pragma once
-
-namespace cql3 {
-
-namespace selection {
-
-class aggregate_function_selector : public abstract_function_selector_for<functions::aggregate_function> {
-    std::unique_ptr<functions::aggregate_function::aggregate> _aggregate;
-public:
-    virtual bool is_aggregate() override {
-        return true;
-    }
-
-    virtual void add_input(cql_serialization_format sf, result_set_builder& rs) override {
-        // Aggregation of aggregation is not supported
-        size_t m = _arg_selectors.size();
-        for (size_t i = 0; i < m; ++i) {
-            auto&& s = _arg_selectors[i];
-            s->add_input(sf, rs);
-            _args[i] = s->get_output(sf);
-            s->reset();
-        }
-        _aggregate->add_input(sf, _args);
-    }
-
-    virtual bytes_opt get_output(cql_serialization_format sf) override {
-        return _aggregate->compute(sf);
-    }
-
-    virtual void reset() override {
-        _aggregate->reset();
-    }
-
-    aggregate_function_selector(shared_ptr<functions::function> func,
-                                std::vector<shared_ptr<selector>> arg_selectors)
-            : abstract_function_selector_for<functions::aggregate_function>(
-                    dynamic_pointer_cast<functions::aggregate_function>(func), std::move(arg_selectors))
-            , _aggregate(fun()->new_aggregate()) {
-    }
-
-    virtual sstring assignment_testable_source_context() const override {
-        // FIXME:
-        return "FIXME";
-    }
-};
-
-}
-}
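aggregate_function_selector above folds one row at a time into the aggregate's state via add_input() and finalizes the value once in get_output(). A reduced sketch of that contract, with int in place of serialized cell values (int_aggregate and sum_aggregate are illustrative stand-ins for functions::aggregate_function's state object):

    #include <vector>

    struct int_aggregate {
        virtual ~int_aggregate() = default;
        virtual void add_input(const std::vector<int>& args) = 0;  // one row's arguments
        virtual int compute() = 0;                                 // final value
        virtual void reset() = 0;
    };

    struct sum_aggregate final : int_aggregate {
        int total = 0;
        void add_input(const std::vector<int>& args) override { total += args.at(0); }
        int compute() override { return total; }
        void reset() override { total = 0; }
    };
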
diff --git a/scylla/cql3/selection/field_selector.hh b/scylla/cql3/selection/field_selector.hh
deleted file mode 100644
index 34e58bb..0000000
--- a/scylla/cql3/selection/field_selector.hh
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "selector.hh"
-#include "types.hh"
-
-namespace cql3 {
-
-namespace selection {
-
-class field_selector : public selector {
-    user_type _type;
-    size_t _field;
-    shared_ptr<selector> _selected;
-public:
-    static shared_ptr<factory> new_factory(user_type type, size_t field, shared_ptr<factory> factory) {
-        struct field_selector_factory : selector::factory {
-            user_type _type;
-            size_t _field;
-            shared_ptr<selector::factory> _factory;
-
-            field_selector_factory(user_type type, size_t field, shared_ptr<selector::factory> factory)
-                    : _type(std::move(type)), _field(field), _factory(std::move(factory)) {
-            }
-
-            virtual sstring column_name() override {
-                auto&& name = _type->field_name(_field);
-                auto sname = sstring(reinterpret_cast<const char*>(name.begin()), name.size());
-                return sprint("%s.%s", _factory->column_name(), sname);
-            }
-
-            virtual data_type get_return_type() override {
-                return _type->field_type(_field);
-            }
-
-            shared_ptr<selector> new_instance() override {
-                return make_shared<field_selector>(_type, _field, _factory->new_instance());
-            }
-
-            bool is_aggregate_selector_factory() override {
-                return _factory->is_aggregate_selector_factory();
-            }
-        };
-        return make_shared<field_selector_factory>(std::move(type), field, std::move(factory));
-    }
-
-    virtual bool is_aggregate() override {
-        return false;
-    }
-
-    virtual void add_input(cql_serialization_format sf, result_set_builder& rs) override {
-        _selected->add_input(sf, rs);
-    }
-
-    virtual bytes_opt get_output(cql_serialization_format sf) override {
-        auto&& value = _selected->get_output(sf);
-        if (!value) {
-            return std::experimental::nullopt;
-        }
-        auto&& buffers = _type->split(*value);
-        bytes_opt ret;
-        if (_field < buffers.size() && buffers[_field]) {
-            ret = to_bytes(*buffers[_field]);
-        }
-        return ret;
-    }
-
-    virtual data_type get_type() override {
-        return _type->field_type(_field);
-    }
-
-    virtual void reset() {
-        _selected->reset();
-    }
-
-    virtual sstring assignment_testable_source_context() const override {
-        auto&& name = _type->field_name(_field);
-        auto sname = sstring(reinterpret_cast<const char*>(name.begin()), name.size());
-        return sprint("%s.%s", _selected, sname);
-    }
-
-    field_selector(user_type type, size_t field, shared_ptr<selector> selected)
-            : _type(std::move(type)), _field(field), _selected(std::move(selected)) {
-    }
-};
-
-}
-}
diff --git a/scylla/cql3/selection/raw_selector.hh b/scylla/cql3/selection/raw_selector.hh
deleted file mode 100644
index 0ab0ec0..0000000
--- a/scylla/cql3/selection/raw_selector.hh
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "cql3/selection/selectable.hh"
-#include "cql3/column_identifier.hh"
-
-namespace cql3 {
-
-namespace selection {
-
-class raw_selector {
-public:
-    const ::shared_ptr<selectable::raw> selectable_;
-    const ::shared_ptr<column_identifier> alias;
-
-    raw_selector(shared_ptr<selectable::raw> selectable__, shared_ptr<column_identifier> alias_)
-        : selectable_{selectable__}
-        , alias{alias_}
-    { }
-
-    /**
-     * Converts the specified list of RawSelectors into a list of Selectables.
-     *
-     * @param raws the RawSelectors to convert.
-     * @return a list of Selectables
-     */
-    static std::vector<::shared_ptr<selectable>> to_selectables(const std::vector<::shared_ptr<raw_selector>>& raws,
-            schema_ptr schema) {
-        std::vector<::shared_ptr<selectable>> r;
-        r.reserve(raws.size());
-        for (auto&& raw : raws) {
-            r.emplace_back(raw->selectable_->prepare(schema));
-        }
-        return r;
-    }
-
-    bool processes_selection() const {
-        return selectable_->processes_selection();
-    }
-};
-
-}
-
-}
diff --git a/scylla/cql3/selection/scalar_function_selector.hh b/scylla/cql3/selection/scalar_function_selector.hh
deleted file mode 100644
index 3fc48ab..0000000
--- a/scylla/cql3/selection/scalar_function_selector.hh
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Modified by ScyllaDB
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "abstract_function_selector.hh"
-#include "cql3/functions/scalar_function.hh"
-
-namespace cql3 {
-
-namespace selection {
-
-class scalar_function_selector : public abstract_function_selector_for<functions::scalar_function> {
-public:
-    virtual bool is_aggregate() override {
-        // We cannot just return true as it is possible to have a scalar function wrapping an aggregation function
-        if (_arg_selectors.empty()) {
-            return false;
-        }
-
-        return _arg_selectors[0]->is_aggregate();
-    }
-
-    virtual void add_input(cql_serialization_format sf, result_set_builder& rs) override {
-        size_t m = _arg_selectors.size();
-        for (size_t i = 0; i < m; ++i) {
-            auto&& s = _arg_selectors[i];
-            s->add_input(sf, rs);
-        }
-    }
-
-    virtual void reset() override {
-    }
-
-    virtual bytes_opt get_output(cql_serialization_format sf) override {
-        size_t m = _arg_selectors.size();
-        for (size_t i = 0; i < m; ++i) {
-            auto&& s = _arg_selectors[i];
-            _args[i] = s->get_output(sf);
-            s->reset();
-        }
-        return fun()->execute(sf, _args);
-    }
-
-    scalar_function_selector(shared_ptr<functions::function> fun, std::vector<shared_ptr<selector>> arg_selectors)
-            : abstract_function_selector_for<functions::scalar_function>(
-                    dynamic_pointer_cast<functions::scalar_function>(std::move(fun)), std::move(arg_selectors)) {
-    }
-
-    virtual sstring assignment_testable_source_context() const override {
-        // FIXME:
-        return "FIXME";
-    }
-
-};
-
-}
-}
diff --git a/scylla/cql3/selection/selectable.cc b/scylla/cql3/selection/selectable.cc
deleted file mode 100644
index 7236b67..0000000
--- a/scylla/cql3/selection/selectable.cc
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "selectable.hh"
-#include "selectable_with_field_selection.hh"
-#include "field_selector.hh"
-#include "writetime_or_ttl.hh"
-#include "selector_factories.hh"
-#include "cql3/functions/functions.hh"
-#include "abstract_function_selector.hh"
-#include "writetime_or_ttl_selector.hh"
-
-namespace cql3 {
-
-namespace selection {
-
-shared_ptr<selector::factory>
-selectable::writetime_or_ttl::new_selector_factory(database& db, schema_ptr s, std::vector<const column_definition*>& defs) {
-    auto&& def = s->get_column_definition(_id->name());
-    if (!def) {
-        throw exceptions::invalid_request_exception(sprint("Undefined name %s in selection clause", _id));
-    }
-    if (def->is_primary_key()) {
-        throw exceptions::invalid_request_exception(
-                sprint("Cannot use selection function %s on PRIMARY KEY part %s",
-                       _is_writetime ? "writeTime" : "ttl",
-                       def->name()));
-    }
-    if (def->type->is_collection()) {
-        throw exceptions::invalid_request_exception(sprint("Cannot use selection function %s on collections",
-                                                           _is_writetime ? "writeTime" : "ttl"));
-    }
-
-    return writetime_or_ttl_selector::new_factory(def->name_as_text(), add_and_get_index(*def, defs), _is_writetime);
-}
-
-sstring
-selectable::writetime_or_ttl::to_string() const {
"writetime" : "ttl", _id->to_string()); -} - -shared_ptr -selectable::writetime_or_ttl::raw::prepare(schema_ptr s) { - return make_shared(_id->prepare_column_identifier(s), _is_writetime); -} - -bool -selectable::writetime_or_ttl::raw::processes_selection() const { - return true; -} - -shared_ptr -selectable::with_function::new_selector_factory(database& db, schema_ptr s, std::vector& defs) { - auto&& factories = selector_factories::create_factories_and_collect_column_definitions(_args, db, s, defs); - - // resolve built-in functions before user defined functions - auto&& fun = functions::functions::get(db, s->ks_name(), _function_name, factories->new_instances(), s->ks_name(), s->cf_name()); - if (!fun) { - throw exceptions::invalid_request_exception(sprint("Unknown function '%s'", _function_name)); - } - if (!fun->return_type()) { - throw exceptions::invalid_request_exception(sprint("Unknown function %s called in selection clause", _function_name)); - } - - return abstract_function_selector::new_factory(std::move(fun), std::move(factories)); -} - -sstring -selectable::with_function::to_string() const { - return sprint("%s(%s)", _function_name.name, join(", ", _args)); -} - -shared_ptr -selectable::with_function::raw::prepare(schema_ptr s) { - std::vector> prepared_args; - prepared_args.reserve(_args.size()); - for (auto&& arg : _args) { - prepared_args.push_back(arg->prepare(s)); - } - return ::make_shared(_function_name, std::move(prepared_args)); - } - -bool -selectable::with_function::raw::processes_selection() const { - return true; -} - -shared_ptr -selectable::with_field_selection::new_selector_factory(database& db, schema_ptr s, std::vector& defs) { - auto&& factory = _selected->new_selector_factory(db, s, defs); - auto&& type = factory->new_instance()->get_type(); - auto&& ut = dynamic_pointer_cast(std::move(type)); - if (!ut) { - throw exceptions::invalid_request_exception( - sprint("Invalid field selection: %s of type %s is not a user type", - _selected->to_string(), factory->new_instance()->get_type()->as_cql3_type())); - } - for (size_t i = 0; i < ut->size(); ++i) { - if (ut->field_name(i) != _field->bytes_) { - continue; - } - return field_selector::new_factory(std::move(ut), i, std::move(factory)); - } - throw exceptions::invalid_request_exception(sprint("%s of type %s has no field %s", - _selected->to_string(), ut->as_cql3_type(), _field)); -} - -sstring -selectable::with_field_selection::to_string() const { - return sprint("%s.%s", _selected->to_string(), _field->to_string()); -} - -shared_ptr -selectable::with_field_selection::raw::prepare(schema_ptr s) { - // static_pointer_cast<> needed due to lack of covariant return type - // support with smart pointers - return make_shared(_selected->prepare(s), - static_pointer_cast(_field->prepare(s))); -} - -bool -selectable::with_field_selection::raw::processes_selection() const { - return true; -} - -std::ostream & operator<<(std::ostream &os, const selectable& s) { - return os << s.to_string(); -} - -} - -} diff --git a/scylla/cql3/selection/selectable.hh b/scylla/cql3/selection/selectable.hh deleted file mode 100644 index 94097a9..0000000 --- a/scylla/cql3/selection/selectable.hh +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
diff --git a/scylla/cql3/selection/selectable.hh b/scylla/cql3/selection/selectable.hh
deleted file mode 100644
index 94097a9..0000000
--- a/scylla/cql3/selection/selectable.hh
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "schema.hh"
-#include "core/shared_ptr.hh"
-#include "cql3/selection/selector.hh"
-#include "cql3/functions/function_name.hh"
-
-namespace cql3 {
-
-namespace selection {
-
-class selectable {
-public:
-    virtual ~selectable() {}
-    virtual ::shared_ptr<selector::factory> new_selector_factory(database& db, schema_ptr schema, std::vector<const column_definition*>& defs) = 0;
-    virtual sstring to_string() const = 0;
-protected:
-    static size_t add_and_get_index(const column_definition& def, std::vector<const column_definition*>& defs) {
-        auto i = std::find(defs.begin(), defs.end(), &def);
-        if (i != defs.end()) {
-            return std::distance(defs.begin(), i);
-        }
-        defs.push_back(&def);
-        return defs.size() - 1;
-    }
-public:
-    class raw {
-    public:
-        virtual ~raw() {}
-
-        virtual ::shared_ptr<selectable> prepare(schema_ptr s) = 0;
-
-        /**
-         * Returns true if any processing is performed on the selected column.
-         **/
-        virtual bool processes_selection() const = 0;
-    };
-
-    class writetime_or_ttl;
-
-    class with_function;
-
-    class with_field_selection;
-};
-
-std::ostream & operator<<(std::ostream &os, const selectable& s);
-
-class selectable::with_function : public selectable {
-    functions::function_name _function_name;
-    std::vector<shared_ptr<selectable>> _args;
-public:
-    with_function(functions::function_name fname, std::vector<shared_ptr<selectable>> args)
-        : _function_name(std::move(fname)), _args(std::move(args)) {
-    }
-
-    virtual sstring to_string() const override;
-
-    virtual shared_ptr<selector::factory> new_selector_factory(database& db, schema_ptr s, std::vector<const column_definition*>& defs) override;
-    class raw : public selectable::raw {
-        functions::function_name _function_name;
-        std::vector<shared_ptr<selectable::raw>> _args;
-    public:
-        raw(functions::function_name function_name, std::vector<shared_ptr<selectable::raw>> args)
-            : _function_name(std::move(function_name)), _args(std::move(args)) {
-        }
-        virtual shared_ptr<selectable> prepare(schema_ptr s) override;
-        virtual bool processes_selection() const override;
-    };
-};
-
-}
-
-}
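selectable::add_and_get_index above deduplicates the column list shared by all selectors: a column that was already collected keeps its original slot, so every selector reading it agrees on the index. A generic sketch of that helper:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    template <typename Col>
    size_t add_and_get_index(const Col* def, std::vector<const Col*>& defs) {
        auto i = std::find(defs.begin(), defs.end(), def);
        if (i != defs.end()) {
            return std::distance(defs.begin(), i);  // already collected: share the slot
        }
        defs.push_back(def);
        return defs.size() - 1;                     // new slot at the back
    }
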
diff --git a/scylla/cql3/selection/selectable_with_field_selection.hh b/scylla/cql3/selection/selectable_with_field_selection.hh
deleted file mode 100644
index 551444c..0000000
--- a/scylla/cql3/selection/selectable_with_field_selection.hh
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#pragma once
-
-#include "selectable.hh"
-#include "cql3/column_identifier.hh"
-
-namespace cql3 {
-
-namespace selection {
-
-class selectable::with_field_selection : public selectable {
-public:
-    shared_ptr<selectable> _selected;
-    shared_ptr<column_identifier> _field;
-public:
-    with_field_selection(shared_ptr<selectable> selected, shared_ptr<column_identifier> field)
-        : _selected(std::move(selected)), _field(std::move(field)) {
-    }
-
-    virtual sstring to_string() const override;
-
-    virtual shared_ptr<selector::factory> new_selector_factory(database& db, schema_ptr s, std::vector<const column_definition*>& defs) override;
-
-    class raw : public selectable::raw {
-        shared_ptr<selectable::raw> _selected;
-        shared_ptr<column_identifier::raw> _field;
-    public:
-        raw(shared_ptr<selectable::raw> selected, shared_ptr<column_identifier::raw> field)
-            : _selected(std::move(selected)), _field(std::move(field)) {
-        }
-        virtual shared_ptr<selectable> prepare(schema_ptr s) override;
-        virtual bool processes_selection() const override;
-    };
-};
-
-}
-
-}
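The field lookup that backs with_field_selection (see new_selector_factory in selectable.cc above) resolves a field name to its position in the user type once, at prepare time. A sketch with a plain vector of field names as an assumed simplification of user_type_impl:

    #include <cstddef>
    #include <optional>
    #include <string>
    #include <vector>

    // field_names stands in for user_type_impl's field list.
    std::optional<size_t> field_index_of(const std::vector<std::string>& field_names,
                                         const std::string& field) {
        for (size_t i = 0; i < field_names.size(); ++i) {
            if (field_names[i] == field) {
                return i;    // position later fed to field_selector::new_factory
            }
        }
        return std::nullopt; // caller raises invalid_request_exception
    }
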
diff --git a/scylla/cql3/selection/selection.cc b/scylla/cql3/selection/selection.cc
deleted file mode 100644
index 2a8487a..0000000
--- a/scylla/cql3/selection/selection.cc
+++ /dev/null
@@ -1,434 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include
-
-#include "cql3/selection/selection.hh"
-#include "cql3/selection/selector_factories.hh"
-#include "cql3/result_set.hh"
-
-namespace cql3 {
-
-namespace selection {
-
-selection::selection(schema_ptr schema,
-    std::vector<const column_definition*> columns,
-    std::vector<::shared_ptr<column_specification>> metadata_,
-    bool collect_timestamps,
-    bool collect_TTLs)
-        : _schema(std::move(schema))
-        , _columns(std::move(columns))
-        , _metadata(::make_shared<metadata>(std::move(metadata_)))
-        , _collect_timestamps(collect_timestamps)
-        , _collect_TTLs(collect_TTLs)
-        , _contains_static_columns(std::any_of(_columns.begin(), _columns.end(), std::mem_fn(&column_definition::is_static)))
-{ }
-
-query::partition_slice::option_set selection::get_query_options() {
-    query::partition_slice::option_set opts;
-
-    opts.set_if<query::partition_slice::option::send_timestamp>(_collect_timestamps);
-    opts.set_if<query::partition_slice::option::send_expiry>(_collect_TTLs);
-
-    opts.set_if<query::partition_slice::option::send_partition_key>(
-            std::any_of(_columns.begin(), _columns.end(),
-                        std::mem_fn(&column_definition::is_partition_key)));
-
-    opts.set_if<query::partition_slice::option::send_clustering_key>(
-            std::any_of(_columns.begin(), _columns.end(),
-                        std::mem_fn(&column_definition::is_clustering_key)));
-
-    return opts;
-}
-
-// Special-cased selection for when no function is used (this saves some allocations).
-class simple_selection : public selection {
-private:
-    const bool _is_wildcard;
-public:
-    static ::shared_ptr<simple_selection> make(schema_ptr schema, std::vector<const column_definition*> columns, bool is_wildcard) {
-        std::vector<::shared_ptr<column_specification>> metadata;
-        metadata.reserve(columns.size());
-        for (auto&& col : columns) {
-            metadata.emplace_back(col->column_specification);
-        }
-        return ::make_shared<simple_selection>(schema, std::move(columns), std::move(metadata), is_wildcard);
-    }
-
-    /*
-     * In theory, even a simple selection could have the same column multiple times, so we
-     * could filter those duplicates out of columns. But since we're very unlikely to
-     * get many duplicates in practice, it's more efficient not to bother.
-     */
-    simple_selection(schema_ptr schema, std::vector<const column_definition*> columns,
-        std::vector<::shared_ptr<column_specification>> metadata, bool is_wildcard)
-            : selection(schema, std::move(columns), std::move(metadata), false, false)
-            , _is_wildcard(is_wildcard)
-    { }
-
-    virtual bool is_wildcard() const override { return _is_wildcard; }
-    virtual bool is_aggregate() const override { return false; }
-protected:
-    class simple_selectors : public selectors {
-    private:
-        std::vector<bytes_opt> _current;
-    public:
-        virtual void reset() override {
-            _current.clear();
-        }
-
-        virtual std::vector<bytes_opt> get_output_row(cql_serialization_format sf) override {
-            return std::move(_current);
-        }
-
-        virtual void add_input_row(cql_serialization_format sf, result_set_builder& rs) override {
-            _current = std::move(*rs.current);
-        }
-
-        virtual bool is_aggregate() {
-            return false;
-        }
-    };
-
-    std::unique_ptr<selectors> new_selectors() const override {
-        return std::make_unique<simple_selectors>();
-    }
-};
-
-class selection_with_processing : public selection {
-private:
-    ::shared_ptr<selector_factories> _factories;
-public:
-    selection_with_processing(schema_ptr schema, std::vector<const column_definition*> columns,
-            std::vector<::shared_ptr<column_specification>> metadata, ::shared_ptr<selector_factories> factories)
-        : selection(schema, std::move(columns), std::move(metadata),
-            factories->contains_write_time_selector_factory(),
-            factories->contains_ttl_selector_factory())
-        , _factories(std::move(factories))
-    {
-        if (_factories->does_aggregation() && !_factories->contains_only_aggregate_functions()) {
-            throw exceptions::invalid_request_exception("the select clause must either contains only aggregates or none");
-        }
-    }
-
-    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override {
-        return _factories->uses_function(ks_name, function_name);
-    }
-
-    virtual uint32_t add_column_for_ordering(const column_definition& c) override {
-        uint32_t index = selection::add_column_for_ordering(c);
-        _factories->add_selector_for_ordering(c, index);
-        return index;
-    }
-
-    virtual bool is_aggregate() const override {
-        return _factories->contains_only_aggregate_functions();
-    }
-protected:
-    class selectors_with_processing : public selectors {
-    private:
-        ::shared_ptr<selector_factories> _factories;
-        std::vector<::shared_ptr<selector>> _selectors;
-    public:
-        selectors_with_processing(::shared_ptr<selector_factories> factories)
-            : _factories(std::move(factories))
-            , _selectors(_factories->new_instances())
-        { }
-
-        virtual void reset() override {
-            for (auto&& s : _selectors) {
-                s->reset();
-            }
-        }
-
-        virtual bool is_aggregate() override {
-            return _factories->contains_only_aggregate_functions();
-        }
-
-        virtual std::vector<bytes_opt> get_output_row(cql_serialization_format sf) override {
-            std::vector<bytes_opt> output_row;
-            output_row.reserve(_selectors.size());
-            for (auto&& s : _selectors) {
-                output_row.emplace_back(s->get_output(sf));
-            }
-            return output_row;
-        }
-
-        virtual void add_input_row(cql_serialization_format sf, result_set_builder& rs) {
-            for (auto&& s : _selectors) {
-                s->add_input(sf, rs);
-            }
-        }
-    };
-
-    std::unique_ptr<selectors> new_selectors() const override {
-        return std::make_unique<selectors_with_processing>(_factories);
-    }
-};
-
-::shared_ptr<selection> selection::wildcard(schema_ptr schema) {
-    auto columns = schema->all_columns_in_select_order();
-    auto cds = boost::copy_range<std::vector<const column_definition*>>(columns | boost::adaptors::transformed([](const column_definition& c) {
-        return &c;
-    }));
-    return simple_selection::make(schema, std::move(cds), true);
-}
-
-::shared_ptr<selection> selection::for_columns(schema_ptr schema, std::vector<const column_definition*> columns) {
-    return simple_selection::make(schema, std::move(columns), false);
-}
-
-uint32_t selection::add_column_for_ordering(const column_definition& c) {
-    _columns.push_back(&c);
-    _metadata->add_non_serialized_column(c.column_specification);
-    return _columns.size() - 1;
-}
-
-::shared_ptr<selection> selection::from_selectors(database& db, schema_ptr schema, const std::vector<::shared_ptr<raw_selector>>& raw_selectors) {
-    std::vector<const column_definition*> defs;
-
-    ::shared_ptr<selector_factories> factories =
-        selector_factories::create_factories_and_collect_column_definitions(
-                raw_selector::to_selectables(raw_selectors, schema), db, schema, defs);
-
-    auto metadata = collect_metadata(schema, raw_selectors, *factories);
-    if (processes_selection(raw_selectors) || raw_selectors.size() != defs.size()) {
-        return ::make_shared<selection_with_processing>(schema, std::move(defs), std::move(metadata), std::move(factories));
-    } else {
-        return ::make_shared<simple_selection>(schema, std::move(defs), std::move(metadata), false);
-    }
-}
-
-std::vector<::shared_ptr<column_specification>>
-selection::collect_metadata(schema_ptr schema, const std::vector<::shared_ptr<raw_selector>>& raw_selectors,
-        const selector_factories& factories) {
-    std::vector<::shared_ptr<column_specification>> r;
-    r.reserve(raw_selectors.size());
-    auto i = raw_selectors.begin();
-    for (auto&& factory : factories) {
-        ::shared_ptr<column_specification> col_spec = factory->get_column_specification(schema);
-        ::shared_ptr<column_identifier> alias = (*i++)->alias;
-        r.push_back(alias ? col_spec->with_alias(alias) : col_spec);
-    }
-    return r;
-}
-
-result_set_builder::result_set_builder(const selection& s, gc_clock::time_point now, cql_serialization_format sf)
-    : _result_set(std::make_unique<result_set>(::make_shared<metadata>(*(s.get_result_metadata()))))
-    , _selectors(s.new_selectors())
-    , _now(now)
-    , _cql_serialization_format(sf)
-{
-    if (s._collect_timestamps) {
-        _timestamps.resize(s._columns.size(), 0);
-    }
-    if (s._collect_TTLs) {
-        _ttls.resize(s._columns.size(), 0);
-    }
-}
-
-void result_set_builder::add_empty() {
-    current->emplace_back();
-    if (!_timestamps.empty()) {
-        _timestamps[current->size() - 1] = api::missing_timestamp;
-    }
-    if (!_ttls.empty()) {
-        _ttls[current->size() - 1] = -1;
-    }
-}
-
-void result_set_builder::add(bytes_opt value) {
-    current->emplace_back(std::move(value));
-}
-
-void result_set_builder::add(const column_definition& def, const query::result_atomic_cell_view& c) {
-    current->emplace_back(get_value(def.type, c));
-    if (!_timestamps.empty()) {
-        _timestamps[current->size() - 1] = c.timestamp();
-    }
-    if (!_ttls.empty()) {
-        gc_clock::duration ttl_left(-1);
-        expiry_opt e = c.expiry();
-        if (e) {
-            ttl_left = *e - _now;
-        }
-        _ttls[current->size() - 1] = ttl_left.count();
-    }
-}
-
-void result_set_builder::add_collection(const column_definition& def, bytes_view c) {
-    current->emplace_back(to_bytes(c));
-    // timestamps, ttls meaningless for collections
-}
-
-void result_set_builder::new_row() {
-    if (current) {
-        _selectors->add_input_row(_cql_serialization_format, *this);
-        if (!_selectors->is_aggregate()) {
-            _result_set->add_row(_selectors->get_output_row(_cql_serialization_format));
-            _selectors->reset();
-        }
-        current->clear();
-    } else {
-        // FIXME: we use optional<> here because we don't have an end_row() signal
-        //        instead, !current means that new_row has never been called, so this
-        //        call to new_row() does not end a previous row.
-        current.emplace();
-    }
-}
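The builder protocol above has no explicit end_row(): new_row() closes the previous row, so build() must flush the final pending one. A stripped-down sketch of that flush discipline (simplified types, not the patch's API):

    #include <optional>
    #include <string>
    #include <utility>
    #include <vector>

    struct row_builder {
        std::vector<std::vector<std::string>> rows;
        std::optional<std::vector<std::string>> current;

        void new_row() {
            if (current) {
                rows.push_back(std::move(*current));  // close the previous row
            }
            current.emplace();  // !current only before the first new_row()
        }

        std::vector<std::vector<std::string>> build() {
            if (current) {
                rows.push_back(std::move(*current));  // flush the final pending row
                current.reset();
            }
            return std::move(rows);
        }
    };
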
-
-std::unique_ptr<result_set> result_set_builder::build() {
-    if (current) {
-        _selectors->add_input_row(_cql_serialization_format, *this);
-        _result_set->add_row(_selectors->get_output_row(_cql_serialization_format));
-        _selectors->reset();
-        current = std::experimental::nullopt;
-    }
-    if (_result_set->empty() && _selectors->is_aggregate()) {
-        _result_set->add_row(_selectors->get_output_row(_cql_serialization_format));
-    }
-    return std::move(_result_set);
-}
-
-result_set_builder::visitor::visitor(
-        cql3::selection::result_set_builder& builder, const schema& s,
-        const selection& selection)
-    : _builder(builder), _schema(s), _selection(selection), _row_count(0) {
-}
-
-void result_set_builder::visitor::add_value(const column_definition& def,
-        query::result_row_view::iterator_type& i) {
-    if (def.type->is_multi_cell()) {
-        auto cell = i.next_collection_cell();
-        if (!cell) {
-            _builder.add_empty();
-            return;
-        }
-        _builder.add_collection(def, *cell);
-    } else {
-        auto cell = i.next_atomic_cell();
-        if (!cell) {
-            _builder.add_empty();
-            return;
-        }
-        _builder.add(def, *cell);
-    }
-}
-
-void result_set_builder::visitor::accept_new_partition(const partition_key& key,
-        uint32_t row_count) {
-    _partition_key = key.explode(_schema);
-    _row_count = row_count;
-}
-
-void result_set_builder::visitor::accept_new_partition(uint32_t row_count) {
-    _row_count = row_count;
-}
-
-void result_set_builder::visitor::accept_new_row(const clustering_key& key,
-        const query::result_row_view& static_row,
-        const query::result_row_view& row) {
-    _clustering_key = key.explode(_schema);
-    accept_new_row(static_row, row);
-}
-
-void result_set_builder::visitor::accept_new_row(
-        const query::result_row_view& static_row,
-        const query::result_row_view& row) {
-    auto static_row_iterator = static_row.iterator();
-    auto row_iterator = row.iterator();
-    _builder.new_row();
-    for (auto&& def : _selection.get_columns()) {
-        switch (def->kind) {
-        case column_kind::partition_key:
-            _builder.add(_partition_key[def->component_index()]);
-            break;
-        case column_kind::clustering_key:
-            if (_clustering_key.size() > def->component_index()) {
-                _builder.add(_clustering_key[def->component_index()]);
-            } else {
-                _builder.add({});
-            }
-            break;
-        case column_kind::regular_column:
-            add_value(*def, row_iterator);
-            break;
-        case column_kind::static_column:
-            add_value(*def, static_row_iterator);
-            break;
-        default:
-            assert(0);
-        }
-    }
-}
-
-void result_set_builder::visitor::accept_partition_end(
-        const query::result_row_view& static_row) {
-    if (_row_count == 0) {
-        _builder.new_row();
-        auto static_row_iterator = static_row.iterator();
-        for (auto&& def : _selection.get_columns()) {
-            if (def->is_partition_key()) {
-                _builder.add(_partition_key[def->component_index()]);
-            } else if (def->is_static()) {
-                add_value(*def, static_row_iterator);
-            } else {
-                _builder.add_empty();
-            }
-        }
-    }
-}
-
-api::timestamp_type result_set_builder::timestamp_of(size_t idx) {
-    return _timestamps[idx];
-}
-
-int32_t result_set_builder::ttl_of(size_t idx) {
-    return _ttls[idx];
-}
-
-bytes_opt result_set_builder::get_value(data_type t, query::result_atomic_cell_view c) {
-    return {to_bytes(c.value())};
-}
-
-}
-
-}
diff --git a/scylla/cql3/selection/selection.hh b/scylla/cql3/selection/selection.hh
deleted file mode 100644
index e30d443..0000000
--- a/scylla/cql3/selection/selection.hh
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "bytes.hh"
-#include "schema.hh"
-#include "query-result-reader.hh"
-#include "cql3/column_specification.hh"
-#include "exceptions/exceptions.hh"
-#include "cql3/selection/raw_selector.hh"
-#include "cql3/selection/selector_factories.hh"
-#include "unimplemented.hh"
-
-namespace cql3 {
-
-class result_set;
-class metadata;
-
-namespace selection {
-
-class selectors {
-public:
-    virtual ~selectors() {}
-
-    virtual bool is_aggregate() = 0;
-
-    /**
-     * Adds the current row of the specified ResultSetBuilder.
-     *
-     * @param rs the ResultSetBuilder
-     * @throws InvalidRequestException
-     */
-    virtual void add_input_row(cql_serialization_format sf, result_set_builder& rs) = 0;
-
-    virtual std::vector<bytes_opt> get_output_row(cql_serialization_format sf) = 0;
-
-    virtual void reset() = 0;
-};
-
-class selection {
-private:
-    schema_ptr _schema;
-    std::vector<const column_definition*> _columns;
-    ::shared_ptr<metadata> _metadata;
-    const bool _collect_timestamps;
-    const bool _collect_TTLs;
-    const bool _contains_static_columns;
-protected:
-    selection(schema_ptr schema,
-        std::vector<const column_definition*> columns,
-        std::vector<::shared_ptr<column_specification>> metadata_,
-        bool collect_timestamps,
-        bool collect_TTLs);
-
-    virtual ~selection() {}
-public:
-    // Overridden by simple_selection when appropriate.
-    virtual bool is_wildcard() const {
-        return false;
-    }
-
-    /**
-     * Checks if this selection contains static columns.
-     * @return true if this selection contains static columns, false otherwise;
-     */
-    bool contains_static_columns() const {
-        return _contains_static_columns;
-    }
-
-    /**
-     * Checks if this selection contains only static columns.
-     * @return true if this selection contains only static columns, false otherwise;
-     */
-    bool contains_only_static_columns() const {
-        if (!contains_static_columns()) {
-            return false;
-        }
-
-        if (is_wildcard()) {
-            return false;
-        }
-
-        for (auto&& def : _columns) {
-            if (!def->is_partition_key() && !def->is_static()) {
-                return false;
-            }
-        }
-
-        return true;
-    }
-
-    /**
-     * Checks if this selection contains a collection.
-     *
-     * @return true if this selection contains a collection, false otherwise.
- */ - bool contains_a_collection() const { - if (!_schema->has_multi_cell_collections()) { - return false; - } - - return std::any_of(_columns.begin(), _columns.end(), [] (auto&& def) { - return def->type->is_collection() && def->type->is_multi_cell(); - }); - } - - /** - * Returns the index of the specified column. - * - * @param def the column definition - * @return the index of the specified column - */ - int32_t index_of(const column_definition& def) const { - auto i = std::find(_columns.begin(), _columns.end(), &def); - if (i == _columns.end()) { - return -1; - } - return std::distance(_columns.begin(), i); - } - - bool has_column(const column_definition& def) const { - return std::find(_columns.begin(), _columns.end(), &def) != _columns.end(); - } - - ::shared_ptr get_result_metadata() const { - return _metadata; - } - - static ::shared_ptr wildcard(schema_ptr schema); - static ::shared_ptr for_columns(schema_ptr schema, std::vector columns); - - virtual uint32_t add_column_for_ordering(const column_definition& c); - - virtual bool uses_function(const sstring &ks_name, const sstring& function_name) const { - return false; - } - - query::partition_slice::option_set get_query_options(); -private: - static bool processes_selection(const std::vector<::shared_ptr>& raw_selectors) { - return std::any_of(raw_selectors.begin(), raw_selectors.end(), - [] (auto&& s) { return s->processes_selection(); }); - } - - static std::vector<::shared_ptr> collect_metadata(schema_ptr schema, - const std::vector<::shared_ptr>& raw_selectors, const selector_factories& factories); -public: - static ::shared_ptr from_selectors(database& db, schema_ptr schema, const std::vector<::shared_ptr>& raw_selectors); - - virtual std::unique_ptr new_selectors() const = 0; - - /** - * Returns a range of CQL3 columns this selection needs. - */ - auto const& get_columns() const { - return _columns; - } - - uint32_t get_column_count() const { - return _columns.size(); - } - - virtual bool is_aggregate() const = 0; - - /** - * Checks that selectors are either all aggregates or that none of them is. - * - * @param selectors the selectors to test. - * @param messageTemplate the error message template - * @param messageArgs the error message arguments - * @throws InvalidRequestException if some of the selectors are aggregate but not all of them - */ - template - static void validate_selectors(const std::vector<::shared_ptr>& selectors, const sstring& msg, Args&&... 
args) { - int32_t aggregates = 0; - for (auto&& s : selectors) { - if (s->is_aggregate()) { - ++aggregates; - } - } - - if (aggregates != 0 && aggregates != selectors.size()) { - throw exceptions::invalid_request_exception(sprint(msg, std::forward(args)...)); - } - } - - friend class result_set_builder; -}; - -class result_set_builder { -private: - std::unique_ptr _result_set; - std::unique_ptr _selectors; -public: - std::experimental::optional> current; -private: - std::vector _timestamps; - std::vector _ttls; - const gc_clock::time_point _now; - cql_serialization_format _cql_serialization_format; -public: - result_set_builder(const selection& s, gc_clock::time_point now, cql_serialization_format sf); - void add_empty(); - void add(bytes_opt value); - void add(const column_definition& def, const query::result_atomic_cell_view& c); - void add_collection(const column_definition& def, bytes_view c); - void new_row(); - std::unique_ptr build(); - api::timestamp_type timestamp_of(size_t idx); - int32_t ttl_of(size_t idx); - - // Implements ResultVisitor concept from query.hh - class visitor { - protected: - result_set_builder& _builder; - const schema& _schema; - const selection& _selection; - uint32_t _row_count; - std::vector _partition_key; - std::vector _clustering_key; - public: - visitor(cql3::selection::result_set_builder& builder, const schema& s, const selection&); - visitor(visitor&&) = default; - - void add_value(const column_definition& def, query::result_row_view::iterator_type& i); - void accept_new_partition(const partition_key& key, uint32_t row_count); - void accept_new_partition(uint32_t row_count); - void accept_new_row(const clustering_key& key, - const query::result_row_view& static_row, - const query::result_row_view& row); - void accept_new_row(const query::result_row_view& static_row, - const query::result_row_view& row); - void accept_partition_end(const query::result_row_view& static_row); - }; -private: - bytes_opt get_value(data_type t, query::result_atomic_cell_view c); -}; - -} - -} diff --git a/scylla/cql3/selection/selector.cc b/scylla/cql3/selection/selector.cc deleted file mode 100644 index 2c94ff6..0000000 --- a/scylla/cql3/selection/selector.cc +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
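The visitor declared above implements the push-style ResultVisitor protocol: the result reader announces a partition, pushes each of its rows, then closes the partition. A simplified, self-contained illustration of that calling convention; counting_visitor and walk_result are illustrative names only, and the static-row and clustering-key details of the real interface are omitted:

#include <cstdint>
#include <string>
#include <utility>
#include <vector>

struct row { std::vector<std::string> cells; };

// Toy visitor: counts rows as the reader pushes them through.
struct counting_visitor {
    uint64_t rows = 0;
    void accept_new_partition(const std::string&, uint32_t) {}
    void accept_new_row(const row&) { ++rows; }
    void accept_partition_end() {}
};

// The reader side drives the visitor, partition by partition.
template <typename Visitor>
void walk_result(const std::vector<std::pair<std::string, std::vector<row>>>& result,
                 Visitor& v) {
    for (const auto& p : result) {
        v.accept_new_partition(p.first, static_cast<uint32_t>(p.second.size()));
        for (const auto& r : p.second) {
            v.accept_new_row(r);
        }
        v.accept_partition_end();
    }
}

The inversion matters: the reader owns iteration over the serialized result, so the builder never has to materialize more than the row it is currently converting.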
- */ - -#include "selector.hh" -#include "cql3/column_identifier.hh" - -namespace cql3 { - -namespace selection { - -::shared_ptr -selector::factory::get_column_specification(schema_ptr schema) { - return ::make_shared(schema->ks_name(), - schema->cf_name(), - ::make_shared(column_name(), true), - get_return_type()); -} - -} - -} - - diff --git a/scylla/cql3/selection/selector.hh b/scylla/cql3/selection/selector.hh deleted file mode 100644 index f0e3267..0000000 --- a/scylla/cql3/selection/selector.hh +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include "cql3/assignment_testable.hh" -#include "types.hh" -#include "schema.hh" - -namespace cql3 { - -namespace selection { - -class result_set_builder; - -/** - * A selector is used to convert the data returned by the storage engine into the data requested by the - * user. They correspond to the <selector> elements from the select clause. - *

* Since the introduction of aggregation, selectors can no longer be called from multiple threads, - * as they have an internal state.
- */ -class selector : public assignment_testable { -public: - class factory; - - virtual ~selector() {} - - /** - * Add the current value from the specified result_set_builder. - * - * @param protocol_version protocol version used for serialization - * @param rs the result_set_builder - * @throws InvalidRequestException if a problem occurs while add the input value - */ - virtual void add_input(cql_serialization_format sf, result_set_builder& rs) = 0; - - /** - * Returns the selector output. - * - * @param protocol_version protocol version used for serialization - * @return the selector output - * @throws InvalidRequestException if a problem occurs while computing the output value - */ - virtual bytes_opt get_output(cql_serialization_format sf) = 0; - - /** - * Returns the selector output type. - * - * @return the selector output type. - */ - virtual data_type get_type() = 0; - - /** - * Checks if this selector is creating aggregates. - * - * @return true if this selector is creating aggregates false - * otherwise. - */ - virtual bool is_aggregate() { - return false; - } - - /** - * Reset the internal state of this selector. - */ - virtual void reset() = 0; - - virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, ::shared_ptr receiver) override { - if (receiver->type == get_type()) { - return assignment_testable::test_result::EXACT_MATCH; - } else if (receiver->type->is_value_compatible_with(*get_type())) { - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } else { - return assignment_testable::test_result::NOT_ASSIGNABLE; - } - } -}; - -/** - * A factory for selector instances. - */ -class selector::factory { -public: - virtual ~factory() {} - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) { - return false; - } - - /** - * Returns the column specification corresponding to the output value of the selector instances created by - * this factory. - * - * @param schema the column family schema - * @return a column specification - */ - ::shared_ptr get_column_specification(schema_ptr schema); - - /** - * Creates a new selector instance. - * - * @return a new selector instance - */ - virtual ::shared_ptr new_instance() = 0; - - /** - * Checks if this factory creates selectors instances that creates aggregates. - * - * @return true if this factory creates selectors instances that creates aggregates, - * false otherwise - */ - virtual bool is_aggregate_selector_factory() { - return false; - } - - /** - * Checks if this factory creates writetime selectors instances. - * - * @return true if this factory creates writetime selectors instances, - * false otherwise - */ - virtual bool is_write_time_selector_factory() { - return false; - } - - /** - * Checks if this factory creates TTL selectors instances. - * - * @return true if this factory creates TTL selectors instances, - * false otherwise - */ - virtual bool is_ttl_selector_factory() { - return false; - } - - /** - * Returns the name of the column corresponding to the output value of the selector instances created by - * this factory. - * - * @return a column name - */ - virtual sstring column_name() = 0; - - /** - * Returns the type of the values returned by the selector instances created by this factory. 
- * - * @return the selector output type - */ - virtual data_type get_return_type() = 0; -}; - -} - -} diff --git a/scylla/cql3/selection/selector_factories.cc b/scylla/cql3/selection/selector_factories.cc deleted file mode 100644 index e430a79..0000000 --- a/scylla/cql3/selection/selector_factories.cc +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
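The selector class above is stateful by design, per the note in its class comment: values accumulate between add_input() and get_output(), and reset() rearms the instance between aggregation groups. A self-contained sketch of that lifecycle, using a hypothetical max aggregate over plain ints in place of the serialized bytes_opt values the real selectors handle:

#include <optional>

// Hypothetical stateful selector: the add_input()/get_output()/reset()
// lifecycle in miniature.
class max_selector {
    std::optional<int> _current;
public:
    void add_input(int v) {
        if (!_current || v > *_current) {
            _current = v;
        }
    }
    std::optional<int> get_output() const { return _current; }
    bool is_aggregate() const { return true; }
    void reset() { _current.reset(); }
};

Sharing one such instance between two in-flight queries would interleave their accumulated values, which is exactly why each execution obtains fresh instances from the factories.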
- */ - -#include "cql3/selection/selector_factories.hh" -#include "cql3/selection/simple_selector.hh" -#include "cql3/selection/selectable.hh" - -namespace cql3 { - -namespace selection { - -selector_factories::selector_factories(std::vector<::shared_ptr> selectables, - database& db, schema_ptr schema, - std::vector& defs) - : _contains_write_time_factory(false) - , _contains_ttl_factory(false) - , _number_of_aggregate_factories(0) -{ - _factories.reserve(selectables.size()); - - for (auto&& selectable : selectables) { - auto factory = selectable->new_selector_factory(db, schema, defs); - _contains_write_time_factory |= factory->is_write_time_selector_factory(); - _contains_ttl_factory |= factory->is_ttl_selector_factory(); - if (factory->is_aggregate_selector_factory()) { - ++_number_of_aggregate_factories; - } - _factories.emplace_back(std::move(factory)); - } -} - -bool selector_factories::uses_function(const sstring& ks_name, const sstring& function_name) const { - for (auto&& f : _factories) { - if (f && f->uses_function(ks_name, function_name)) { - return true; - } - } - return false; -} - -void selector_factories::add_selector_for_ordering(const column_definition& def, uint32_t index) { - _factories.emplace_back(simple_selector::new_factory(def.name_as_text(), index, def.type)); -} - -std::vector<::shared_ptr> selector_factories::new_instances() const { - std::vector<::shared_ptr> r; - r.reserve(_factories.size()); - for (auto&& f : _factories) { - r.emplace_back(f->new_instance()); - } - return r; -} - -std::vector selector_factories::get_column_names() const { - std::vector r; - r.reserve(_factories.size()); - std::transform(_factories.begin(), _factories.end(), std::back_inserter(r), [] (auto&& f) { - return f->column_name(); - }); - return r; -} - -} - -} diff --git a/scylla/cql3/selection/selector_factories.hh b/scylla/cql3/selection/selector_factories.hh deleted file mode 100644 index 02de8d4..0000000 --- a/scylla/cql3/selection/selector_factories.hh +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
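The constructor above folds per-factory properties into summary state in a single pass. The same bookkeeping, reduced to a standalone form with a hypothetical trimmed-down factory type:

#include <cstdint>
#include <memory>
#include <vector>

// Hypothetical reduced view of selector::factory.
struct factory {
    bool write_time = false;
    bool ttl = false;
    bool aggregate = false;
};

struct factory_summary {
    bool contains_write_time = false;
    bool contains_ttl = false;
    uint32_t aggregate_count = 0;
};

// One pass over the factories, as in selector_factories' constructor.
factory_summary summarize(const std::vector<std::shared_ptr<factory>>& fs) {
    factory_summary s;
    for (const auto& f : fs) {
        s.contains_write_time |= f->write_time;
        s.contains_ttl |= f->ttl;
        if (f->aggregate) {
            ++s.aggregate_count;
        }
    }
    return s;
}

contains_only_aggregate_functions() then falls out as aggregate_count != 0 && aggregate_count == fs.size(), which is how the header below phrases it.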
- * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include "cql3/selection/selector.hh" -#include "cql3/selection/selectable.hh" - -namespace cql3 { - -namespace selection { - -/** - * A set of selector factories. - */ -class selector_factories { -private: - /** - * The Selector factories. - */ - std::vector<::shared_ptr> _factories; - - /** - * true if one of the factory creates writetime selectors. - */ - bool _contains_write_time_factory; - - /** - * true if one of the factory creates TTL selectors. - */ - bool _contains_ttl_factory; - - /** - * The number of factories creating aggregates. - */ - uint32_t _number_of_aggregate_factories; - -public: - /** - * Creates a new SelectorFactories instance and collect the column definitions. - * - * @param selectables the Selectables for which the factories must be created - * @param cfm the Column Family Definition - * @param defs the collector parameter for the column definitions - * @return a new SelectorFactories instance - * @throws InvalidRequestException if a problem occurs while creating the factories - */ - static ::shared_ptr create_factories_and_collect_column_definitions( - std::vector<::shared_ptr> selectables, - database& db, schema_ptr schema, - std::vector& defs) { - return ::make_shared(std::move(selectables), db, std::move(schema), defs); - } - - selector_factories(std::vector<::shared_ptr> selectables, - database& db, schema_ptr schema, std::vector& defs); -public: - bool uses_function(const sstring& ks_name, const sstring& function_name) const; - - /** - * Adds a new Selector.Factory for a column that is needed only for ORDER BY purposes. - * @param def the column that is needed for ordering - * @param index the index of the column definition in the Selection's list of columns - */ - void add_selector_for_ordering(const column_definition& def, uint32_t index); - - /** - * Checks if this SelectorFactories contains only factories for aggregates. - * - * @return true if this SelectorFactories contains only factories for aggregates, - * false otherwise. - */ - bool contains_only_aggregate_functions() const { - auto size = _factories.size(); - return size != 0 && _number_of_aggregate_factories == size; - } - - /** - * Whether the selector built by this factory does aggregation or not (either directly or in a sub-selector). - * - * @return true if the selector built by this factor does aggregation, false otherwise. - */ - bool does_aggregation() const { - return _number_of_aggregate_factories > 0; - } - - /** - * Checks if this SelectorFactories contains at least one factory for writetime selectors. - * - * @return true if this SelectorFactories contains at least one factory for writetime - * selectors, false otherwise. - */ - bool contains_write_time_selector_factory() const { - return _contains_write_time_factory; - } - - /** - * Checks if this SelectorFactories contains at least one factory for TTL selectors. - * - * @return true if this SelectorFactories contains at least one factory for TTL - * selectors, false otherwise. - */ - bool contains_ttl_selector_factory() const { - return _contains_ttl_factory; - } - - /** - * Creates a list of new selector instances. - * @return a list of new selector instances. 
- */ - std::vector<::shared_ptr> new_instances() const; - - auto begin() const { - return _factories.begin(); - } - - auto end() const { - return _factories.end(); - } - - /** - * Returns the names of the columns corresponding to the output values of the selector instances created by - * these factories. - * - * @return a list of column names - */ - std::vector get_column_names() const; -}; - -} - -} diff --git a/scylla/cql3/selection/simple_selector.cc b/scylla/cql3/selection/simple_selector.cc deleted file mode 100644 index 2b69de1..0000000 --- a/scylla/cql3/selection/simple_selector.cc +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/selection/simple_selector.hh" - -namespace cql3 { - -namespace selection { - -::shared_ptr -simple_selector_factory::new_instance() { - return ::make_shared(_column_name, _idx, _type); -} - -} - -} diff --git a/scylla/cql3/selection/simple_selector.hh b/scylla/cql3/selection/simple_selector.hh deleted file mode 100644 index c000588..0000000 --- a/scylla/cql3/selection/simple_selector.hh +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. 
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/selection/selection.hh" -#include "cql3/selection/selector.hh" - -namespace cql3 { - -namespace selection { - -class simple_selector_factory : public selector::factory { -private: - const sstring _column_name; - const uint32_t _idx; - data_type _type; -public: - simple_selector_factory(const sstring& column_name, uint32_t idx, data_type type) - : _column_name(std::move(column_name)) - , _idx(idx) - , _type(type) - { } - - virtual sstring column_name() override { - return _column_name; - } - - virtual data_type get_return_type() override { - return _type; - } - - virtual ::shared_ptr new_instance() override; -}; - -class simple_selector : public selector { -private: - const sstring _column_name; - const uint32_t _idx; - data_type _type; - bytes_opt _current; -public: - static ::shared_ptr new_factory(const sstring& column_name, uint32_t idx, data_type type) { - return ::make_shared(column_name, idx, type); - } - - simple_selector(const sstring& column_name, uint32_t idx, data_type type) - : _column_name(std::move(column_name)) - , _idx(idx) - , _type(type) - { } - - virtual void add_input(cql_serialization_format sf, result_set_builder& rs) override { - // TODO: can we steal it? - _current = (*rs.current)[_idx]; - } - - virtual bytes_opt get_output(cql_serialization_format sf) override { - return std::move(_current); - } - - virtual void reset() override { - _current = {}; - } - - virtual data_type get_type() override { - return _type; - } - - virtual sstring assignment_testable_source_context() const override { - return _column_name; - } - -#if 0 - @Override - public String toString() - { - return columnName; - } -#endif -}; - -} - -} diff --git a/scylla/cql3/selection/writetime_or_ttl.hh b/scylla/cql3/selection/writetime_or_ttl.hh deleted file mode 100644 index 0b52d82..0000000 --- a/scylla/cql3/selection/writetime_or_ttl.hh +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. 
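simple_selector above is the degenerate case of the interface: add_input() copies the _idx-th cell of the builder's current row, and get_output() hands it back untouched. Reduced to standard C++, with cell as a hypothetical stand-in for bytes_opt:

#include <cstddef>
#include <optional>
#include <string>
#include <utility>
#include <vector>

using cell = std::optional<std::string>; // stand-in for bytes_opt

// Mirrors simple_selector: remember one cell per input row, emit it as-is.
class simple_selector {
    std::size_t _idx;
    cell _current;
public:
    explicit simple_selector(std::size_t idx) : _idx(idx) {}
    void add_input(const std::vector<cell>& current_row) {
        _current = current_row[_idx]; // the real code asks: can we steal it?
    }
    cell get_output() { return std::move(_current); }
    void reset() { _current.reset(); }
};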
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "selectable.hh" -#include "cql3/column_identifier.hh" - -namespace cql3 { - -namespace selection { - -class selectable::writetime_or_ttl : public selectable { -public: - shared_ptr _id; - bool _is_writetime; - - writetime_or_ttl(shared_ptr id, bool is_writetime) - : _id(std::move(id)), _is_writetime(is_writetime) { - } - - virtual sstring to_string() const override; - - virtual shared_ptr new_selector_factory(database& db, schema_ptr s, std::vector& defs) override; - - class raw : public selectable::raw { - shared_ptr _id; - bool _is_writetime; - public: - raw(shared_ptr id, bool is_writetime) - : _id(std::move(id)), _is_writetime(is_writetime) { - } - virtual shared_ptr prepare(schema_ptr s) override; - virtual bool processes_selection() const override; - }; -}; - -} - -} diff --git a/scylla/cql3/selection/writetime_or_ttl_selector.hh b/scylla/cql3/selection/writetime_or_ttl_selector.hh deleted file mode 100644 index 5869ead..0000000 --- a/scylla/cql3/selection/writetime_or_ttl_selector.hh +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Modified by ScyllaDB - * - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "selector.hh" -#include "selection.hh" - -namespace cql3 { - -namespace selection { - -class writetime_or_ttl_selector : public selector { - sstring _column_name; - int _idx; - bool _is_writetime; - bytes_opt _current; -public: - static shared_ptr new_factory(sstring column_name, int idx, bool is_writetime) { - class wtots_factory : public selector::factory { - sstring _column_name; - int _idx; - bool _is_writetime; - public: - wtots_factory(sstring column_name, int idx, bool is_writetime) - : _column_name(std::move(column_name)), _idx(idx), _is_writetime(is_writetime) { - } - - virtual sstring column_name() override { - return sprint("%s(%s)", _is_writetime ? "writetime" : "ttl", _column_name); - } - - virtual data_type get_return_type() override { - return _is_writetime ? long_type : int32_type; - } - - virtual shared_ptr new_instance() override { - return make_shared(_column_name, _idx, _is_writetime); - } - - virtual bool is_write_time_selector_factory() override { - return _is_writetime; - } - - virtual bool is_ttl_selector_factory() override { - return !_is_writetime; - } - }; - return make_shared(std::move(column_name), idx, is_writetime); - } - - virtual void add_input(cql_serialization_format sf, result_set_builder& rs) override { - if (_is_writetime) { - int64_t ts = rs.timestamp_of(_idx); - if (ts != api::missing_timestamp) { - _current = bytes(bytes::initialized_later(), 8); - auto i = _current->begin(); - serialize_int64(i, ts); - } else { - _current = std::experimental::nullopt; - } - } else { - int ttl = rs.ttl_of(_idx); - if (ttl > 0) { - _current = bytes(bytes::initialized_later(), 4); - auto i = _current->begin(); - serialize_int32(i, ttl); - } else { - _current = std::experimental::nullopt; - } - } - } - - virtual bytes_opt get_output(cql_serialization_format sf) override { - return _current; - } - - virtual void reset() override { - _current = std::experimental::nullopt; - } - - virtual data_type get_type() override { - return _is_writetime ? long_type : int32_type; - } - - virtual sstring assignment_testable_source_context() const override { - return _column_name; - } - -#if 0 - @Override - public String toString() - { - return columnName; - } -#endif - - writetime_or_ttl_selector(sstring column_name, int idx, bool is_writetime) - : _column_name(std::move(column_name)), _idx(idx), _is_writetime(is_writetime) { - } -}; - -} -} diff --git a/scylla/cql3/sets.cc b/scylla/cql3/sets.cc deleted file mode 100644 index 28b29b6..0000000 --- a/scylla/cql3/sets.cc +++ /dev/null @@ -1,321 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
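writetime_or_ttl_selector above serializes a timestamp into 8 bytes (long_type) and a TTL into 4 bytes (int32_type). The fixed-width big-endian encoding it relies on looks roughly like this standalone sketch; encode_be64 and encode_be32 are illustrative analogues of the serialize_int64/serialize_int32 helpers, not Scylla's actual implementations:

#include <cstdint>
#include <string>

// Big-endian, fixed-width integer encoders.
std::string encode_be64(int64_t v) {
    auto u = static_cast<uint64_t>(v);
    std::string out(8, '\0');
    for (int i = 7; i >= 0; --i) {
        out[i] = static_cast<char>(u & 0xff);
        u >>= 8;
    }
    return out;
}

std::string encode_be32(int32_t v) {
    auto u = static_cast<uint32_t>(v);
    std::string out(4, '\0');
    for (int i = 3; i >= 0; --i) {
        out[i] = static_cast<char>(u & 0xff);
        u >>= 8;
    }
    return out;
}

Note the asymmetry in the selector itself: a missing timestamp yields a null cell, while a TTL is emitted only when strictly positive.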
- */ - -#include "sets.hh" -#include "constants.hh" -#include "cql3_type.hh" - -namespace cql3 { - -shared_ptr -sets::value_spec_of(shared_ptr column) { - return make_shared(column->ks_name, column->cf_name, - ::make_shared(sprint("value(%s)", *column->name), true), - dynamic_pointer_cast(column->type)->get_elements_type()); -} - -shared_ptr -sets::literal::prepare(database& db, const sstring& keyspace, shared_ptr receiver) { - validate_assignable_to(db, keyspace, receiver); - - // We've parsed empty maps as a set literal to break the ambiguity so - // handle that case now - if (_elements.empty() && dynamic_pointer_cast(receiver->type)) { - // use empty_type for comparator, set is empty anyway. - std::map m(empty_type->as_less_comparator()); - return ::make_shared(std::move(m)); - } - - auto value_spec = value_spec_of(receiver); - std::vector> values; - values.reserve(_elements.size()); - bool all_terminal = true; - for (shared_ptr rt : _elements) - { - auto t = rt->prepare(db, keyspace, value_spec); - - if (t->contains_bind_marker()) { - throw exceptions::invalid_request_exception(sprint("Invalid set literal for %s: bind variables are not supported inside collection literals", *receiver->name)); - } - - if (dynamic_pointer_cast(t)) { - all_terminal = false; - } - - values.push_back(std::move(t)); - } - auto compare = dynamic_pointer_cast(receiver->type)->get_elements_type()->as_less_comparator(); - - auto value = ::make_shared(compare, std::move(values)); - if (all_terminal) { - return value->bind(query_options::DEFAULT); - } else { - return value; - } -} - -void -sets::literal::validate_assignable_to(database& db, const sstring& keyspace, shared_ptr receiver) { - if (!dynamic_pointer_cast(receiver->type)) { - // We've parsed empty maps as a set literal to break the ambiguity so - // handle that case now - if (dynamic_pointer_cast(receiver->type) && _elements.empty()) { - return; - } - - throw exceptions::invalid_request_exception(sprint("Invalid set literal for %s of type %s", *receiver->name, *receiver->type->as_cql3_type())); - } - - auto&& value_spec = value_spec_of(receiver); - for (shared_ptr rt : _elements) { - if (!is_assignable(rt->test_assignment(db, keyspace, value_spec))) { - throw exceptions::invalid_request_exception(sprint("Invalid set literal for %s: value %s is not of type %s", *receiver->name, *rt, *value_spec->type->as_cql3_type())); - } - } -} - -assignment_testable::test_result -sets::literal::test_assignment(database& db, const sstring& keyspace, shared_ptr receiver) { - if (!dynamic_pointer_cast(receiver->type)) { - // We've parsed empty maps as a set literal to break the ambiguity so handle that case now - if (dynamic_pointer_cast(receiver->type) && _elements.empty()) { - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } - - return assignment_testable::test_result::NOT_ASSIGNABLE; - } - - // If there is no elements, we can't say it's an exact match (an empty set if fundamentally polymorphic). 
- if (_elements.empty()) { - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } - - auto&& value_spec = value_spec_of(receiver); - // FIXME: make assignment_testable::test_all() accept ranges - std::vector> to_test(_elements.begin(), _elements.end()); - return assignment_testable::test_all(db, keyspace, value_spec, to_test); -} - -sstring -sets::literal::to_string() const { - return "{" + join(", ", _elements) + "}"; -} - -sets::value -sets::value::from_serialized(bytes_view v, set_type type, cql_serialization_format sf) { - try { - // Collections have this small hack that validate cannot be called on a serialized object, - // but compose does the validation (so we're fine). - // FIXME: deserializeForNativeProtocol?! - auto s = value_cast(type->deserialize(v, sf)); - std::set elements(type->get_elements_type()->as_less_comparator()); - for (auto&& element : s) { - elements.insert(elements.end(), type->get_elements_type()->decompose(element)); - } - return value(std::move(elements)); - } catch (marshal_exception& e) { - throw exceptions::invalid_request_exception(e.what()); - } -} - -cql3::raw_value -sets::value::get(const query_options& options) { - return cql3::raw_value::make_value(get_with_protocol_version(options.get_cql_serialization_format())); -} - -bytes -sets::value::get_with_protocol_version(cql_serialization_format sf) { - return collection_type_impl::pack(_elements.begin(), _elements.end(), - _elements.size(), sf); -} - -bool -sets::value::equals(set_type st, const value& v) { - if (_elements.size() != v._elements.size()) { - return false; - } - auto&& elements_type = st->get_elements_type(); - return std::equal(_elements.begin(), _elements.end(), - v._elements.begin(), - [elements_type] (bytes_view v1, bytes_view v2) { - return elements_type->equal(v1, v2); - }); -} - -sstring -sets::value::to_string() const { - sstring result = "{"; - bool first = true; - for (auto&& e : _elements) { - if (!first) { - result += ", "; - } - first = false; - result += to_hex(e); - } - result += "}"; - return result; -} - -bool -sets::delayed_value::contains_bind_marker() const { - // False since we don't support them in collections - return false; -} - -void -sets::delayed_value::collect_marker_specification(shared_ptr bound_names) { -} - -shared_ptr -sets::delayed_value::bind(const query_options& options) { - std::set buffers(_comparator); - for (auto&& t : _elements) { - auto b = t->bind_and_get(options); - - if (b.is_null()) { - throw exceptions::invalid_request_exception("null is not supported inside collections"); - } - if (b.is_unset_value()) { - return constants::UNSET_VALUE; - } - // We don't support value > 64K because the serialization format encodes the length as an unsigned short. - if (b->size() > std::numeric_limits::max()) { - throw exceptions::invalid_request_exception(sprint("Set value is too long.
Set values are limited to %d bytes but %d bytes value provided", - std::numeric_limits::max(), - b->size())); - } - - buffers.insert(buffers.end(), std::move(to_bytes(*b))); - } - return ::make_shared(std::move(buffers)); -} - - -::shared_ptr -sets::marker::bind(const query_options& options) { - const auto& value = options.get_value_at(_bind_index); - if (value.is_null()) { - return nullptr; - } else if (value.is_unset_value()) { - return constants::UNSET_VALUE; - } else { - auto as_set_type = static_pointer_cast(_receiver->type); - return make_shared(value::from_serialized(*value, as_set_type, options.get_cql_serialization_format())); - } -} - -void -sets::setter::execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) { - const auto& value = _t->bind(params._options); - if (value == constants::UNSET_VALUE) { - return; - } - if (column.type->is_multi_cell()) { - // delete + add - collection_type_impl::mutation mut; - mut.tomb = params.make_tombstone_just_before(); - auto ctype = static_pointer_cast(column.type); - auto col_mut = ctype->serialize_mutation_form(std::move(mut)); - m.set_cell(row_key, column, std::move(col_mut)); - } - adder::do_add(m, row_key, params, value, column); -} - -void -sets::adder::execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) { - const auto& value = _t->bind(params._options); - if (value == constants::UNSET_VALUE) { - return; - } - assert(column.type->is_multi_cell()); // "Attempted to add items to a frozen set"; - do_add(m, row_key, params, value, column); -} - -void -sets::adder::do_add(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params, - shared_ptr value, const column_definition& column) { - auto set_value = dynamic_pointer_cast(std::move(value)); - auto set_type = dynamic_pointer_cast(column.type); - if (column.type->is_multi_cell()) { - // FIXME: mutation_view? not compatible with params.make_cell(). 
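setter::execute above implements overwrite as "delete just before, then add": it plants a tombstone one timestamp unit before the new cells, so the freshly added elements survive while everything older is shadowed. A toy standalone model of that ordering; set_column and overwrite are hypothetical, and real mutations carry far more structure:

#include <cstdint>
#include <map>
#include <set>
#include <string>

// Toy model: a multi-cell set column is element -> write timestamp,
// plus a tombstone that deletes anything written at or before it.
struct set_column {
    int64_t tombstone = -1;
    std::map<std::string, int64_t> cells;

    bool contains(const std::string& e) const {
        auto i = cells.find(e);
        return i != cells.end() && i->second > tombstone;
    }
};

// setter::execute in miniature: tombstone just before ts, then add.
void overwrite(set_column& col, const std::set<std::string>& elements, int64_t ts) {
    col.tombstone = ts - 1;  // analogue of make_tombstone_just_before()
    for (const auto& e : elements) {
        col.cells[e] = ts;   // analogue of adder::do_add()
    }
}

Because the tombstone sorts strictly before the new cells, replicas converge on the overwritten value without the writer ever having to read the old set.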
- collection_type_impl::mutation mut; - - if (!set_value || set_value->_elements.empty()) { - return; - } - - for (auto&& e : set_value->_elements) { - mut.cells.emplace_back(e, params.make_cell({})); - } - auto smut = set_type->serialize_mutation_form(mut); - - m.set_cell(row_key, column, std::move(smut)); - } else if (set_value != nullptr) { - // for frozen sets, we're overwriting the whole cell - auto v = set_type->serialize_partially_deserialized_form( - {set_value->_elements.begin(), set_value->_elements.end()}, - cql_serialization_format::internal()); - m.set_cell(row_key, column, params.make_cell(std::move(v))); - } else { - m.set_cell(row_key, column, params.make_dead_cell()); - } -} - -void -sets::discarder::execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) { - assert(column.type->is_multi_cell()); // "Attempted to remove items from a frozen set"; - - auto&& value = _t->bind(params._options); - if (!value) { - return; - } - - collection_type_impl::mutation mut; - auto kill = [&] (bytes idx) { - mut.cells.push_back({std::move(idx), params.make_dead_cell()}); - }; - auto svalue = dynamic_pointer_cast(value); - assert(svalue); - mut.cells.reserve(svalue->_elements.size()); - for (auto&& e : svalue->_elements) { - kill(e); - } - auto ctype = static_pointer_cast(column.type); - m.set_cell(row_key, column, - atomic_cell_or_collection::from_collection_mutation( - ctype->serialize_mutation_form(mut))); -} - -void sets::element_discarder::execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) -{ - assert(column.type->is_multi_cell() && "Attempted to remove items from a frozen set"); - auto elt = _t->bind(params._options); - if (!elt) { - throw exceptions::invalid_request_exception("Invalid null set element"); - } - collection_type_impl::mutation mut; - mut.cells.emplace_back(*elt->get(params._options), params.make_dead_cell()); - auto ctype = static_pointer_cast(column.type); - m.set_cell(row_key, column, ctype->serialize_mutation_form(mut)); -} - -} diff --git a/scylla/cql3/sets.hh b/scylla/cql3/sets.hh deleted file mode 100644 index 88fa757..0000000 --- a/scylla/cql3/sets.hh +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/abstract_marker.hh" -#include "maps.hh" -#include "column_specification.hh" -#include "column_identifier.hh" -#include "to_string.hh" -#include - -namespace cql3 { - -/** - * Static helper methods and classes for sets. - */ -class sets { - sets() = delete; -public: - static shared_ptr value_spec_of(shared_ptr column); - - class literal : public term::raw { - std::vector> _elements; - public: - explicit literal(std::vector> elements) - : _elements(std::move(elements)) { - } - shared_ptr prepare(database& db, const sstring& keyspace, shared_ptr receiver); - void validate_assignable_to(database& db, const sstring& keyspace, shared_ptr receiver); - assignment_testable::test_result - test_assignment(database& db, const sstring& keyspace, shared_ptr receiver); - virtual sstring to_string() const override; - }; - - class value : public terminal, collection_terminal { - public: - std::set _elements; - public: - value(std::set elements) - : _elements(std::move(elements)) { - } - static value from_serialized(bytes_view v, set_type type, cql_serialization_format sf); - virtual cql3::raw_value get(const query_options& options) override; - virtual bytes get_with_protocol_version(cql_serialization_format sf) override; - bool equals(set_type st, const value& v); - virtual sstring to_string() const override; - }; - - // See Lists.DelayedValue - class delayed_value : public non_terminal { - serialized_compare _comparator; - std::vector> _elements; - public: - delayed_value(serialized_compare comparator, std::vector> elements) - : _comparator(std::move(comparator)), _elements(std::move(elements)) { - } - virtual bool contains_bind_marker() const override; - virtual void collect_marker_specification(shared_ptr bound_names) override; - virtual shared_ptr bind(const query_options& options); - }; - - class marker : public abstract_marker { - public: - marker(int32_t bind_index, ::shared_ptr receiver) - : abstract_marker{bind_index, std::move(receiver)} { - assert(dynamic_cast(_receiver->type.get())); - } - virtual ::shared_ptr bind(const query_options& options) override; - }; - - class setter : public operation { - public: - setter(const column_definition& column, shared_ptr t) - : operation(column, std::move(t)) { - } - virtual void execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) override; - }; - - class adder : public operation { - public: - adder(const column_definition& column, shared_ptr t) - : operation(column, std::move(t)) { - } - virtual void execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) override; - static void do_add(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params, - shared_ptr value, const column_definition& column); - }; - - // Note that this is reused for Map subtraction too (we subtract a set from a map) - class discarder : public operation { - public: - discarder(const column_definition& column, shared_ptr t) - : operation(column, std::move(t)) { - } - virtual void execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) override; - }; - - class 
element_discarder : public operation { - public: - element_discarder(const column_definition& column, shared_ptr t) - : operation(column, std::move(t)) { } - virtual void execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) override; - }; -}; - -} diff --git a/scylla/cql3/single_column_relation.cc b/scylla/cql3/single_column_relation.cc deleted file mode 100644 index 6628a8d..0000000 --- a/scylla/cql3/single_column_relation.cc +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "cql3/single_column_relation.hh" -#include "cql3/restrictions/single_column_restriction.hh" -#include "cql3/statements/request_validations.hh" -#include "cql3/cql3_type.hh" -#include "cql3/lists.hh" -#include "unimplemented.hh" - -using namespace cql3::restrictions; - -namespace cql3 { - -::shared_ptr -single_column_relation::to_term(const std::vector<::shared_ptr>& receivers, - ::shared_ptr raw, - database& db, - const sstring& keyspace, - ::shared_ptr bound_names) { - // TODO: optimize vector away, accept single column_specification - assert(receivers.size() == 1); - auto term = raw->prepare(db, keyspace, receivers[0]); - term->collect_marker_specification(bound_names); - return term; -} - -::shared_ptr -single_column_relation::new_EQ_restriction(database& db, schema_ptr schema, ::shared_ptr bound_names) { - const column_definition& column_def = to_column_definition(schema, _entity); - if (!_map_key) { - auto term = to_term(to_receivers(schema, column_def), _value, db, schema->ks_name(), bound_names); - return ::make_shared(column_def, std::move(term)); - } - auto&& receivers = to_receivers(schema, column_def); - auto&& entry_key = to_term({receivers[0]}, _map_key, db, schema->ks_name(), bound_names); - auto&& entry_value = to_term({receivers[1]}, _value, db, schema->ks_name(), bound_names); - return make_shared(column_def, std::move(entry_key), std::move(entry_value)); -} - -::shared_ptr -single_column_relation::new_IN_restriction(database& db, schema_ptr schema, ::shared_ptr bound_names) { - const column_definition& column_def = to_column_definition(schema, _entity); - auto receivers = to_receivers(schema, column_def); - assert(_in_values.empty() || !_value); - if (_value) { - auto term = to_term(receivers, _value, db, schema->ks_name(), bound_names); - return make_shared(column_def, dynamic_pointer_cast(term)); - } - auto terms = to_terms(receivers, _in_values, db, schema->ks_name(), bound_names); - return ::make_shared(column_def, std::move(terms)); -} - -std::vector<::shared_ptr> -single_column_relation::to_receivers(schema_ptr schema, const column_definition& column_def) -{ - using namespace statements::request_validations; - auto receiver = column_def.column_specification; - - if (schema->is_dense() && column_def.is_regular()) { - throw exceptions::invalid_request_exception(sprint( - "Predicates on the non-primary-key column (%s) of a COMPACT table are not yet supported", column_def.name_as_text())); - } - - if (is_IN()) { - // For partition keys we only support IN for the last name so far - if (column_def.is_partition_key() && !schema->is_last_partition_key(column_def)) { - throw exceptions::invalid_request_exception(sprint( - "Partition KEY part %s cannot be restricted by IN relation (only the last part of the partition key can)", - column_def.name_as_text())); - } - - // We only allow IN on the row key and the clustering key so far, never on non-PK columns, and this even if - // there's an index - // Note: for backward compatibility reason, we conside a IN of 1 value the same as a EQ, so we let that - // slide. - if (!column_def.is_primary_key() && !can_have_only_one_value()) { - throw exceptions::invalid_request_exception(sprint( - "IN predicates on non-primary-key columns (%s) is not yet supported", column_def.name_as_text())); - } - } else if (is_slice()) { - // Non EQ relation is not supported without token(), even if we have a 2ndary index (since even those - // are ordered by partitioner). 
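Most of to_receivers() is schema-driven validation of exactly the kind the comments above describe. The IN rule alone, condensed into a standalone sketch; column_info is a hypothetical flattened view of what the check needs from the schema, and a standard exception stands in for invalid_request_exception:

#include <stdexcept>
#include <string>

struct column_info {
    std::string name;
    bool is_partition_key;
    bool is_last_partition_key;
    bool is_primary_key;
};

// Mirrors the IN branch of single_column_relation::to_receivers().
void validate_in_relation(const column_info& c, bool single_value) {
    if (c.is_partition_key && !c.is_last_partition_key) {
        throw std::invalid_argument(
            "Partition KEY part " + c.name +
            " cannot be restricted by IN relation (only the last part can)");
    }
    // For backward compatibility an IN with exactly one value is treated
    // like EQ, so it is allowed even on non-primary-key columns.
    if (!c.is_primary_key && !single_value) {
        throw std::invalid_argument(
            "IN predicates on non-primary-key columns (" + c.name +
            ") are not yet supported");
    }
}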
- // Note: In theory we could allow it for 2ndary index queries with ALLOW FILTERING, but that would - // probably require some special casing - // Note bis: This is also why we don't bother handling the 'tuple' notation of #4851 for keys. If we - // lift the limitation for 2ndary - // index with filtering, we'll need to handle it though. - if (column_def.is_partition_key()) { - throw exceptions::invalid_request_exception( - "Only EQ and IN relation are supported on the partition key (unless you use the token() function)"); - } - } - - if (is_contains_key()) { - if (!dynamic_cast(receiver->type.get())) { - throw exceptions::invalid_request_exception(sprint("Cannot use CONTAINS KEY on non-map column %s", receiver->name)); - } - } - - if (_map_key) { - check_false(dynamic_cast(receiver->type.get()), "Indexes on list entries (%s[index] = value) are not currently supported.", receiver->name); - check_true(dynamic_cast(receiver->type.get()), "Column %s cannot be used as a map", receiver->name); - check_true(receiver->type->is_multi_cell(), "Map-entry equality predicates on frozen map column %s are not supported", receiver->name); - check_true(is_EQ(), "Only EQ relations are supported on map entries"); - } - - if (receiver->type->is_collection()) { - // We don't support relations against entire collections (unless they're frozen), like "numbers = {1, 2, 3}" - check_false(receiver->type->is_multi_cell() && !is_legal_relation_for_non_frozen_collection(), - "Collection column '%s' (%s) cannot be restricted by a '%s' relation", - receiver->name, - receiver->type->as_cql3_type(), - get_operator()); - - if (is_contains_key() || is_contains()) { - receiver = make_collection_receiver(receiver, is_contains_key()); - } else if (receiver->type->is_multi_cell() && _map_key && is_EQ()) { - return { - make_collection_receiver(receiver, true), - make_collection_receiver(receiver, false), - }; - } - } - - return {std::move(receiver)}; -} - -} diff --git a/scylla/cql3/single_column_relation.hh b/scylla/cql3/single_column_relation.hh deleted file mode 100644 index 1094cac..0000000 --- a/scylla/cql3/single_column_relation.hh +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include "cql3/restrictions/single_column_restriction.hh" - -#include "core/shared_ptr.hh" -#include "to_string.hh" - -#include "cql3/relation.hh" -#include "cql3/column_identifier.hh" -#include "cql3/term.hh" - -namespace cql3 { - -/** - * Relations encapsulate the relationship between an entity of some kind, and - * a value (term). For example, > "start" or "colname1" = "somevalue". - * - */ -class single_column_relation final : public relation { -private: - ::shared_ptr _entity; - ::shared_ptr _map_key; - ::shared_ptr _value; - std::vector<::shared_ptr> _in_values; -private: - single_column_relation(::shared_ptr entity, ::shared_ptr map_key, - const operator_type& type, ::shared_ptr value, std::vector<::shared_ptr> in_values) - : relation(type) - , _entity(std::move(entity)) - , _map_key(std::move(map_key)) - , _value(std::move(value)) - , _in_values(std::move(in_values)) - { } -public: - /** - * Creates a new relation. - * - * @param entity the kind of relation this is; what the term is being compared to. - * @param map_key the key into the entity identifying the value the term is being compared to. - * @param type the type that describes how this entity relates to the value. - * @param value the value being compared. - */ - single_column_relation(::shared_ptr entity, ::shared_ptr map_key, - const operator_type& type, ::shared_ptr value) - : single_column_relation(std::move(entity), std::move(map_key), type, std::move(value), {}) - { } - - /** - * Creates a new relation. - * - * @param entity the kind of relation this is; what the term is being compared to. - * @param type the type that describes how this entity relates to the value. - * @param value the value being compared. 
- */ - single_column_relation(::shared_ptr entity, const operator_type& type, ::shared_ptr value) - : single_column_relation(std::move(entity), {}, type, std::move(value)) - { } - - static ::shared_ptr create_in_relation(::shared_ptr entity, - std::vector<::shared_ptr> in_values) { - return ::make_shared(single_column_relation(std::move(entity), {}, operator_type::IN, {}, std::move(in_values))); - } - - ::shared_ptr get_entity() { - return _entity; - } - - ::shared_ptr get_map_key() { - return _map_key; - } - - ::shared_ptr get_value() { - return _value; - } - -protected: - virtual ::shared_ptr to_term(const std::vector<::shared_ptr>& receivers, - ::shared_ptr raw, database& db, const sstring& keyspace, - ::shared_ptr bound_names) override; - -#if 0 - public SingleColumnRelation withNonStrictOperator() - { - switch (relationType) - { - case GT: return new SingleColumnRelation(entity, operator_type.GTE, value); - case LT: return new SingleColumnRelation(entity, operator_type.LTE, value); - default: return this; - } - } -#endif - - virtual sstring to_string() const override { - auto entity_as_string = _entity->to_string(); - if (_map_key) { - entity_as_string = sprint("%s[%s]", std::move(entity_as_string), _map_key->to_string()); - } - - if (is_IN()) { - return sprint("%s IN (%s)", entity_as_string, join(", ", _in_values)); - } - - return sprint("%s %s %s", entity_as_string, _relation_type, _value->to_string()); - } - -protected: - virtual ::shared_ptr new_EQ_restriction(database& db, schema_ptr schema, - ::shared_ptr bound_names); - - virtual ::shared_ptr new_IN_restriction(database& db, schema_ptr schema, - ::shared_ptr bound_names) override; - - virtual ::shared_ptr new_slice_restriction(database& db, schema_ptr schema, - ::shared_ptr bound_names, - statements::bound bound, - bool inclusive) override { - auto&& column_def = to_column_definition(schema, _entity); - auto term = to_term(to_receivers(schema, column_def), _value, db, schema->ks_name(), std::move(bound_names)); - return ::make_shared(column_def, bound, inclusive, std::move(term)); - } - - virtual shared_ptr new_contains_restriction(database& db, schema_ptr schema, - ::shared_ptr bound_names, - bool is_key) override { - auto&& column_def = to_column_definition(schema, _entity); - auto term = to_term(to_receivers(schema, column_def), _value, db, schema->ks_name(), std::move(bound_names)); - return ::make_shared(column_def, std::move(term), is_key); - } - - virtual ::shared_ptr maybe_rename_identifier(const column_identifier::raw& from, column_identifier::raw to) override { - return *_entity == from - ? ::make_shared(single_column_relation( - ::make_shared(std::move(to)), _map_key, _relation_type, _value, _in_values)) - : static_pointer_cast(shared_from_this()); - } - -private: - /** - * Returns the receivers for this relation. - * - * @param schema the Column Family meta data - * @param column_def the column definition - * @return the receivers for the specified relation. 
- * @throws exceptions::invalid_request_exception if the relation is invalid - */ - std::vector<::shared_ptr> to_receivers(schema_ptr schema, const column_definition& column_def); - - static shared_ptr make_collection_receiver(shared_ptr receiver, bool for_key) { - return static_cast(receiver->type.get())->make_collection_receiver(receiver, for_key); - } - - bool is_legal_relation_for_non_frozen_collection() const { - return is_contains_key() || is_contains() || is_map_entry_equality(); - } - - bool is_map_entry_equality() const { - return _map_key && is_EQ(); - } - -private: - bool can_have_only_one_value() { - return is_EQ() || (is_IN() && _in_values.size() == 1); - } -}; - -}; diff --git a/scylla/cql3/statements/alter_keyspace_statement.cc b/scylla/cql3/statements/alter_keyspace_statement.cc deleted file mode 100644 index a55ff52..0000000 --- a/scylla/cql3/statements/alter_keyspace_statement.cc +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "alter_keyspace_statement.hh" -#include "prepared_statement.hh" -#include "service/migration_manager.hh" -#include "db/system_keyspace.hh" -#include "database.hh" - -cql3::statements::alter_keyspace_statement::alter_keyspace_statement(sstring name, ::shared_ptr attrs) - : _name(name) - , _attrs(std::move(attrs)) -{} - -const sstring& cql3::statements::alter_keyspace_statement::keyspace() const { - return _name; -} - -future<> cql3::statements::alter_keyspace_statement::check_access(const service::client_state& state) { - return state.has_keyspace_access(_name, auth::permission::ALTER); -} - -void cql3::statements::alter_keyspace_statement::validate(distributed& proxy, const service::client_state& state) { - try { - service::get_local_storage_proxy().get_db().local().find_keyspace(_name); // throws on failure - auto tmp = _name; - std::transform(tmp.begin(), tmp.end(), tmp.begin(), ::tolower); - if (is_system_keyspace(tmp)) { - throw exceptions::invalid_request_exception("Cannot alter system keyspace"); - } - - _attrs->validate(); - - if (!bool(_attrs->get_replication_strategy_class()) && !_attrs->get_replication_options().empty()) { - throw exceptions::configuration_exception("Missing replication strategy class"); - } -#if 0 - // The strategy is validated through KSMetaData.validate() in announceKeyspaceUpdate below. - // However, for backward compatibility with thrift, this doesn't validate unexpected options yet, - // so doing proper validation here. - AbstractReplicationStrategy.validateReplicationStrategy(name, - AbstractReplicationStrategy.getClass(attrs.getReplicationStrategyClass()), - StorageService.instance.getTokenMetadata(), - DatabaseDescriptor.getEndpointSnitch(), - attrs.getReplicationOptions()); -#endif - - - } catch (no_such_keyspace& e) { - std::throw_with_nested(exceptions::invalid_request_exception("Unknown keyspace " + _name)); - } -} - -future> cql3::statements::alter_keyspace_statement::announce_migration(distributed& proxy, bool is_local_only) { - auto old_ksm = service::get_local_storage_proxy().get_db().local().find_keyspace(_name).metadata(); - return service::get_local_migration_manager().announce_keyspace_update(_attrs->as_ks_metadata_update(old_ksm), is_local_only).then([this] { - using namespace cql_transport; - return make_shared( - event::schema_change::change_type::UPDATED, - keyspace()); - }); -} - -std::unique_ptr -cql3::statements::alter_keyspace_statement::prepare(database& db, cql_stats& stats) { - return std::make_unique(make_shared(*this)); -} - diff --git a/scylla/cql3/statements/alter_keyspace_statement.hh b/scylla/cql3/statements/alter_keyspace_statement.hh deleted file mode 100644 index 6cc2d9d..0000000 --- a/scylla/cql3/statements/alter_keyspace_statement.hh +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -#include "cql3/statements/schema_altering_statement.hh" -#include "cql3/statements/ks_prop_defs.hh" - -namespace cql3 { - -namespace statements { - -class alter_keyspace_statement : public schema_altering_statement { - sstring _name; - ::shared_ptr _attrs; - -public: - alter_keyspace_statement(sstring name, ::shared_ptr attrs); - - const sstring& keyspace() const override; - - future<> check_access(const service::client_state& state) override; - void validate(distributed& proxy, const service::client_state& state) override; - future> announce_migration(distributed& proxy, bool is_local_only) override; - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; -}; - -} -} diff --git a/scylla/cql3/statements/alter_table_statement.cc b/scylla/cql3/statements/alter_table_statement.cc deleted file mode 100644 index 32eca00..0000000 --- a/scylla/cql3/statements/alter_table_statement.cc +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "cql3/statements/alter_table_statement.hh" -#include "index/secondary_index_manager.hh" -#include "prepared_statement.hh" -#include "service/migration_manager.hh" -#include "validation.hh" -#include "db/config.hh" -#include -#include -#include "cql3/util.hh" -#include "view_info.hh" - -namespace cql3 { - -namespace statements { - -alter_table_statement::alter_table_statement(shared_ptr name, - type t, - shared_ptr column_name, - shared_ptr validator, - shared_ptr properties, - renames_type renames, - bool is_static) - : schema_altering_statement(std::move(name)) - , _type(t) - , _raw_column_name(std::move(column_name)) - , _validator(std::move(validator)) - , _properties(std::move(properties)) - , _renames(std::move(renames)) - , _is_static(is_static) -{ -} - -future<> alter_table_statement::check_access(const service::client_state& state) { - return state.has_column_family_access(keyspace(), column_family(), auth::permission::ALTER); -} - -void alter_table_statement::validate(distributed& proxy, const service::client_state& state) -{ - // validated in announce_migration() -} - -static data_type validate_alter(schema_ptr schema, const column_definition& def, const cql3_type& validator) -{ - auto type = def.type->is_reversed() && !validator.get_type()->is_reversed() - ? reversed_type_impl::get_instance(validator.get_type()) - : validator.get_type(); - switch (def.kind) { - case column_kind::partition_key: - if (type->is_counter()) { - throw exceptions::invalid_request_exception( - sprint("counter type is not supported for PRIMARY KEY part %s", def.name_as_text())); - } - - if (!type->is_value_compatible_with(*def.type)) { - throw exceptions::configuration_exception( - sprint("Cannot change %s from type %s to type %s: types are incompatible.", - def.name_as_text(), - def.type->as_cql3_type(), - validator)); - } - break; - - case column_kind::clustering_key: - if (!schema->is_cql3_table()) { - throw exceptions::invalid_request_exception( - sprint("Cannot alter clustering column %s in a non-CQL3 table", def.name_as_text())); - } - - // Note that CFMetaData.validateCompatibility already validate the change we're about to do. However, the error message it - // sends is a bit cryptic for a CQL3 user, so validating here for a sake of returning a better error message - // Do note that we need isCompatibleWith here, not just isValueCompatibleWith. - if (!type->is_compatible_with(*def.type)) { - throw exceptions::configuration_exception( - sprint("Cannot change %s from type %s to type %s: types are not order-compatible.", - def.name_as_text(), - def.type->as_cql3_type(), - validator)); - } - break; - - case column_kind::regular_column: - case column_kind::static_column: - // Thrift allows to change a column validator so CFMetaData.validateCompatibility will let it slide - // if we change to an incompatible type (contrarily to the comparator case). But we don't want to - // allow it for CQL3 (see #5882) so validating it explicitly here. We only care about value compatibility - // though since we won't compare values (except when there is an index, but that is validated by - // ColumnDefinition already). 
- if (!type->is_value_compatible_with(*def.type)) { - throw exceptions::configuration_exception( - sprint("Cannot change %s from type %s to type %s: types are incompatible.", - def.name_as_text(), - def.type->as_cql3_type(), - validator)); - } - break; - } - return type; -} - -static void validate_column_rename(const schema& schema, const column_identifier& from, const column_identifier& to) -{ - auto def = schema.get_column_definition(from.name()); - if (!def) { - throw exceptions::invalid_request_exception(sprint("Cannot rename unknown column %s in table %s", from, schema.cf_name())); - } - - if (schema.get_column_definition(to.name())) { - throw exceptions::invalid_request_exception(sprint("Cannot rename column %s to %s in table %s; another column of that name already exists", from, to, schema.cf_name())); - } - - if (def->is_part_of_cell_name()) { - throw exceptions::invalid_request_exception(sprint("Cannot rename non-PRIMARY KEY part %s", from)); - } - - if (!schema.indices().empty()) { - auto& sim = secondary_index::get_secondary_index_manager(); - auto dependent_indices = sim.local().get_dependent_indices(*def); - if (!dependent_indices.empty()) { - auto index_names = ::join(", ", dependent_indices | boost::adaptors::transformed([](const index_metadata& im) { - return im.name(); - })); - throw exceptions::invalid_request_exception( - sprint("Cannot rename column %s because it has dependent secondary indexes (%s)", from, index_names)); - } - } -} - -future> alter_table_statement::announce_migration(distributed& proxy, bool is_local_only) -{ - auto& db = proxy.local().get_db().local(); - auto schema = validation::validate_column_family(db, keyspace(), column_family()); - if (schema->is_view()) { - throw exceptions::invalid_request_exception("Cannot use ALTER TABLE on Materialized View"); - } - - auto cfm = schema_builder(schema); - - shared_ptr validator; - if (_validator) { - validator = _validator->prepare(db, keyspace()); - } - shared_ptr column_name; - const column_definition* def = nullptr; - if (_raw_column_name) { - column_name = _raw_column_name->prepare_column_identifier(schema); - def = get_column_definition(schema, *column_name); - } - - auto& cf = db.find_column_family(schema); - std::vector view_updates; - - switch (_type) { - case alter_table_statement::type::add: - { - assert(column_name); - if (schema->is_dense()) { - throw exceptions::invalid_request_exception("Cannot add new column to a COMPACT STORAGE table"); - } - - if (_is_static) { - if (!schema->is_compound()) { - throw exceptions::invalid_request_exception("Static columns are not allowed in COMPACT STORAGE tables"); - } - if (!schema->clustering_key_size()) { - throw exceptions::invalid_request_exception("Static columns are only useful (and thus allowed) if the table has at least one clustering column"); - } - } - - if (def) { - if (def->is_partition_key()) { - throw exceptions::invalid_request_exception(sprint("Invalid column name %s because it conflicts with a PRIMARY KEY part", column_name)); - } else { - throw exceptions::invalid_request_exception(sprint("Invalid column name %s because it conflicts with an existing column", column_name)); - } - } - - // Cannot re-add a dropped counter column. See #7831.
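validate_column_rename() above reduces to four checks: the source column must exist, the target name must be free, only primary-key components may be renamed (a regular column's name is part of its cell names), and no secondary index may depend on the column. A condensed sketch with plain std types (illustrative names):

    #include <set>
    #include <stdexcept>
    #include <string>

    struct table_model {
        std::set<std::string> columns;
        std::set<std::string> primary_key;     // partition + clustering columns
        std::set<std::string> indexed_columns; // columns with dependent indexes
    };

    void validate_rename(const table_model& t, const std::string& from, const std::string& to) {
        if (!t.columns.count(from)) {
            throw std::invalid_argument("Cannot rename unknown column " + from);
        }
        if (t.columns.count(to)) {
            throw std::invalid_argument("Column " + to + " already exists");
        }
        if (!t.primary_key.count(from)) {
            throw std::invalid_argument("Cannot rename non-PRIMARY KEY part " + from);
        }
        if (t.indexed_columns.count(from)) {
            throw std::invalid_argument("Column " + from + " has dependent secondary indexes");
        }
    }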
- if (schema->is_counter() && schema->dropped_columns().count(column_name->text())) { - throw exceptions::invalid_request_exception(sprint("Cannot re-add previously dropped counter column %s", column_name)); - } - - auto type = validator->get_type(); - if (type->is_collection() && type->is_multi_cell()) { - if (!schema->is_compound()) { - throw exceptions::invalid_request_exception("Cannot use non-frozen collections with a non-composite PRIMARY KEY"); - } - if (schema->is_super()) { - throw exceptions::invalid_request_exception("Cannot use non-frozen collections with super column families"); - } - - - // If there used to be a non-frozen collection column with the same name (that has been dropped), - // we could still have some data using the old type, and so we can't allow adding a collection - // with the same name unless the types are compatible (see #6276). - auto& dropped = schema->dropped_columns(); - auto i = dropped.find(column_name->text()); - if (i != dropped.end() && !type->is_compatible_with(*i->second.type)) { - throw exceptions::invalid_request_exception(sprint("Cannot add a collection with the name %s " - "because a collection with the same name and a different type has already been used in the past", column_name)); - } - } - - cfm.with_column(column_name->name(), type, _is_static ? column_kind::static_column : column_kind::regular_column); - - // Adding a column to a table which has an include all view requires the column to be added to the view - // as well - if (!_is_static) { - for (auto&& view : cf.views()) { - if (view->view_info()->include_all_columns()) { - schema_builder builder(view); - builder.with_column(column_name->name(), type); - view_updates.push_back(view_ptr(builder.build())); - } - } - } - - break; - } - case alter_table_statement::type::alter: - { - assert(column_name); - if (!def) { - throw exceptions::invalid_request_exception(sprint("Column %s was not found in table %s", column_name, column_family())); - } - - auto type = validate_alter(schema, *def, *validator); - // In any case, we update the column definition - cfm.with_altered_column_type(column_name->name(), type); - - // We also have to validate the view types here. If we have a view which includes a column as part of - // the clustering key, we need to make sure that it is indeed compatible. - for (auto&& view : cf.views()) { - auto* view_def = view->get_column_definition(column_name->name()); - if (view_def) { - schema_builder builder(view); - auto view_type = validate_alter(view, *view_def, *validator); - builder.with_altered_column_type(column_name->name(), std::move(view_type)); - view_updates.push_back(view_ptr(builder.build())); - } - } - break; - } - case alter_table_statement::type::drop: - { - assert(column_name); - if (!schema->is_cql3_table()) { - throw exceptions::invalid_request_exception("Cannot drop columns from a non-CQL3 table"); - } - if (!def) { - throw exceptions::invalid_request_exception(sprint("Column %s was not found in table %s", column_name, column_family())); - } - - if (def->is_primary_key()) { - throw exceptions::invalid_request_exception(sprint("Cannot drop PRIMARY KEY part %s", column_name)); - } else { - for (auto&& column_def : boost::range::join(schema->static_columns(), schema->regular_columns())) { // find - if (column_def.name() == column_name->name()) { - cfm.without_column(column_name->name()); - break; - } - } - } - - // If a column is dropped which is included in a view, we don't allow the drop to take place. 
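As the comment above notes, a DROP is refused outright when any materialized view includes the column. A sketch of that check (plain std types, illustrative names):

    #include <stdexcept>
    #include <string>
    #include <vector>

    struct view_model {
        std::string name;
        std::vector<std::string> columns;
    };

    void check_drop_column(const std::vector<view_model>& views, const std::string& column) {
        std::string dependents;
        for (const auto& v : views) {
            for (const auto& c : v.columns) {
                if (c == column) {
                    dependents += dependents.empty() ? v.name : ", " + v.name;
                }
            }
        }
        if (!dependents.empty()) {
            throw std::invalid_argument("Cannot drop column " + column +
                ", depended on by materialized views (" + dependents + ")");
        }
    }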
- auto view_names = ::join(", ", cf.views() - | boost::adaptors::filtered([&] (auto&& v) { return bool(v->get_column_definition(column_name->name())); }) - | boost::adaptors::transformed([] (auto&& v) { return v->cf_name(); })); - if (!view_names.empty()) { - throw exceptions::invalid_request_exception(sprint( - "Cannot drop column %s, depended on by materialized views (%s.{%s})", - column_name, keyspace(), view_names)); - } - break; - } - - case alter_table_statement::type::opts: - if (!_properties) { - throw exceptions::invalid_request_exception("ALTER COLUMNFAMILY WITH invoked, but no parameters found"); - } - - _properties->validate(); - - if (!cf.views().empty() && _properties->get_gc_grace_seconds() == 0) { - throw exceptions::invalid_request_exception( - "Cannot alter gc_grace_seconds of the base table of a " - "materialized view to 0, since this value is used to TTL " - "undelivered updates. Setting gc_grace_seconds too low might " - "cause undelivered updates to expire " - "before being replayed."); - } - - if (schema->is_counter() && _properties->get_default_time_to_live() > 0) { - throw exceptions::invalid_request_exception("Cannot set default_time_to_live on a table with counters"); - } - - _properties->apply_to_builder(cfm); - break; - - case alter_table_statement::type::rename: - for (auto&& entry : _renames) { - auto from = entry.first->prepare_column_identifier(schema); - auto to = entry.second->prepare_column_identifier(schema); - - validate_column_rename(*schema, *from, *to); - cfm.with_column_rename(from->name(), to->name()); - - // If the view includes a renamed column, it must be renamed in the view table and the definition. - for (auto&& view : cf.views()) { - if (view->get_column_definition(from->name())) { - schema_builder builder(view); - - auto view_from = entry.first->prepare_column_identifier(view); - auto view_to = entry.second->prepare_column_identifier(view); - validate_column_rename(*view, *view_from, *view_to); - builder.with_column_rename(view_from->name(), view_to->name()); - - auto new_where = util::rename_column_in_where_clause( - view->view_info()->where_clause(), - column_identifier::raw(view_from->text(), true), - column_identifier::raw(view_to->text(), true)); - builder.with_view_info(view->view_info()->base_id(), view->view_info()->base_name(), - view->view_info()->include_all_columns(), std::move(new_where)); - - view_updates.push_back(view_ptr(builder.build())); - } - } - } - break; - } - - return service::get_local_migration_manager().announce_column_family_update(cfm.build(), false, std::move(view_updates), is_local_only).then([this] { - using namespace cql_transport; - return make_shared( - event::schema_change::change_type::UPDATED, - event::schema_change::target_type::TABLE, - keyspace(), - column_family()); - }); -} - -std::unique_ptr -cql3::statements::alter_table_statement::prepare(database& db, cql_stats& stats) { - return std::make_unique(make_shared(*this)); -} - -} - -} diff --git a/scylla/cql3/statements/alter_table_statement.hh b/scylla/cql3/statements/alter_table_statement.hh deleted file mode 100644 index 14b03d2..0000000 --- a/scylla/cql3/statements/alter_table_statement.hh +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
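Back in the opts branch of announce_migration() above, two property combinations are rejected: gc_grace_seconds = 0 on a base table that has materialized views (the value is used to TTL undelivered view updates), and a positive default_time_to_live on a counter table. A compact sketch (plain types, illustrative names):

    #include <stdexcept>

    struct table_props {
        bool has_views = false;
        bool is_counter = false;
    };

    void check_alter_options(const table_props& t, int gc_grace_seconds, int default_ttl) {
        if (t.has_views && gc_grace_seconds == 0) {
            // gc_grace_seconds is used to TTL undelivered view updates, so
            // zeroing it could silently drop them before they are replayed.
            throw std::invalid_argument(
                "Cannot alter gc_grace_seconds of the base table of a materialized view to 0");
        }
        if (t.is_counter && default_ttl > 0) {
            throw std::invalid_argument("Cannot set default_time_to_live on a table with counters");
        }
    }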
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/statements/schema_altering_statement.hh" -#include "cql3/statements/cf_prop_defs.hh" -#include "cql3/cql3_type.hh" - -namespace cql3 { - -namespace statements { - -class alter_table_statement : public schema_altering_statement { -public: - enum class type { - add, - alter, - drop, - opts, - rename, - }; - using renames_type = std::vector, - shared_ptr>>; -private: - const type _type; - const shared_ptr _raw_column_name; - const shared_ptr _validator; - const shared_ptr _properties; - const renames_type _renames; - const bool _is_static; -public: - alter_table_statement(shared_ptr name, - type t, - shared_ptr column_name, - shared_ptr validator, - shared_ptr properties, - renames_type renames, - bool is_static); - - virtual future<> check_access(const service::client_state& state) override; - virtual void validate(distributed& proxy, const service::client_state& state) override; - virtual future> announce_migration(distributed& proxy, bool is_local_only) override; - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; -}; - -} - -} diff --git a/scylla/cql3/statements/alter_type_statement.cc b/scylla/cql3/statements/alter_type_statement.cc deleted file mode 100644 index 8c6b836..0000000 --- a/scylla/cql3/statements/alter_type_statement.cc +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/statements/alter_type_statement.hh" -#include "cql3/statements/create_type_statement.hh" -#include "prepared_statement.hh" -#include "schema_builder.hh" -#include "service/migration_manager.hh" -#include "boost/range/adaptor/map.hpp" -#include "stdx.hh" - -namespace cql3 { - -namespace statements { - -alter_type_statement::alter_type_statement(const ut_name& name) - : _name{name} -{ -} - -void alter_type_statement::prepare_keyspace(const service::client_state& state) -{ - if (!_name.has_keyspace()) { - _name.set_keyspace(state.get_keyspace()); - } -} - -future<> alter_type_statement::check_access(const service::client_state& state) -{ - return state.has_keyspace_access(keyspace(), auth::permission::ALTER); -} - -void alter_type_statement::validate(distributed& proxy, const service::client_state& state) -{ - // Validation is left to announceMigration as it's easier to do it while constructing the updated type. - // It doesn't really change anything anyway. -} - -const sstring& alter_type_statement::keyspace() const -{ - return _name.get_keyspace(); -} - -static stdx::optional get_idx_of_field(user_type type, shared_ptr field) -{ - for (uint32_t i = 0; i < type->field_names().size(); ++i) { - if (field->name() == type->field_names()[i]) { - return {i}; - } - } - return {}; -} - -void alter_type_statement::do_announce_migration(database& db, ::keyspace& ks, bool is_local_only) -{ - auto&& all_types = ks.metadata()->user_types()->get_all_types(); - auto to_update = all_types.find(_name.get_user_type_name()); - // Shouldn't happen, unless we race with a drop - if (to_update == all_types.end()) { - throw exceptions::invalid_request_exception(sprint("No user type named %s exists.", _name.to_string())); - } - - auto&& updated = make_updated_type(db, to_update->second); - - // Now, we need to announce the type update to basically change it for new tables using this type, - // but we also need to find all existing user types and CF using it and change them. 
- service::get_local_migration_manager().announce_type_update(updated, is_local_only).get(); - - for (auto&& schema : ks.metadata()->cf_meta_data() | boost::adaptors::map_values) { - auto cfm = schema_builder(schema); - bool modified = false; - for (auto&& column : schema->all_columns()) { - auto t_opt = column.type->update_user_type(updated); - if (t_opt) { - modified = true; - // We need to update this column - cfm.with_altered_column_type(column.name(), *t_opt); - } - } - if (modified) { - if (schema->is_view()) { - service::get_local_migration_manager().announce_view_update(view_ptr(cfm.build()), is_local_only).get(); - } else { - service::get_local_migration_manager().announce_column_family_update(cfm.build(), false, {}, is_local_only).get(); - } - } - } - - // Other user types potentially using the updated type - for (auto&& ut : ks.metadata()->user_types()->get_all_types() | boost::adaptors::map_values) { - // Re-updating the type we've just updated would be harmless but useless so we avoid it. - if (ut->_keyspace != updated->_keyspace || ut->_name != updated->_name) { - auto upd_opt = ut->update_user_type(updated); - if (upd_opt) { - service::get_local_migration_manager().announce_type_update( - static_pointer_cast(*upd_opt), is_local_only).get(); - } - } - } -} - -future> alter_type_statement::announce_migration(distributed& proxy, bool is_local_only) -{ - return seastar::async([this, &proxy, is_local_only] { - auto&& db = proxy.local().get_db().local(); - try { - auto&& ks = db.find_keyspace(keyspace()); - do_announce_migration(db, ks, is_local_only); - using namespace cql_transport; - return make_shared( - event::schema_change::change_type::UPDATED, - event::schema_change::target_type::TYPE, - keyspace(), - _name.get_string_type_name()); - } catch (no_such_keyspace& e) { - throw exceptions::invalid_request_exception(sprint("Cannot alter type in unknown keyspace %s", keyspace())); - } - }); -} - -alter_type_statement::add_or_alter::add_or_alter(const ut_name& name, bool is_add, shared_ptr field_name, shared_ptr field_type) - : alter_type_statement(name) - , _is_add(is_add) - , _field_name(field_name) - , _field_type(field_type) -{ -} - -user_type alter_type_statement::add_or_alter::do_add(database& db, user_type to_update) const -{ - if (get_idx_of_field(to_update, _field_name)) { - throw exceptions::invalid_request_exception(sprint("Cannot add new field %s to type %s: a field of the same name already exists", _field_name->name(), _name.to_string())); - } - - std::vector new_names(to_update->field_names()); - new_names.push_back(_field_name->name()); - std::vector new_types(to_update->field_types()); - auto&& add_type = _field_type->prepare(db, keyspace())->get_type(); - if (add_type->references_user_type(to_update->_keyspace, to_update->_name)) { - throw exceptions::invalid_request_exception(sprint("Cannot add new field %s of type %s to type %s as this would create a circular reference", _field_name->name(), _field_type->to_string(), _name.to_string())); - } - new_types.push_back(std::move(add_type)); - return user_type_impl::get_instance(to_update->_keyspace, to_update->_name, std::move(new_names), std::move(new_types)); -} - -user_type alter_type_statement::add_or_alter::do_alter(database& db, user_type to_update) const -{ - stdx::optional idx = get_idx_of_field(to_update, _field_name); - if (!idx) { - throw exceptions::invalid_request_exception(sprint("Unknown field %s in type %s", _field_name->name(), _name.to_string())); - } - - auto previous = 
to_update->field_types()[*idx]; - auto new_type = _field_type->prepare(db, keyspace())->get_type(); - if (!new_type->is_compatible_with(*previous)) { - throw exceptions::invalid_request_exception(sprint("Type %s is incompatible with previous type %s of field %s in user type %s", _field_type->to_string(), previous->as_cql3_type()->to_string(), _field_name->name(), _name.to_string())); - } - - std::vector new_types(to_update->field_types()); - new_types[*idx] = new_type; - return user_type_impl::get_instance(to_update->_keyspace, to_update->_name, to_update->field_names(), std::move(new_types)); -} - -user_type alter_type_statement::add_or_alter::make_updated_type(database& db, user_type to_update) const -{ - return _is_add ? do_add(db, to_update) : do_alter(db, to_update); -} - -alter_type_statement::renames::renames(const ut_name& name) - : alter_type_statement(name) -{ -} - -void alter_type_statement::renames::add_rename(shared_ptr previous_name, shared_ptr new_name) -{ - _renames.emplace_back(previous_name, new_name); -} - -user_type alter_type_statement::renames::make_updated_type(database& db, user_type to_update) const -{ - std::vector new_names(to_update->field_names()); - for (auto&& rename : _renames) { - auto&& from = rename.first; - stdx::optional idx = get_idx_of_field(to_update, from); - if (!idx) { - throw exceptions::invalid_request_exception(sprint("Unknown field %s in type %s", from->to_string(), _name.to_string())); - } - new_names[*idx] = rename.second->name(); - } - auto&& updated = user_type_impl::get_instance(to_update->_keyspace, to_update->_name, std::move(new_names), to_update->field_types()); - create_type_statement::check_for_duplicate_names(updated); - return updated; -} - -std::unique_ptr -alter_type_statement::add_or_alter::prepare(database& db, cql_stats& stats) { - return std::make_unique(make_shared(*this)); -} - -std::unique_ptr -alter_type_statement::renames::prepare(database& db, cql_stats& stats) { - return std::make_unique(make_shared(*this)); -} - -} - -} diff --git a/scylla/cql3/statements/alter_type_statement.hh b/scylla/cql3/statements/alter_type_statement.hh deleted file mode 100644 index bd5143b..0000000 --- a/scylla/cql3/statements/alter_type_statement.hh +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version.
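renames::make_updated_type() above rebuilds the field-name vector: each source name is resolved to its index, replaced, and the result is checked for duplicates. The same transformation over plain vectors (illustrative names):

    #include <algorithm>
    #include <set>
    #include <stdexcept>
    #include <string>
    #include <utility>
    #include <vector>

    std::vector<std::string> rename_fields(std::vector<std::string> names,
            const std::vector<std::pair<std::string, std::string>>& renames) {
        for (const auto& [from, to] : renames) {
            auto it = std::find(names.begin(), names.end(), from);
            if (it == names.end()) {
                throw std::invalid_argument("Unknown field " + from);
            }
            *it = to;
        }
        // Mirrors create_type_statement::check_for_duplicate_names().
        std::set<std::string> unique(names.begin(), names.end());
        if (unique.size() != names.size()) {
            throw std::invalid_argument("Duplicate field names after rename");
        }
        return names;
    }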
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/statements/schema_altering_statement.hh" -#include "cql3/cql3_type.hh" -#include "cql3/ut_name.hh" - -namespace cql3 { - -namespace statements { - -class alter_type_statement : public schema_altering_statement { -protected: - ut_name _name; -public: - alter_type_statement(const ut_name& name); - - virtual void prepare_keyspace(const service::client_state& state) override; - - virtual future<> check_access(const service::client_state& state) override; - - virtual void validate(distributed& proxy, const service::client_state& state) override; - - virtual const sstring& keyspace() const override; - - virtual future> announce_migration(distributed& proxy, bool is_local_only) override; - - class add_or_alter; - class renames; -protected: - virtual user_type make_updated_type(database& db, user_type to_update) const = 0; -private: - void do_announce_migration(database& db, ::keyspace& ks, bool is_local_only); -}; - -class alter_type_statement::add_or_alter : public alter_type_statement { - bool _is_add; - shared_ptr _field_name; - shared_ptr _field_type; -public: - add_or_alter(const ut_name& name, bool is_add, - const shared_ptr field_name, - const shared_ptr field_type); - virtual user_type make_updated_type(database& db, user_type to_update) const override; - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; -private: - user_type do_add(database& db, user_type to_update) const; - user_type do_alter(database& db, user_type to_update) const; -}; - - -class alter_type_statement::renames : public alter_type_statement { - using renames_type = std::vector, - shared_ptr>>; - renames_type _renames; -public: - renames(const ut_name& name); - - void add_rename(shared_ptr previous_name, shared_ptr new_name); - - virtual user_type make_updated_type(database& db, user_type to_update) const override; - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; -}; - -} - -} diff --git a/scylla/cql3/statements/alter_user_statement.cc b/scylla/cql3/statements/alter_user_statement.cc deleted file mode 100644 index c361c99..0000000 --- a/scylla/cql3/statements/alter_user_statement.cc +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. 
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include - -#include "alter_user_statement.hh" -#include "auth/auth.hh" -#include "auth/authenticator.hh" - -cql3::statements::alter_user_statement::alter_user_statement(sstring username, ::shared_ptr opts, std::experimental::optional superuser) - : _username(std::move(username)) - , _opts(std::move(opts)) - , _superuser(std::move(superuser)) -{} - -void cql3::statements::alter_user_statement::validate(distributed& proxy, const service::client_state& state) { - _opts->validate(); - - if (!_superuser && _opts->empty()) { - throw exceptions::invalid_request_exception("ALTER USER can't be empty"); - } - - // validate login here before checkAccess to avoid leaking user existence to anonymous users. - state.ensure_not_anonymous(); - - // cannot validate user existence here, because - // we need to query -> continuation, and this is not a continuation method -} - -future<> cql3::statements::alter_user_statement::check_access(const service::client_state& state) { - auto user = state.user(); - if (_superuser && user->name() == _username) { - // using contractions in error messages is the ultimate sign of lowbrowness. - // however, dtests depend on matching the exception messages. So we keep them despite - // my disgust. - throw exceptions::unauthorized_exception("You aren't allowed to alter your own superuser status"); - } - return user->is_super().then([this, user](bool is_super) { - if (_superuser && !is_super) { - throw exceptions::unauthorized_exception("Only superusers are allowed to alter superuser status"); - } - - if (!is_super && user->name() != _username) { - throw exceptions::unauthorized_exception("You aren't allowed to alter this user"); - } - - if (!is_super) { - for (auto o : _opts->options() | boost::adaptors::map_keys) { - if (!auth::authenticator::get().alterable_options().contains(o)) { - throw exceptions::unauthorized_exception(sprint("You aren't allowed to alter %s option", o)); - } - } - } - }); -} - -future<::shared_ptr> -cql3::statements::alter_user_statement::execute(distributed& proxy, service::query_state& state, const query_options& options) { - return auth::auth::is_existing_user(_username).then([this](bool exists) { - if (!exists) { - throw exceptions::invalid_request_exception(sprint("User %s doesn't exist", _username)); - } - auto f = _opts->options().empty() ?
make_ready_future() : auth::authenticator::get().alter(_username, _opts->options()); - if (_superuser) { - f = f.then([this] { - return auth::auth::insert_user(_username, *_superuser); - }); - } - return f.then([] { return make_ready_future<::shared_ptr>(); }); - }); -} - diff --git a/scylla/cql3/statements/alter_user_statement.hh b/scylla/cql3/statements/alter_user_statement.hh deleted file mode 100644 index e30eeb1..0000000 --- a/scylla/cql3/statements/alter_user_statement.hh +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -#include "authentication_statement.hh" -#include "cql3/user_options.hh" - -namespace cql3 { - -namespace statements { - -class alter_user_statement : public authentication_statement { -private: - sstring _username; - ::shared_ptr _opts; - std::experimental::optional _superuser; -public: - - alter_user_statement(sstring, ::shared_ptr, std::experimental::optional superuser = {}); - - void validate(distributed&, const service::client_state&) override; - future<> check_access(const service::client_state&) override; - - future<::shared_ptr> execute(distributed& - , service::query_state& - , const query_options&) override; -}; - -} - -} diff --git a/scylla/cql3/statements/alter_view_statement.cc b/scylla/cql3/statements/alter_view_statement.cc deleted file mode 100644 index 6d494b7..0000000 --- a/scylla/cql3/statements/alter_view_statement.cc +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
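check_access() in the deleted alter_user_statement.cc above encodes three authorization rules: users may not change their own superuser flag, only superusers may change anyone's superuser flag, and a non-superuser may alter only their own account, and then only options the authenticator declares alterable. A synchronous sketch of the decision (plain std types, illustrative names; the real code chains futures):

    #include <set>
    #include <stdexcept>
    #include <string>

    struct auth_request {
        std::string actor;           // user running ALTER USER
        bool actor_is_super = false;
        std::string target;          // user being altered
        bool changes_superuser = false;
        std::set<std::string> changed_options;
    };

    void check_alter_user(const auth_request& r, const std::set<std::string>& alterable) {
        if (r.changes_superuser && r.actor == r.target) {
            throw std::runtime_error("You aren't allowed to alter your own superuser status");
        }
        if (r.changes_superuser && !r.actor_is_super) {
            throw std::runtime_error("Only superusers are allowed to alter superuser status");
        }
        if (!r.actor_is_super) {
            if (r.actor != r.target) {
                throw std::runtime_error("You aren't allowed to alter this user");
            }
            for (const auto& o : r.changed_options) {
                if (!alterable.count(o)) {
                    throw std::runtime_error("You aren't allowed to alter " + o + " option");
                }
            }
        }
    }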
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/statements/alter_view_statement.hh" -#include "cql3/statements/prepared_statement.hh" -#include "service/migration_manager.hh" -#include "validation.hh" -#include "view_info.hh" - -namespace cql3 { - -namespace statements { - -alter_view_statement::alter_view_statement(::shared_ptr view_name, ::shared_ptr properties) - : schema_altering_statement{std::move(view_name)} - , _properties{std::move(properties)} -{ -} - -future<> alter_view_statement::check_access(const service::client_state& state) -{ - try { - auto&& s = service::get_local_storage_proxy().get_db().local().find_schema(keyspace(), column_family()); - if (s->is_view()) { - return state.has_column_family_access(keyspace(), s->view_info()->base_name(), auth::permission::ALTER); - } - } catch (const no_such_column_family& e) { - // Will be validated afterwards. - } - return make_ready_future<>(); -} - -void alter_view_statement::validate(distributed&, const service::client_state& state) -{ - // validated in announce_migration() -} - -future> alter_view_statement::announce_migration(distributed& proxy, bool is_local_only) -{ - auto&& db = proxy.local().get_db().local(); - schema_ptr schema = validation::validate_column_family(db, keyspace(), column_family()); - if (!schema->is_view()) { - throw exceptions::invalid_request_exception("Cannot use ALTER MATERIALIZED VIEW on Table"); - } - - if (!_properties) { - throw exceptions::invalid_request_exception("ALTER MATERIALIZED VIEW WITH invoked, but no parameters found"); - } - - _properties->validate(); - - auto builder = schema_builder(schema); - _properties->apply_to_builder(builder); - - if (builder.get_gc_grace_seconds() == 0) { - throw exceptions::invalid_request_exception( - "Cannot alter gc_grace_seconds of a materialized view to 0, since this " - "value is used to TTL undelivered updates. Setting gc_grace_seconds too " - "low might cause undelivered updates to expire before being replayed."); - } - - if (builder.default_time_to_live().count() > 0) { - throw exceptions::invalid_request_exception( - "Cannot set or alter default_time_to_live for a materialized view. 
" - "Data in a materialized view always expire at the same time than " - "the corresponding data in the parent table."); - } - - return service::get_local_migration_manager().announce_view_update(view_ptr(builder.build()), is_local_only).then([this] { - using namespace cql_transport; - - return make_shared( - event::schema_change::change_type::UPDATED, - event::schema_change::target_type::TABLE, - keyspace(), - column_family()); - }); -} - -std::unique_ptr -alter_view_statement::prepare(database& db, cql_stats& stats) { - return std::make_unique(make_shared(*this)); -} - -} - -} diff --git a/scylla/cql3/statements/alter_view_statement.hh b/scylla/cql3/statements/alter_view_statement.hh deleted file mode 100644 index 154f9b5..0000000 --- a/scylla/cql3/statements/alter_view_statement.hh +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -#include "database.hh" -#include "cql3/statements/cf_prop_defs.hh" -#include "cql3/statements/schema_altering_statement.hh" -#include "cql3/cf_name.hh" - -namespace cql3 { - -namespace statements { - -/** An ALTER MATERIALIZED VIEW parsed from a CQL query statement. 
*/ -class alter_view_statement : public schema_altering_statement { -private: - ::shared_ptr _properties; -public: - alter_view_statement(::shared_ptr view_name, ::shared_ptr properties); - - virtual future<> check_access(const service::client_state& state) override; - - virtual void validate(distributed&, const service::client_state& state) override; - - virtual future> announce_migration(distributed& proxy, bool is_local_only) override; - - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; -}; - -} -} diff --git a/scylla/cql3/statements/authentication_statement.cc b/scylla/cql3/statements/authentication_statement.cc deleted file mode 100644 index 3977e4b..0000000 --- a/scylla/cql3/statements/authentication_statement.cc +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "authentication_statement.hh" -#include "transport/messages/result_message.hh" - -uint32_t cql3::statements::authentication_statement::get_bound_terms() { - return 0; -} - -std::unique_ptr cql3::statements::authentication_statement::prepare( - database& db, cql_stats& stats) { - return std::make_unique(this->shared_from_this()); -} - -bool cql3::statements::authentication_statement::uses_function( - const sstring& ks_name, const sstring& function_name) const { - return parsed_statement::uses_function(ks_name, function_name); -} - -bool cql3::statements::authentication_statement::depends_on_keyspace( - const sstring& ks_name) const { - return false; -} - -bool cql3::statements::authentication_statement::depends_on_column_family( - const sstring& cf_name) const { - return false; -} - -void cql3::statements::authentication_statement::validate( - distributed&, - const service::client_state& state) { -} - -future<> cql3::statements::authentication_statement::check_access(const service::client_state& state) { - return make_ready_future<>(); -} - -future<::shared_ptr> cql3::statements::authentication_statement::execute_internal( - distributed& proxy, - service::query_state& state, const query_options& options) { - // Internal queries are exclusively on the system keyspace and makes no sense here - throw std::runtime_error("unsupported operation"); -} diff --git a/scylla/cql3/statements/authentication_statement.hh b/scylla/cql3/statements/authentication_statement.hh deleted file mode 100644 index 1dd23a1..0000000 --- a/scylla/cql3/statements/authentication_statement.hh +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "cql3/cql_statement.hh" -#include "prepared_statement.hh" -#include "raw/parsed_statement.hh" -#include "transport/messages_fwd.hh" - -namespace cql3 { - -namespace statements { - -class authentication_statement : public raw::parsed_statement, public cql_statement_no_metadata, public ::enable_shared_from_this { -public: - uint32_t get_bound_terms() override; - - std::unique_ptr prepare(database& db, cql_stats& stats) override; - - bool uses_function(const sstring& ks_name, const sstring& function_name) const override; - - bool depends_on_keyspace(const sstring& ks_name) const override; - - bool depends_on_column_family(const sstring& cf_name) const override; - - future<> check_access(const service::client_state& state) override; - - void validate(distributed&, const service::client_state& state) override; - - future<::shared_ptr> - execute_internal(distributed& proxy, service::query_state& state, const query_options& options) override; -}; - -} - -} diff --git a/scylla/cql3/statements/authorization_statement.cc b/scylla/cql3/statements/authorization_statement.cc deleted file mode 100644 index ba881dd..0000000 --- a/scylla/cql3/statements/authorization_statement.cc +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "authorization_statement.hh" -#include "transport/messages/result_message.hh" - -uint32_t cql3::statements::authorization_statement::get_bound_terms() { - return 0; -} - -std::unique_ptr cql3::statements::authorization_statement::prepare( - database& db, cql_stats& stats) { - return std::make_unique(this->shared_from_this()); -} - -bool cql3::statements::authorization_statement::uses_function( - const sstring& ks_name, const sstring& function_name) const { - return parsed_statement::uses_function(ks_name, function_name); -} - -bool cql3::statements::authorization_statement::depends_on_keyspace( - const sstring& ks_name) const { - return false; -} - -bool cql3::statements::authorization_statement::depends_on_column_family( - const sstring& cf_name) const { - return false; -} - -void cql3::statements::authorization_statement::validate( - distributed&, - const service::client_state& state) { -} - -future<> cql3::statements::authorization_statement::check_access(const service::client_state& state) { - return make_ready_future<>(); -} - -future<::shared_ptr> cql3::statements::authorization_statement::execute_internal( - distributed& proxy, - service::query_state& state, const query_options& options) { - // Internal queries are exclusively on the system keyspace and makes no sense here - throw std::runtime_error("unsupported operation"); -} - -void cql3::statements::authorization_statement::mayme_correct_resource(auth::data_resource& resource, const service::client_state& state) { - if (resource.is_column_family_level() && resource.keyspace().empty()) { - resource = auth::data_resource(state.get_keyspace(), resource.column_family()); - } -} - diff --git a/scylla/cql3/statements/authorization_statement.hh b/scylla/cql3/statements/authorization_statement.hh deleted file mode 100644 index 36ec2b4..0000000 --- a/scylla/cql3/statements/authorization_statement.hh +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "cql3/cql_statement.hh" -#include "prepared_statement.hh" -#include "raw/parsed_statement.hh" -#include "transport/messages_fwd.hh" - -namespace auth { -class data_resource; -} - -namespace cql3 { - -namespace statements { - -class authorization_statement : public raw::parsed_statement, public cql_statement_no_metadata, public ::enable_shared_from_this { -public: - uint32_t get_bound_terms() override; - - std::unique_ptr prepare(database& db, cql_stats& stats) override; - - bool uses_function(const sstring& ks_name, const sstring& function_name) const override; - - bool depends_on_keyspace(const sstring& ks_name) const override; - - bool depends_on_column_family(const sstring& cf_name) const override; - - future<> check_access(const service::client_state& state) override; - - void validate(distributed&, const service::client_state& state) override; - - future<::shared_ptr> - execute_internal(distributed& proxy, service::query_state& state, const query_options& options) override; - -protected: - static void mayme_correct_resource(auth::data_resource&, const service::client_state&); -}; - -} - -} diff --git a/scylla/cql3/statements/batch_statement.cc b/scylla/cql3/statements/batch_statement.cc deleted file mode 100644 index 248e8b7..0000000 --- a/scylla/cql3/statements/batch_statement.cc +++ /dev/null @@ -1,454 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "batch_statement.hh" -#include "raw/batch_statement.hh" -#include "db/config.hh" -#include - -namespace { - -struct mutation_equals_by_key { - bool operator()(const mutation& m1, const mutation& m2) const { - return m1.schema() == m2.schema() - && m1.decorated_key().equal(*m1.schema(), m2.decorated_key()); - } -}; - -struct mutation_hash_by_key { - size_t operator()(const mutation& m) const { - auto dk_hash = std::hash(); - return dk_hash(m.decorated_key()); - } -}; - -} - - -namespace cql3 { - -namespace statements { - -logging::logger batch_statement::_logger("BatchStatement"); - -batch_statement::batch_statement(int bound_terms, type type_, - std::vector> statements, - std::unique_ptr attrs, - cql_stats& stats) - : _bound_terms(bound_terms), _type(type_), _statements(std::move(statements)) - , _attrs(std::move(attrs)) - , _has_conditions(boost::algorithm::any_of(_statements, std::mem_fn(&modification_statement::has_conditions))) - , _stats(stats) -{ -} - -batch_statement::batch_statement(type type_, - std::vector> statements, - std::unique_ptr attrs, - cql_stats& stats) - : batch_statement(-1, type_, std::move(statements), std::move(attrs), stats) -{ -} - -bool batch_statement::uses_function(const sstring& ks_name, const sstring& function_name) const -{ - return _attrs->uses_function(ks_name, function_name) - || boost::algorithm::any_of(_statements, [&] (auto&& s) { return s->uses_function(ks_name, function_name); }); -} - -bool batch_statement::depends_on_keyspace(const sstring& ks_name) const -{ - return false; -} - -bool batch_statement::depends_on_column_family(const sstring& cf_name) const -{ - return false; -} - -uint32_t batch_statement::get_bound_terms() -{ - return _bound_terms; -} - -future<> batch_statement::check_access(const service::client_state& state) -{ - return parallel_for_each(_statements.begin(), _statements.end(), [&state](auto&& s) { - return s->check_access(state); - }); -} - -void batch_statement::validate() -{ - if (_attrs->is_time_to_live_set()) { - throw exceptions::invalid_request_exception("Global TTL on the BATCH statement is not supported."); - } - - bool timestamp_set = _attrs->is_timestamp_set(); - if (timestamp_set) { - if (_has_conditions) { - throw exceptions::invalid_request_exception("Cannot provide custom timestamp for conditional BATCH"); - } - if (_type == type::COUNTER) { - throw exceptions::invalid_request_exception("Cannot provide custom timestamp for counter BATCH"); - } - } - - bool has_counters = boost::algorithm::any_of(_statements, std::mem_fn(&modification_statement::is_counter)); - bool has_non_counters = !boost::algorithm::all_of(_statements, std::mem_fn(&modification_statement::is_counter)); - if (timestamp_set && has_counters) { - throw exceptions::invalid_request_exception("Cannot provide custom timestamp for a BATCH containing counters"); - } - if (timestamp_set && boost::algorithm::any_of(_statements, std::mem_fn(&modification_statement::is_timestamp_set))) { - throw exceptions::invalid_request_exception("Timestamp must be set either on BATCH or individual statements"); - } - if (_type == type::COUNTER && has_non_counters) { - throw exceptions::invalid_request_exception("Cannot include non-counter statement in a counter batch"); - } - if (_type == type::LOGGED && has_counters) { - throw exceptions::invalid_request_exception("Cannot include a counter statement in a logged batch"); - } - if (has_counters && has_non_counters) { - throw exceptions::invalid_request_exception("Counter and non-counter mutations cannot 
exist in the same batch"); - } - - if (_has_conditions - && !_statements.empty() - && (boost::distance(_statements - | boost::adaptors::transformed(std::mem_fn(&modification_statement::keyspace)) - | boost::adaptors::uniqued) != 1 - || (boost::distance(_statements - | boost::adaptors::transformed(std::mem_fn(&modification_statement::column_family)) - | boost::adaptors::uniqued) != 1))) { - throw exceptions::invalid_request_exception("Batch with conditions cannot span multiple tables"); - } - std::experimental::optional raw_counter; - for (auto& s : _statements) { - if (raw_counter && s->is_raw_counter_shard_write() != *raw_counter) { - throw exceptions::invalid_request_exception("Cannot mix raw and regular counter statements in batch"); - } - raw_counter = s->is_raw_counter_shard_write(); - } -} - -void batch_statement::validate(distributed& proxy, const service::client_state& state) -{ - for (auto&& s : _statements) { - s->validate(proxy, state); - } -} - -const std::vector>& batch_statement::get_statements() -{ - return _statements; -} - -future> batch_statement::get_mutations(distributed& storage, const query_options& options, bool local, api::timestamp_type now, tracing::trace_state_ptr trace_state) { - // Do not process in parallel because operations like list append/prepend depend on execution order. - using mutation_set_type = std::unordered_set; - return do_with(mutation_set_type(), [this, &storage, &options, now, local, trace_state] (auto& result) { - result.reserve(_statements.size()); - _stats.statements_in_batches += _statements.size(); - return do_for_each(boost::make_counting_iterator(0), - boost::make_counting_iterator(_statements.size()), - [this, &storage, &options, now, local, &result, trace_state] (size_t i) { - auto&& statement = _statements[i]; - statement->inc_cql_stats(); - auto&& statement_options = options.for_statement(i); - auto timestamp = _attrs->get_timestamp(now, statement_options); - return statement->get_mutations(storage, statement_options, local, timestamp, trace_state).then([&result] (auto&& more) { - for (auto&& m : more) { - // We want unordered_set::try_emplace(), but we don't have it - auto pos = result.find(m); - if (pos == result.end()) { - result.emplace(std::move(m)); - } else { - const_cast(*pos).apply(std::move(m)); // Won't change key - } - } - }); - }).then([&result] { - // can't use range adaptors, because we want to move - auto vresult = std::vector(); - vresult.reserve(result.size()); - for (auto&& m : result) { - vresult.push_back(std::move(m)); - } - return vresult; - }); - }); -} - -void batch_statement::verify_batch_size(const std::vector& mutations) { - if (mutations.size() <= 1) { - return; // We only warn for batch spanning multiple mutations - } - - size_t warn_threshold = service::get_local_storage_proxy().get_db().local().get_config().batch_size_warn_threshold_in_kb() * 1024; - size_t fail_threshold = service::get_local_storage_proxy().get_db().local().get_config().batch_size_fail_threshold_in_kb() * 1024; - - class my_partition_visitor : public mutation_partition_visitor { - public: - void accept_partition_tombstone(tombstone) override {} - void accept_static_cell(column_id, atomic_cell_view v) override { - size += v.value().size(); - } - void accept_static_cell(column_id, collection_mutation_view v) override { - size += v.data.size(); - } - void accept_row_tombstone(const range_tombstone&) override {} - void accept_row(position_in_partition_view, const row_tombstone&, const row_marker&, is_dummy, is_continuous) override {} - 
void accept_row_cell(column_id, atomic_cell_view v) override { - size += v.value().size(); - } - void accept_row_cell(column_id id, collection_mutation_view v) override { - size += v.data.size(); - } - - size_t size = 0; - }; - - my_partition_visitor v; - - for (auto&m : mutations) { - m.partition().accept(*m.schema(), v); - } - - if (v.size > warn_threshold) { - auto error = [&] (const char* type, size_t threshold) -> sstring { - std::unordered_set ks_cf_pairs; - for (auto&& m : mutations) { - ks_cf_pairs.insert(m.schema()->ks_name() + "." + m.schema()->cf_name()); - } - return sprint("Batch of prepared statements for %s is of size %d, exceeding specified %s threshold of %d by %d.", - join(", ", ks_cf_pairs), v.size, type, threshold, v.size - threshold); - }; - if (v.size > fail_threshold) { - _logger.error(error("FAIL", fail_threshold).c_str()); - throw exceptions::invalid_request_exception("Batch too large"); - } else { - _logger.warn(error("WARN", warn_threshold).c_str()); - } - } -} - -struct batch_statement_executor { - static auto get() { return &batch_statement::do_execute; } -}; -static thread_local auto batch_stage = seastar::make_execution_stage("cql3_batch", batch_statement_executor::get()); - -future> batch_statement::execute( - distributed& storage, service::query_state& state, const query_options& options) { - ++_stats.batches; - return batch_stage(this, seastar::ref(storage), seastar::ref(state), - seastar::cref(options), false, options.get_timestamp(state)); -} - -future> batch_statement::do_execute( - distributed& storage, - service::query_state& query_state, const query_options& options, - bool local, api::timestamp_type now) -{ - // FIXME: we don't support nulls here -#if 0 - if (options.get_consistency() == null) - throw new InvalidRequestException("Invalid empty consistency level"); - if (options.getSerialConsistency() == null) - throw new InvalidRequestException("Invalid empty serial consistency level"); -#endif - if (_has_conditions) { - return execute_with_conditions(storage, options, query_state); - } - - return get_mutations(storage, options, local, now, query_state.get_trace_state()).then([this, &storage, &options, tr_state = query_state.get_trace_state()] (std::vector ms) mutable { - return execute_without_conditions(storage, std::move(ms), options.get_consistency(), std::move(tr_state)); - }).then([] { - return make_ready_future>( - make_shared()); - }); -} - -future<> batch_statement::execute_without_conditions( - distributed& storage, - std::vector mutations, - db::consistency_level cl, - tracing::trace_state_ptr tr_state) -{ - // FIXME: do we need to do this? -#if 0 - // Extract each collection of cfs from it's IMutation and then lazily concatenate all of them into a single Iterable. 
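get_mutations() earlier folds statements that hit the same partition into a single mutation; the comment there notes that unordered_set lacks try_emplace(), forcing a find()-plus-const_cast workaround. With a map keyed by the partition key the same fold is direct. A self-contained sketch, with a toy string key and concatenation standing in for dht::decorated_key and mutation::apply():

    #include <cassert>
    #include <string>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    int main() {
        // One "mutation" per (partition key, payload); duplicate keys merge.
        const std::vector<std::pair<std::string, std::string>> incoming = {
            {"pk1", "a"}, {"pk2", "b"}, {"pk1", "c"},
        };

        std::unordered_map<std::string, std::string> merged;
        for (const auto& [key, payload] : incoming) {
            auto [it, inserted] = merged.try_emplace(key, payload);
            if (!inserted) {
                it->second += payload;  // the mutation::apply() analogue
            }
        }

        assert(merged.size() == 2);
        assert(merged.at("pk1") == "ac");
    }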
- Iterable cfs = Iterables.concat(Iterables.transform(mutations, new Function>() - { - public Collection apply(IMutation im) - { - return im.getColumnFamilies(); - } - })); -#endif - verify_batch_size(mutations); - - bool mutate_atomic = true; - if (_type != type::LOGGED) { - _stats.batches_pure_unlogged += 1; - mutate_atomic = false; - } else { - if (mutations.size() > 1) { - _stats.batches_pure_logged += 1; - } else { - _stats.batches_unlogged_from_logged += 1; - mutate_atomic = false; - } - } - return storage.local().mutate_with_triggers(std::move(mutations), cl, mutate_atomic, std::move(tr_state)); -} - -future> batch_statement::execute_with_conditions( - distributed& storage, - const query_options& options, - service::query_state& state) -{ - fail(unimplemented::cause::LWT); -#if 0 - auto now = state.get_timestamp(); - ByteBuffer key = null; - String ksName = null; - String cfName = null; - CQL3CasRequest casRequest = null; - Set columnsWithConditions = new LinkedHashSet<>(); - - for (int i = 0; i < statements.size(); i++) - { - ModificationStatement statement = statements.get(i); - QueryOptions statementOptions = options.forStatement(i); - long timestamp = attrs.getTimestamp(now, statementOptions); - List pks = statement.buildPartitionKeyNames(statementOptions); - if (pks.size() > 1) - throw new IllegalArgumentException("Batch with conditions cannot span multiple partitions (you cannot use IN on the partition key)"); - if (key == null) - { - key = pks.get(0); - ksName = statement.cfm.ksName; - cfName = statement.cfm.cfName; - casRequest = new CQL3CasRequest(statement.cfm, key, true); - } - else if (!key.equals(pks.get(0))) - { - throw new InvalidRequestException("Batch with conditions cannot span multiple partitions"); - } - - Composite clusteringPrefix = statement.createClusteringPrefix(statementOptions); - if (statement.hasConditions()) - { - statement.addConditions(clusteringPrefix, casRequest, statementOptions); - // As soon as we have a ifNotExists, we set columnsWithConditions to null so that everything is in the resultSet - if (statement.hasIfNotExistCondition() || statement.hasIfExistCondition()) - columnsWithConditions = null; - else if (columnsWithConditions != null) - Iterables.addAll(columnsWithConditions, statement.getColumnsWithConditions()); - } - casRequest.addRowUpdate(clusteringPrefix, statement, statementOptions, timestamp); - } - - ColumnFamily result = StorageProxy.cas(ksName, cfName, key, casRequest, options.getSerialConsistency(), options.getConsistency(), state.getClientState()); - - return new ResultMessage.Rows(ModificationStatement.buildCasResultSet(ksName, key, cfName, result, columnsWithConditions, true, options.forStatement(0))); -#endif -} - -future> batch_statement::execute_internal( - distributed& proxy, - service::query_state& query_state, const query_options& options) -{ - throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__)); -#if 0 - assert !hasConditions; - for (IMutation mutation : getMutations(BatchQueryOptions.withoutPerStatementVariables(options), true, queryState.getTimestamp())) - { - // We don't use counters internally. 
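execute_without_conditions() above only keeps the atomic (logged) write path when a LOGGED batch actually contains more than one mutation; a single-mutation logged batch is counted as batches_unlogged_from_logged and downgraded. The decision reduces to the following sketch, with a plain enum in place of the real type:

    #include <cassert>
    #include <cstddef>

    enum class batch_type { logged, unlogged, counter };

    // Mirrors the downgrade in execute_without_conditions(): atomicity is
    // only paid for when a LOGGED batch spans more than one mutation.
    bool needs_atomic_write(batch_type t, std::size_t mutation_count) {
        return t == batch_type::logged && mutation_count > 1;
    }

    int main() {
        assert(needs_atomic_write(batch_type::logged, 2));
        assert(!needs_atomic_write(batch_type::logged, 1));  // downgraded
        assert(!needs_atomic_write(batch_type::unlogged, 5));
    }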
- assert mutation instanceof Mutation; - ((Mutation) mutation).apply(); - } - return null; -#endif -} - -namespace raw { - -std::unique_ptr -batch_statement::prepare(database& db, cql_stats& stats) { - auto&& bound_names = get_bound_variables(); - - stdx::optional first_ks; - stdx::optional first_cf; - bool have_multiple_cfs = false; - - std::vector> statements; - for (auto&& parsed : _parsed_statements) { - if (!first_ks) { - first_ks = parsed->keyspace(); - first_cf = parsed->column_family(); - } else { - have_multiple_cfs = first_ks.value() != parsed->keyspace() || first_cf.value() != parsed->column_family(); - } - statements.push_back(parsed->prepare(db, bound_names, stats)); - } - - auto&& prep_attrs = _attrs->prepare(db, "[batch]", "[batch]"); - prep_attrs->collect_marker_specification(bound_names); - - cql3::statements::batch_statement batch_statement_(bound_names->size(), _type, std::move(statements), std::move(prep_attrs), stats); - batch_statement_.validate(); - - std::vector partition_key_bind_indices; - if (!have_multiple_cfs && batch_statement_.get_statements().size() > 0) { - partition_key_bind_indices = bound_names->get_partition_key_bind_indexes(batch_statement_.get_statements()[0]->s); - } - return std::make_unique(make_shared(std::move(batch_statement_)), - bound_names->get_specifications(), - std::move(partition_key_bind_indices)); -} - -} - - -} - -} - - diff --git a/scylla/cql3/statements/batch_statement.hh b/scylla/cql3/statements/batch_statement.hh deleted file mode 100644 index e156bba..0000000 --- a/scylla/cql3/statements/batch_statement.hh +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
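raw::batch_statement::prepare() above computes have_multiple_cfs while walking the child statements and only extracts partition-key bind indexes when every statement targets one table. A compact restatement of that single-table check (toy types, not the real API):

    #include <cassert>
    #include <string>
    #include <utility>
    #include <vector>

    using table_ref = std::pair<std::string, std::string>;  // (keyspace, table)

    // Partition-key bind indexes are only meaningful when every child
    // statement of the batch targets the same keyspace/table.
    bool single_table(const std::vector<table_ref>& tables) {
        for (const auto& t : tables) {
            if (t != tables.front()) {
                return false;
            }
        }
        return true;
    }

    int main() {
        assert(single_table({{"ks", "t1"}, {"ks", "t1"}}));
        assert(!single_table({{"ks", "t1"}, {"ks", "t2"}}));
    }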
- */ - -#include "cql3/cql_statement.hh" -#include "modification_statement.hh" -#include "raw/modification_statement.hh" -#include "raw/batch_statement.hh" -#include "service/storage_proxy.hh" -#include "transport/messages/result_message.hh" -#include "timestamp.hh" -#include "log.hh" -#include "to_string.hh" -#include -#include -#include -#include -#include - -#pragma once - -namespace cql3 { - -namespace statements { - -/** - * A BATCH statement parsed from a CQL query. - * - */ -class batch_statement : public cql_statement_no_metadata { - static logging::logger _logger; -public: - using type = raw::batch_statement::type; -private: - int _bound_terms; - type _type; - std::vector> _statements; - std::unique_ptr _attrs; - bool _has_conditions; - cql_stats& _stats; -public: - /** - * Creates a new BatchStatement from a list of statements and a - * Thrift consistency level. - * - * @param type type of the batch - * @param statements a list of UpdateStatements - * @param attrs additional attributes for statement (CL, timestamp, timeToLive) - */ - batch_statement(int bound_terms, type type_, - std::vector> statements, - std::unique_ptr attrs, - cql_stats& stats); - - batch_statement(type type_, - std::vector> statements, - std::unique_ptr attrs, - cql_stats& stats); - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override; - - virtual bool depends_on_keyspace(const sstring& ks_name) const override; - - virtual bool depends_on_column_family(const sstring& cf_name) const override; - - virtual uint32_t get_bound_terms() override; - - virtual future<> check_access(const service::client_state& state) override; - - // Validates a prepared batch statement without validating its nested statements. - void validate(); - - // The batch itself will be validated in either Parsed#prepare() - for regular CQL3 batches, - // or in QueryProcessor.processBatch() - for native protocol batches. - virtual void validate(distributed& proxy, const service::client_state& state) override; - - const std::vector>& get_statements(); -private: - future> get_mutations(distributed& storage, const query_options& options, bool local, api::timestamp_type now, tracing::trace_state_ptr trace_state); - -public: - /** - * Checks batch size to ensure threshold is met. If not, a warning is logged. - * @param cfs ColumnFamilies that will store the batch's mutations. 
- */ - static void verify_batch_size(const std::vector& mutations); - - virtual future> execute( - distributed& storage, service::query_state& state, const query_options& options) override; -private: - friend class batch_statement_executor; - future> do_execute( - distributed& storage, - service::query_state& query_state, const query_options& options, - bool local, api::timestamp_type now); - - future<> execute_without_conditions( - distributed& storage, - std::vector mutations, - db::consistency_level cl, - tracing::trace_state_ptr tr_state); - - future> execute_with_conditions( - distributed& storage, - const query_options& options, - service::query_state& state); -public: - virtual future> execute_internal( - distributed& proxy, - service::query_state& query_state, const query_options& options) override; - - // FIXME: no cql_statement::to_string() yet -#if 0 - sstring to_string() const { - return sprint("BatchStatement(type=%s, statements=%s)", _type, join(", ", _statements)); - } -#endif -}; - -} -} diff --git a/scylla/cql3/statements/bound.hh b/scylla/cql3/statements/bound.hh deleted file mode 100644 index d4c457f..0000000 --- a/scylla/cql3/statements/bound.hh +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include - -#pragma once - -namespace cql3 { - -namespace statements { - -enum class bound : int32_t { START = 0, END }; - -static inline -int32_t get_idx(bound b) { - return (int32_t)b; -} - -static inline -bound reverse(bound b) { - return bound((int32_t)b ^ 1); -} - -static inline -bool is_start(bound b) { - return b == bound::START; -} - -static inline -bool is_end(bound b) { - return b == bound::END; -} - -} - -} diff --git a/scylla/cql3/statements/cf_prop_defs.cc b/scylla/cql3/statements/cf_prop_defs.cc deleted file mode 100644 index b871520..0000000 --- a/scylla/cql3/statements/cf_prop_defs.cc +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
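bound.hh above encodes START as 0 and END as 1 precisely so that reverse() is a single XOR on the underlying integer. The trick in isolation:

    #include <cassert>
    #include <cstdint>

    enum class bound : int32_t { START = 0, END = 1 };

    // START <-> END by flipping the low bit, as reverse() in bound.hh does.
    constexpr bound reverse(bound b) {
        return bound(static_cast<int32_t>(b) ^ 1);
    }

    int main() {
        static_assert(reverse(bound::START) == bound::END);
        static_assert(reverse(bound::END) == bound::START);
        assert(reverse(reverse(bound::START)) == bound::START);
    }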
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/statements/cf_prop_defs.hh" - -#include - -namespace cql3 { - -namespace statements { - -const sstring cf_prop_defs::KW_COMMENT = "comment"; -const sstring cf_prop_defs::KW_READREPAIRCHANCE = "read_repair_chance"; -const sstring cf_prop_defs::KW_DCLOCALREADREPAIRCHANCE = "dclocal_read_repair_chance"; -const sstring cf_prop_defs::KW_GCGRACESECONDS = "gc_grace_seconds"; -const sstring cf_prop_defs::KW_MINCOMPACTIONTHRESHOLD = "min_threshold"; -const sstring cf_prop_defs::KW_MAXCOMPACTIONTHRESHOLD = "max_threshold"; -const sstring cf_prop_defs::KW_CACHING = "caching"; -const sstring cf_prop_defs::KW_DEFAULT_TIME_TO_LIVE = "default_time_to_live"; -const sstring cf_prop_defs::KW_MIN_INDEX_INTERVAL = "min_index_interval"; -const sstring cf_prop_defs::KW_MAX_INDEX_INTERVAL = "max_index_interval"; -const sstring cf_prop_defs::KW_SPECULATIVE_RETRY = "speculative_retry"; -const sstring cf_prop_defs::KW_BF_FP_CHANCE = "bloom_filter_fp_chance"; -const sstring cf_prop_defs::KW_MEMTABLE_FLUSH_PERIOD = "memtable_flush_period_in_ms"; - -const sstring cf_prop_defs::KW_COMPACTION = "compaction"; -const sstring cf_prop_defs::KW_COMPRESSION = "compression"; -const sstring cf_prop_defs::KW_CRC_CHECK_CHANCE = "crc_check_chance"; - -const sstring cf_prop_defs::COMPACTION_STRATEGY_CLASS_KEY = "class"; - -const sstring cf_prop_defs::COMPACTION_ENABLED_KEY = "enabled"; - -void cf_prop_defs::validate() { - // Skip validation if the compaction strategy class is already set as it means we've already - prepared (and redoing it would set strategyClass back to null, which we don't want) - if (_compaction_strategy_class) { - return; - } - - static std::set keywords({ - KW_COMMENT, KW_READREPAIRCHANCE, KW_DCLOCALREADREPAIRCHANCE, - KW_GCGRACESECONDS, KW_CACHING, KW_DEFAULT_TIME_TO_LIVE, - KW_MIN_INDEX_INTERVAL, KW_MAX_INDEX_INTERVAL, KW_SPECULATIVE_RETRY, - KW_BF_FP_CHANCE, KW_MEMTABLE_FLUSH_PERIOD, KW_COMPACTION, - KW_COMPRESSION, KW_CRC_CHECK_CHANCE - }); - static std::set obsolete_keywords({ - sstring("index_interval"), - sstring("replicate_on_write"), -
sstring("populate_io_cache_on_flush"), - }); - property_definitions::validate(keywords, obsolete_keywords); - - auto compaction_options = get_compaction_options(); - if (!compaction_options.empty()) { - auto strategy = compaction_options.find(COMPACTION_STRATEGY_CLASS_KEY); - if (strategy == compaction_options.end()) { - throw exceptions::configuration_exception(sstring("Missing sub-option '") + COMPACTION_STRATEGY_CLASS_KEY + "' for the '" + KW_COMPACTION + "' option."); - } - _compaction_strategy_class = sstables::compaction_strategy::type(strategy->second); - remove_from_map_if_exists(KW_COMPACTION, COMPACTION_STRATEGY_CLASS_KEY); - -#if 0 - CFMetaData.validateCompactionOptions(compactionStrategyClass, compactionOptions); -#endif - } - - auto compression_options = get_compression_options(); - if (compression_options && !compression_options->empty()) { - auto sstable_compression_class = compression_options->find(sstring(compression_parameters::SSTABLE_COMPRESSION)); - if (sstable_compression_class == compression_options->end()) { - throw exceptions::configuration_exception(sstring("Missing sub-option '") + compression_parameters::SSTABLE_COMPRESSION + "' for the '" + KW_COMPRESSION + "' option."); - } - compression_parameters cp(*compression_options); - cp.validate(); - } - - validate_minimum_int(KW_DEFAULT_TIME_TO_LIVE, 0, DEFAULT_DEFAULT_TIME_TO_LIVE); - - auto min_index_interval = get_int(KW_MIN_INDEX_INTERVAL, DEFAULT_MIN_INDEX_INTERVAL); - auto max_index_interval = get_int(KW_MAX_INDEX_INTERVAL, DEFAULT_MAX_INDEX_INTERVAL); - if (min_index_interval < 1) { - throw exceptions::configuration_exception(KW_MIN_INDEX_INTERVAL + " must be greater than 0"); - } - if (max_index_interval < min_index_interval) { - throw exceptions::configuration_exception(KW_MAX_INDEX_INTERVAL + " must be greater than " + KW_MIN_INDEX_INTERVAL); - } - - speculative_retry::from_sstring(get_string(KW_SPECULATIVE_RETRY, speculative_retry(speculative_retry::type::NONE, 0).to_sstring())); -} - -std::map cf_prop_defs::get_compaction_options() const { - auto compaction_options = get_map(KW_COMPACTION); - if (compaction_options ) { - return compaction_options.value(); - } - return std::map{}; -} - -stdx::optional> cf_prop_defs::get_compression_options() const { - auto compression_options = get_map(KW_COMPRESSION); - if (compression_options) { - return { compression_options.value() }; - } - return { }; -} - -int32_t cf_prop_defs::get_default_time_to_live() const -{ - return get_int(KW_DEFAULT_TIME_TO_LIVE, 0); -} - -int32_t cf_prop_defs::get_gc_grace_seconds() const -{ - return get_int(KW_GCGRACESECONDS, DEFAULT_GC_GRACE_SECONDS); -} - -void cf_prop_defs::apply_to_builder(schema_builder& builder) { - if (has_property(KW_COMMENT)) { - builder.set_comment(get_string(KW_COMMENT, "")); - } - - if (has_property(KW_READREPAIRCHANCE)) { - builder.set_read_repair_chance(get_double(KW_READREPAIRCHANCE, builder.get_read_repair_chance())); - } - - if (has_property(KW_DCLOCALREADREPAIRCHANCE)) { - builder.set_dc_local_read_repair_chance(get_double(KW_DCLOCALREADREPAIRCHANCE, builder.get_dc_local_read_repair_chance())); - } - - if (has_property(KW_GCGRACESECONDS)) { - builder.set_gc_grace_seconds(get_int(KW_GCGRACESECONDS, builder.get_gc_grace_seconds())); - } - - std::experimental::optional tmp_value = {}; - if (has_property(KW_COMPACTION)) { - if (get_compaction_options().count(KW_MINCOMPACTIONTHRESHOLD)) { - tmp_value = get_compaction_options().at(KW_MINCOMPACTIONTHRESHOLD); - } - } - int min_compaction_threshold = 
to_int(KW_MINCOMPACTIONTHRESHOLD, tmp_value, builder.get_min_compaction_threshold()); - - tmp_value = {}; - if (has_property(KW_COMPACTION)) { - if (get_compaction_options().count(KW_MAXCOMPACTIONTHRESHOLD)) { - tmp_value = get_compaction_options().at(KW_MAXCOMPACTIONTHRESHOLD); - } - } - int max_compaction_threshold = to_int(KW_MAXCOMPACTIONTHRESHOLD, tmp_value, builder.get_max_compaction_threshold()); - - if (min_compaction_threshold <= 0 || max_compaction_threshold <= 0) - throw exceptions::configuration_exception("Disabling compaction by setting compaction thresholds to 0 has been deprecated, set the compaction option 'enabled' to false instead."); - builder.set_min_compaction_threshold(min_compaction_threshold); - builder.set_max_compaction_threshold(max_compaction_threshold); - - if (has_property(KW_COMPACTION)) { - if (get_compaction_options().count(COMPACTION_ENABLED_KEY)) { - auto enabled = boost::algorithm::iequals(get_compaction_options().at(COMPACTION_ENABLED_KEY), "true"); - builder.set_compaction_enabled(enabled); - } - } - - builder.set_default_time_to_live(gc_clock::duration(get_int(KW_DEFAULT_TIME_TO_LIVE, DEFAULT_DEFAULT_TIME_TO_LIVE))); - - if (has_property(KW_SPECULATIVE_RETRY)) { - builder.set_speculative_retry(get_string(KW_SPECULATIVE_RETRY, builder.get_speculative_retry().to_sstring())); - } - - if (has_property(KW_MEMTABLE_FLUSH_PERIOD)) { - builder.set_memtable_flush_period(get_int(KW_MEMTABLE_FLUSH_PERIOD, builder.get_memtable_flush_period())); - } - - if (has_property(KW_MIN_INDEX_INTERVAL)) { - builder.set_min_index_interval(get_int(KW_MIN_INDEX_INTERVAL, builder.get_min_index_interval())); - } - - if (has_property(KW_MAX_INDEX_INTERVAL)) { - builder.set_max_index_interval(get_int(KW_MAX_INDEX_INTERVAL, builder.get_max_index_interval())); - } - - if (_compaction_strategy_class) { - builder.set_compaction_strategy(*_compaction_strategy_class); - builder.set_compaction_strategy_options(get_compaction_options()); - } - - builder.set_bloom_filter_fp_chance(get_double(KW_BF_FP_CHANCE, builder.get_bloom_filter_fp_chance())); - auto compression_options = get_compression_options(); - if (compression_options) { - builder.set_compressor_params(compression_parameters(*compression_options)); - } -#if 0 - CachingOptions cachingOptions = getCachingOptions(); - if (cachingOptions != null) - cfm.caching(cachingOptions); -#endif -} - -void cf_prop_defs::validate_minimum_int(const sstring& field, int32_t minimum_value, int32_t default_value) const -{ - auto val = get_int(field, default_value); - if (val < minimum_value) { - throw exceptions::configuration_exception(sprint("%s cannot be smaller than %s, (default %s)", - field, minimum_value, default_value)); - } -} - -} - -} diff --git a/scylla/cql3/statements/cf_prop_defs.hh b/scylla/cql3/statements/cf_prop_defs.hh deleted file mode 100644 index c501845..0000000 --- a/scylla/cql3/statements/cf_prop_defs.hh +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
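validate_minimum_int() above is the guard used for options such as default_time_to_live: an absent option falls back to its default, and values below the minimum are rejected. A standalone equivalent, with the exception type simplified and the message format matching the sprint() in the source:

    #include <optional>
    #include <sstream>
    #include <stdexcept>

    // Simplified analogue of cf_prop_defs::validate_minimum_int().
    int validate_minimum_int(const char* field, std::optional<int> value,
                             int minimum, int fallback) {
        const int v = value.value_or(fallback);
        if (v < minimum) {
            std::ostringstream msg;
            msg << field << " cannot be smaller than " << minimum
                << ", (default " << fallback << ")";
            throw std::invalid_argument(msg.str());
        }
        return v;
    }

    int main() {
        validate_minimum_int("default_time_to_live", std::nullopt, 0, 0);  // ok
        try {
            validate_minimum_int("default_time_to_live", -1, 0, 0);  // rejected
            return 1;
        } catch (const std::invalid_argument&) {
            return 0;
        }
    }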
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/statements/property_definitions.hh" - -#include "schema.hh" -#include "database.hh" -#include "schema_builder.hh" -#include "compaction_strategy.hh" - -namespace cql3 { - -namespace statements { - -class cf_prop_defs : public property_definitions { -public: - static const sstring KW_COMMENT; - static const sstring KW_READREPAIRCHANCE; - static const sstring KW_DCLOCALREADREPAIRCHANCE; - static const sstring KW_GCGRACESECONDS; - static const sstring KW_MINCOMPACTIONTHRESHOLD; - static const sstring KW_MAXCOMPACTIONTHRESHOLD; - static const sstring KW_CACHING; - static const sstring KW_DEFAULT_TIME_TO_LIVE; - static const sstring KW_MIN_INDEX_INTERVAL; - static const sstring KW_MAX_INDEX_INTERVAL; - static const sstring KW_SPECULATIVE_RETRY; - static const sstring KW_BF_FP_CHANCE; - static const sstring KW_MEMTABLE_FLUSH_PERIOD; - - static const sstring KW_COMPACTION; - static const sstring KW_COMPRESSION; - static const sstring KW_CRC_CHECK_CHANCE; - - static const sstring COMPACTION_STRATEGY_CLASS_KEY; - static const sstring COMPACTION_ENABLED_KEY; - - // FIXME: In origin the following consts are in CFMetaData. 
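The validate() definition shown earlier checks each supplied option name against a whitelist built from these KW_ constants, plus a set of obsolete names that are still accepted for compatibility. A minimal sketch of that style of option validation (plain std types; the treatment of obsolete names is illustrative):

    #include <map>
    #include <set>
    #include <stdexcept>
    #include <string>

    // Minimal analogue of property_definitions::validate(): unknown option
    // names are rejected outright; obsolete ones are still accepted.
    void validate_options(const std::map<std::string, std::string>& props,
                          const std::set<std::string>& keywords,
                          const std::set<std::string>& obsolete) {
        for (const auto& p : props) {
            if (keywords.count(p.first) || obsolete.count(p.first)) {
                continue;
            }
            throw std::invalid_argument("Unknown property '" + p.first + "'");
        }
    }

    int main() {
        const std::set<std::string> keywords{"comment", "gc_grace_seconds"};
        const std::set<std::string> obsolete{"replicate_on_write"};
        validate_options({{"comment", "x"}, {"replicate_on_write", "true"}},
                         keywords, obsolete);
        try {
            validate_options({{"commment", "x"}}, keywords, obsolete);  // typo
            return 1;
        } catch (const std::invalid_argument&) {
            return 0;
        }
    }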
- static constexpr int32_t DEFAULT_DEFAULT_TIME_TO_LIVE = 0; - static constexpr int32_t DEFAULT_MIN_INDEX_INTERVAL = 128; - static constexpr int32_t DEFAULT_MAX_INDEX_INTERVAL = 2048; -private: - std::experimental::optional _compaction_strategy_class; -public: - void validate(); - std::map get_compaction_options() const; - stdx::optional> get_compression_options() const; -#if 0 - public CachingOptions getCachingOptions() throws SyntaxException, ConfigurationException - { - CachingOptions options = null; - Object val = properties.get(KW_CACHING); - if (val == null) - return null; - else if (val instanceof Map) - options = CachingOptions.fromMap(getMap(KW_CACHING)); - else if (val instanceof String) // legacy syntax - { - options = CachingOptions.fromString(getSimple(KW_CACHING)); - logger.warn("Setting caching options with deprecated syntax."); - } - return options; - } -#endif - int32_t get_default_time_to_live() const; - int32_t get_gc_grace_seconds() const; - - void apply_to_builder(schema_builder& builder); - void validate_minimum_int(const sstring& field, int32_t minimum_value, int32_t default_value) const; -}; - -} - -} diff --git a/scylla/cql3/statements/cf_properties.hh b/scylla/cql3/statements/cf_properties.hh deleted file mode 100644 index b6be059..0000000 --- a/scylla/cql3/statements/cf_properties.hh +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/statements/cf_prop_defs.hh" - -namespace cql3 { - -namespace statements { - -/** - * Class for common statement properties. 
- */ -class cf_properties final { - const ::shared_ptr _properties = ::make_shared(); - bool _use_compact_storage = false; - std::vector, bool>> _defined_ordering; // Insertion ordering is important -public: - auto& properties() const { - return _properties; - } - - bool use_compact_storage() const { - return _use_compact_storage; - } - - void set_compact_storage() { - _use_compact_storage = true; - } - - auto& defined_ordering() const { - return _defined_ordering; - } - - data_type get_reversable_type(::shared_ptr t, data_type type) const { - auto is_reversed = find_ordering_info(t).value_or(false); - if (!is_reversed && type->is_reversed()) { - return static_pointer_cast(type)->underlying_type(); - } - if (is_reversed && !type->is_reversed()) { - return reversed_type_impl::get_instance(type); - } - return type; - } - - std::experimental::optional find_ordering_info(::shared_ptr type) const { - for (auto& t: _defined_ordering) { - if (*(t.first) == *type) { - return t.second; - } - } - return {}; - } - - void set_ordering(::shared_ptr alias, bool reversed) { - _defined_ordering.emplace_back(alias, reversed); - } - - void validate() { - _properties->validate(); - } -}; - -} - -} diff --git a/scylla/cql3/statements/cf_statement.cc b/scylla/cql3/statements/cf_statement.cc deleted file mode 100644 index 775ad09..0000000 --- a/scylla/cql3/statements/cf_statement.cc +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2014-2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "raw/cf_statement.hh" -#include "service/client_state.hh" - -namespace cql3 { - -namespace statements { - -namespace raw { - -cf_statement::cf_statement(::shared_ptr cf_name) - : _cf_name(std::move(cf_name)) -{ -} - -void cf_statement::prepare_keyspace(const service::client_state& state) -{ - if (!_cf_name->has_keyspace()) { - // XXX: We explicitly only want to call state.getKeyspace() in this case, as we don't want to throw - if not logged in any keyspace but a keyspace is explicitly set on the statement.
So don't move - // the call outside the 'if' or replace the method by 'prepareKeyspace(state.getKeyspace())' - _cf_name->set_keyspace(state.get_keyspace(), true); - } -} - -void cf_statement::prepare_keyspace(sstring keyspace) -{ - if (!_cf_name->has_keyspace()) { - _cf_name->set_keyspace(keyspace, true); - } -} - -const sstring& cf_statement::keyspace() const -{ - assert(_cf_name->has_keyspace()); // "The statement hasn't be prepared correctly"; - return _cf_name->get_keyspace(); -} - -const sstring& cf_statement::column_family() const -{ - return _cf_name->get_column_family(); -} - -} - -} - -} diff --git a/scylla/cql3/statements/create_index_statement.cc b/scylla/cql3/statements/create_index_statement.cc deleted file mode 100644 index 91403fb..0000000 --- a/scylla/cql3/statements/create_index_statement.cc +++ /dev/null @@ -1,277 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "create_index_statement.hh" -#include "prepared_statement.hh" -#include "validation.hh" -#include "service/storage_proxy.hh" -#include "service/migration_manager.hh" -#include "service/storage_service.hh" -#include "schema.hh" -#include "schema_builder.hh" - -#include -#include - -namespace cql3 { - -namespace statements { - -create_index_statement::create_index_statement(::shared_ptr name, - ::shared_ptr index_name, - std::vector<::shared_ptr> raw_targets, - ::shared_ptr properties, - bool if_not_exists) - : schema_altering_statement(name) - , _index_name(index_name->get_idx()) - , _raw_targets(raw_targets) - , _properties(properties) - , _if_not_exists(if_not_exists) -{ -} - -future<> -create_index_statement::check_access(const service::client_state& state) { - return state.has_column_family_access(keyspace(), column_family(), auth::permission::ALTER); -} - -void -create_index_statement::validate(distributed& proxy, const service::client_state& state) -{ - auto& db = proxy.local().get_db().local(); - auto schema = validation::validate_column_family(db, keyspace(), column_family()); - - if (schema->is_counter()) { - throw exceptions::invalid_request_exception("Secondary indexes are not supported on counter tables"); - } - - if (schema->is_view()) { - throw exceptions::invalid_request_exception("Secondary indexes are not supported on materialized views"); - } - - std::vector<::shared_ptr> targets; - for (auto& raw_target : _raw_targets) { - targets.emplace_back(raw_target->prepare(schema)); - } - - if (targets.empty() && !_properties->is_custom) { - throw exceptions::invalid_request_exception("Only CUSTOM indexes can be created without specifying a target column"); - } - - if (targets.size() > 1) { - validate_targets_for_multi_column_index(targets); - } - - for (auto& target : targets) { - auto cd = schema->get_column_definition(target->column->name()); - - if (cd == nullptr) { - throw exceptions::invalid_request_exception( - sprint("No column definition found for column %s", *target->column)); - } - - // Origin TODO: we could lift that limitation - if ((schema->is_dense() || !schema->thrift().has_compound_comparator()) && - cd->kind != column_kind::regular_column) { - throw exceptions::invalid_request_exception( - "Secondary indexes are not supported on PRIMARY KEY columns in COMPACT STORAGE tables"); - } - - if (cd->kind == column_kind::partition_key && cd->is_on_all_components()) { - throw exceptions::invalid_request_exception( - sprint( - "Cannot create secondary index on partition key column %s", - *target->column)); - } - - bool is_map = dynamic_cast(cd->type.get()) != nullptr - && dynamic_cast(cd->type.get())->is_map(); - bool is_frozen_collection = cd->type->is_collection() && !cd->type->is_multi_cell(); - - if (is_frozen_collection) { - validate_for_frozen_collection(target); - } else { - validate_not_full_index(target); - validate_is_values_index_if_target_column_not_collection(cd, target); - validate_target_column_is_map_if_index_involves_keys(is_map, target); - } - } - - if (db.existing_index_names(keyspace()).count(_index_name) > 0) { - if (_if_not_exists) { - return; - } else { - throw exceptions::invalid_request_exception("Index already exists"); - } - } - - _properties->validate(); -} - -void create_index_statement::validate_for_frozen_collection(::shared_ptr target) const -{ - if (target->type != index_target::target_type::full) { - throw exceptions::invalid_request_exception( - sprint("Cannot create index on %s of frozen column %s", - 
index_target::index_option(target->type), - *target->column)); - } -} - -void create_index_statement::validate_not_full_index(::shared_ptr target) const -{ - if (target->type == index_target::target_type::full) { - throw exceptions::invalid_request_exception("full() indexes can only be created on frozen collections"); - } -} - -void create_index_statement::validate_is_values_index_if_target_column_not_collection( - const column_definition* cd, ::shared_ptr target) const -{ - if (!cd->type->is_collection() - && target->type != index_target::target_type::values) { - throw exceptions::invalid_request_exception( - sprint("Cannot create index on %s of column %s; only non-frozen collections support %s indexes", - index_target::index_option(target->type), - *target->column, - index_target::index_option(target->type))); - } -} - -void create_index_statement::validate_target_column_is_map_if_index_involves_keys(bool is_map, ::shared_ptr target) const -{ - if (target->type == index_target::target_type::keys - || target->type == index_target::target_type::keys_and_values) { - if (!is_map) { - throw exceptions::invalid_request_exception( - sprint("Cannot create index on %s of column %s with non-map type", - index_target::index_option(target->type), *target->column)); - } - } -} - -void create_index_statement::validate_targets_for_multi_column_index(std::vector<::shared_ptr> targets) const -{ - if (!_properties->is_custom) { - throw exceptions::invalid_request_exception("Only CUSTOM indexes support multiple columns"); - } - std::unordered_set<::shared_ptr> columns; - for (auto& target : targets) { - if (columns.count(target->column) > 0) { - throw exceptions::invalid_request_exception(sprint("Duplicate column %s in index target list", target->column->name())); - } - columns.emplace(target->column); - } -} - -future<::shared_ptr> -create_index_statement::announce_migration(distributed& proxy, bool is_local_only) { - if (!service::get_local_storage_service().cluster_supports_indexes()) { - throw exceptions::invalid_request_exception("Index support is not enabled"); - } - auto& db = proxy.local().get_db().local(); - auto schema = db.find_schema(keyspace(), column_family()); - std::vector<::shared_ptr> targets; - for (auto& raw_target : _raw_targets) { - targets.emplace_back(raw_target->prepare(schema)); - } - sstring accepted_name = _index_name; - if (accepted_name.empty()) { - std::experimental::optional index_name_root; - if (targets.size() == 1) { - index_name_root = targets[0]->column->to_string(); - } - accepted_name = db.get_available_index_name(keyspace(), column_family(), index_name_root); - } - index_metadata_kind kind; - index_options_map index_options; - if (_properties->is_custom) { - kind = index_metadata_kind::custom; - index_options = _properties->get_options(); - } else { - kind = schema->is_compound() ? 
index_metadata_kind::composites : index_metadata_kind::keys; - } - auto index = make_index_metadata(schema, targets, accepted_name, kind, index_options); - auto existing_index = schema->find_index_noname(index); - if (existing_index) { - if (_if_not_exists) { - return make_ready_future<::shared_ptr>(nullptr); - } else { - throw exceptions::invalid_request_exception( - sprint("Index %s is a duplicate of existing index %s", index.name(), existing_index.value().name())); - } - } - schema_builder builder{schema}; - builder.with_index(index); - return service::get_local_migration_manager().announce_column_family_update( - builder.build(), false, {}, is_local_only).then([this]() { - using namespace cql_transport; - return make_shared( - event::schema_change::change_type::UPDATED, - event::schema_change::target_type::TABLE, - keyspace(), - column_family()); - }); -} - -std::unique_ptr -create_index_statement::prepare(database& db, cql_stats& stats) { - return std::make_unique(make_shared(*this)); -} - -index_metadata create_index_statement::make_index_metadata(schema_ptr schema, - const std::vector<::shared_ptr>& targets, - const sstring& name, - index_metadata_kind kind, - const index_options_map& options) -{ - index_options_map new_options = options; - auto target_option = boost::algorithm::join(targets | boost::adaptors::transformed( - [schema](const auto &target) -> sstring { - return target->as_cql_string(schema); - }), ","); - new_options.emplace(index_target::target_option_name, target_option); - return index_metadata{name, new_options, kind}; -} - -} - -} diff --git a/scylla/cql3/statements/create_index_statement.hh b/scylla/cql3/statements/create_index_statement.hh deleted file mode 100644 index 04bb063..0000000 --- a/scylla/cql3/statements/create_index_statement.hh +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
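make_index_metadata() above records the indexed targets by rendering each target as its CQL string and comma-joining the results under a single option key. A self-contained sketch of that packing step follows, assuming a plain stand-in struct for index_target and the literal key "target" in place of the real index_target::target_option_name:

#include <boost/algorithm/string/join.hpp>
#include <boost/range/adaptor/transformed.hpp>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Stand-in for the real index_target: only the CQL rendering matters here.
struct index_target {
    std::string cql;
    std::string as_cql_string() const { return cql; }
};

int main() {
    std::vector<index_target> targets = {{"a"}, {"keys(m)"}};
    std::map<std::string, std::string> options; // plays the role of index_options_map

    // Same shape as the deleted code: transform each target to its CQL string,
    // then comma-join the results into one option value.
    options.emplace("target", boost::algorithm::join(
        targets | boost::adaptors::transformed(
            [](const index_target& t) { return t.as_cql_string(); }), ","));

    std::cout << options["target"] << "\n"; // prints: a,keys(m)
    return 0;
}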
- */ - -#pragma once - -#include "schema_altering_statement.hh" -#include "index_prop_defs.hh" -#include "index_target.hh" -#include "raw/cf_statement.hh" - -#include "cql3/index_name.hh" -#include "cql3/cql3_type.hh" - -#include "service/migration_manager.hh" -#include "schema.hh" - -#include "core/shared_ptr.hh" - -#include -#include -#include -#include - - -namespace cql3 { - -namespace statements { - -/** A CREATE INDEX statement parsed from a CQL query. */ -class create_index_statement : public schema_altering_statement { - const sstring _index_name; - const std::vector<::shared_ptr> _raw_targets; - const ::shared_ptr _properties; - const bool _if_not_exists; - - -public: - create_index_statement(::shared_ptr name, ::shared_ptr index_name, - std::vector<::shared_ptr> raw_targets, - ::shared_ptr properties, bool if_not_exists); - - future<> check_access(const service::client_state& state) override; - void validate(distributed&, const service::client_state& state) override; - future<::shared_ptr> announce_migration(distributed&, bool is_local_only) override; - - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; -private: - void validate_for_frozen_collection(::shared_ptr target) const; - void validate_not_full_index(::shared_ptr target) const; - void validate_is_values_index_if_target_column_not_collection(const column_definition* cd, - ::shared_ptr target) const; - void validate_target_column_is_map_if_index_involves_keys(bool is_map, ::shared_ptr target) const; - void validate_targets_for_multi_column_index(std::vector<::shared_ptr> targets) const; - static index_metadata make_index_metadata(schema_ptr schema, - const std::vector<::shared_ptr>& targets, - const sstring& name, - index_metadata_kind kind, - const index_options_map& options); -}; - -} -} diff --git a/scylla/cql3/statements/create_keyspace_statement.cc b/scylla/cql3/statements/create_keyspace_statement.cc deleted file mode 100644 index c31f6c2..0000000 --- a/scylla/cql3/statements/create_keyspace_statement.cc +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/statements/create_keyspace_statement.hh" -#include "prepared_statement.hh" - -#include "service/migration_manager.hh" - -#include - -namespace cql3 { - -namespace statements { - -create_keyspace_statement::create_keyspace_statement(const sstring& name, shared_ptr attrs, bool if_not_exists) - : _name{name} - , _attrs{attrs} - , _if_not_exists{if_not_exists} -{ -} - -const sstring& create_keyspace_statement::keyspace() const -{ - return _name; -} - -future<> create_keyspace_statement::check_access(const service::client_state& state) -{ - return state.has_all_keyspaces_access(auth::permission::CREATE); -} - -void create_keyspace_statement::validate(distributed&, const service::client_state& state) -{ - std::string name; - name.resize(_name.length()); - std::transform(_name.begin(), _name.end(), name.begin(), ::tolower); - if (is_system_keyspace(name)) { - throw exceptions::invalid_request_exception("system keyspace is not user-modifiable"); - } - // keyspace name - std::regex name_regex("\\w+"); - if (!std::regex_match(name, name_regex)) { - throw exceptions::invalid_request_exception(sprint("\"%s\" is not a valid keyspace name", _name.c_str())); - } - if (name.length() > schema::NAME_LENGTH) { - throw exceptions::invalid_request_exception(sprint("Keyspace names shouldn't be more than %d characters long (got \"%s\")", schema::NAME_LENGTH, _name.c_str())); - } - - _attrs->validate(); - - if (!bool(_attrs->get_replication_strategy_class())) { - throw exceptions::configuration_exception("Missing mandatory replication strategy class"); - } -#if 0 - // The strategy is validated through KSMetaData.validate() in announceNewKeyspace below. - // However, for backward compatibility with thrift, this doesn't validate unexpected options yet, - // so doing proper validation here. - AbstractReplicationStrategy.validateReplicationStrategy(name, - AbstractReplicationStrategy.getClass(attrs.getReplicationStrategyClass()), - StorageService.instance.getTokenMetadata(), - DatabaseDescriptor.getEndpointSnitch(), - attrs.getReplicationOptions()); -#endif -} - -future> create_keyspace_statement::announce_migration(distributed& proxy, bool is_local_only) -{ - return make_ready_future<>().then([this, is_local_only] { - return service::get_local_migration_manager().announce_new_keyspace(_attrs->as_ks_metadata(_name), is_local_only); - }).then_wrapped([this] (auto&& f) { - try { - f.get(); - using namespace cql_transport; - return make_shared( - event::schema_change::change_type::CREATED, - this->keyspace()); - } catch (const exceptions::already_exists_exception& e) { - if (_if_not_exists) { - return ::shared_ptr(); - } - throw e; - } - }); -} - -std::unique_ptr -cql3::statements::create_keyspace_statement::prepare(database& db, cql_stats& stats) { - return std::make_unique(make_shared(*this)); -} - -} - -} diff --git a/scylla/cql3/statements/create_keyspace_statement.hh b/scylla/cql3/statements/create_keyspace_statement.hh deleted file mode 100644 index 13487a3..0000000 --- a/scylla/cql3/statements/create_keyspace_statement.hh +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
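The name checks in create_keyspace_statement::validate() above are self-contained: lowercase the name, match it against \w+, and cap its length. A compilable sketch follows; NAME_LENGTH = 48 is an assumption standing in for schema::NAME_LENGTH, and std::invalid_argument replaces the CQL exception types.

#include <algorithm>
#include <cctype>
#include <iostream>
#include <regex>
#include <stdexcept>
#include <string>

// Assumed stand-in for schema::NAME_LENGTH.
constexpr std::size_t NAME_LENGTH = 48;

void validate_keyspace_name(const std::string& raw) {
    // Lowercase first, as the deleted validate() does, so the system-keyspace
    // check and the pattern both see a canonical form.
    std::string name(raw.size(), '\0');
    std::transform(raw.begin(), raw.end(), name.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    static const std::regex name_regex("\\w+");
    if (!std::regex_match(name, name_regex)) {
        throw std::invalid_argument("\"" + raw + "\" is not a valid keyspace name");
    }
    if (name.size() > NAME_LENGTH) {
        throw std::invalid_argument("keyspace name longer than " +
                                    std::to_string(NAME_LENGTH) + " characters: \"" + raw + "\"");
    }
}

int main() {
    validate_keyspace_name("Redis_Data");        // passes: \w+ covers letters, digits, '_'
    try {
        validate_keyspace_name("bad-name!");     // rejected by the \w+ pattern
    } catch (const std::invalid_argument& e) {
        std::cout << e.what() << "\n";
    }
    return 0;
}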
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/statements/schema_altering_statement.hh" -#include "cql3/statements/ks_prop_defs.hh" -#include "transport/event.hh" - -#include "core/shared_ptr.hh" - -namespace cql3 { - -namespace statements { - -/** A CREATE KEYSPACE statement parsed from a CQL query. */ -class create_keyspace_statement : public schema_altering_statement { -private: - sstring _name; - shared_ptr _attrs; - bool _if_not_exists; - -public: - /** - * Creates a new CreateKeyspaceStatement instance for a given - * keyspace name and keyword arguments. - * - * @param name the name of the keyspace to create - * @param attrs map of the raw keyword arguments that followed the WITH keyword. - */ - create_keyspace_statement(const sstring& name, shared_ptr attrs, bool if_not_exists); - - virtual const sstring& keyspace() const override; - - virtual future<> check_access(const service::client_state& state) override; - - /** - * The CqlParser only goes as far as extracting the keyword arguments - * from these statements, so this method is responsible for processing and - * validating. - * - * @throws InvalidRequestException if arguments are missing or unacceptable - */ - virtual void validate(distributed&, const service::client_state& state) override; - - virtual future> announce_migration(distributed& proxy, bool is_local_only) override; - - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; -}; - -} - -} diff --git a/scylla/cql3/statements/create_table_statement.cc b/scylla/cql3/statements/create_table_statement.cc deleted file mode 100644 index 0d8b60b..0000000 --- a/scylla/cql3/statements/create_table_statement.cc +++ /dev/null @@ -1,390 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - - -#include -#include - -#include -#include - -#include "cql3/statements/create_table_statement.hh" -#include "cql3/statements/prepared_statement.hh" - -#include "schema_builder.hh" -#include "service/storage_service.hh" - -namespace cql3 { - -namespace statements { - -create_table_statement::create_table_statement(::shared_ptr name, - ::shared_ptr properties, - bool if_not_exists, - column_set_type static_columns) - : schema_altering_statement{name} - , _static_columns{static_columns} - , _properties{properties} - , _if_not_exists{if_not_exists} -{ -} - -future<> create_table_statement::check_access(const service::client_state& state) { - return state.has_keyspace_access(keyspace(), auth::permission::CREATE); -} - -void create_table_statement::validate(distributed&, const service::client_state& state) { - // validated in announceMigration() -} - -// Column definitions -std::vector create_table_statement::get_columns() -{ - std::vector column_defs; - for (auto&& col : _columns) { - column_kind kind = column_kind::regular_column; - if (_static_columns.count(col.first)) { - kind = column_kind::static_column; - } - column_defs.emplace_back(col.first->name(), col.second, kind); - } - return column_defs; -} - -future> create_table_statement::announce_migration(distributed& proxy, bool is_local_only) { - return make_ready_future<>().then([this, is_local_only] { - return service::get_local_migration_manager().announce_new_column_family(get_cf_meta_data(), is_local_only); - }).then_wrapped([this] (auto&& f) { - try { - f.get(); - using namespace cql_transport; - return make_shared( - event::schema_change::change_type::CREATED, - event::schema_change::target_type::TABLE, - this->keyspace(), - this->column_family()); - } catch (const exceptions::already_exists_exception& e) { - if (_if_not_exists) { - return ::shared_ptr(); - } - throw e; - } - }); -} - -/** - * Returns a CFMetaData instance based on the parameters parsed from this - * CREATE statement, or defaults where applicable. - * - * @return a CFMetaData instance corresponding to the values parsed from this statement - * @throws InvalidRequestException on failure to validate parsed parameters - */ -schema_ptr create_table_statement::get_cf_meta_data() { - schema_builder builder{keyspace(), column_family()}; - apply_properties_to(builder); - return builder.build(_use_compact_storage ? 
schema_builder::compact_storage::yes : schema_builder::compact_storage::no); -} - -void create_table_statement::apply_properties_to(schema_builder& builder) { - auto&& columns = get_columns(); - for (auto&& column : columns) { - builder.with_column(column); - } -#if 0 - cfmd.defaultValidator(defaultValidator) - .addAllColumnDefinitions(getColumns(cfmd)) -#endif - add_column_metadata_from_aliases(builder, _key_aliases, _partition_key_types, column_kind::partition_key); - add_column_metadata_from_aliases(builder, _column_aliases, _clustering_key_types, column_kind::clustering_key); -#if 0 - if (valueAlias != null) - addColumnMetadataFromAliases(cfmd, Collections.singletonList(valueAlias), defaultValidator, ColumnDefinition.Kind.COMPACT_VALUE); -#endif - - _properties->apply_to_builder(builder); -} - -void create_table_statement::add_column_metadata_from_aliases(schema_builder& builder, std::vector<bytes> aliases, const std::vector<data_type>& types, column_kind kind) -{ - assert(aliases.size() == types.size()); - for (size_t i = 0; i < aliases.size(); i++) { - if (!aliases[i].empty()) { - builder.with_column(aliases[i], types[i], kind); - } - } -} - -std::unique_ptr<prepared_statement> -create_table_statement::prepare(database& db, cql_stats& stats) { - // Cannot happen; create_table_statement is never instantiated as a raw statement - // (instead we instantiate create_table_statement::raw_statement) - abort(); -} - - -create_table_statement::raw_statement::raw_statement(::shared_ptr<cf_name> name, bool if_not_exists) - : cf_statement{std::move(name)} - , _if_not_exists{if_not_exists} -{ } - -std::unique_ptr<prepared_statement> create_table_statement::raw_statement::prepare(database& db, cql_stats& stats) { - // Column family name - const sstring& cf_name = _cf_name->get_column_family(); - std::regex name_regex("\\w+"); - if (!std::regex_match(std::string(cf_name), name_regex)) { - throw exceptions::invalid_request_exception(sprint("\"%s\" is not a valid table name (must contain alphanumeric characters only: [0-9A-Za-z]+)", cf_name.c_str())); - } - if (cf_name.size() > size_t(schema::NAME_LENGTH)) { - throw exceptions::invalid_request_exception(sprint("Table names shouldn't be more than %d characters long (got \"%s\")", schema::NAME_LENGTH, cf_name.c_str())); - } - - // Check for duplicate column names - auto i = boost::range::adjacent_find(_defined_names, [] (auto&& e1, auto&& e2) { - return e1->text() == e2->text(); - }); - if (i != _defined_names.end()) { - throw exceptions::invalid_request_exception(sprint("Multiple definition of identifier %s", (*i)->text())); - } - - _properties.validate(); - - auto stmt = ::make_shared<create_table_statement>(_cf_name, _properties.properties(), _if_not_exists, _static_columns); - - std::experimental::optional<std::map<bytes, data_type>> defined_multi_cell_collections; - for (auto&& entry : _definitions) { - ::shared_ptr<column_identifier> id = entry.first; - ::shared_ptr<cql3_type> pt = entry.second->prepare(db, keyspace()); - if (pt->is_counter() && !service::get_local_storage_service().cluster_supports_counters()) { - throw exceptions::invalid_request_exception("Counter support is not enabled"); - } - if (pt->is_collection() && pt->get_type()->is_multi_cell()) { - if (!defined_multi_cell_collections) { - defined_multi_cell_collections = std::map<bytes, data_type>{}; - } - defined_multi_cell_collections->emplace(id->name(), pt->get_type()); - } - stmt->_columns.emplace(id, pt->get_type()); // we'll remove what is not a column below - } - if (_key_aliases.empty()) { - throw exceptions::invalid_request_exception("No PRIMARY KEY specified (exactly one required)"); - } else if (_key_aliases.size() > 1) { - throw exceptions::invalid_request_exception("Multiple PRIMARY KEYs specified (exactly one required)"); - } - - stmt->_use_compact_storage = _properties.use_compact_storage(); - - auto& key_aliases = _key_aliases[0]; - std::vector<data_type> key_types; - for (auto&& alias : key_aliases) { - stmt->_key_aliases.emplace_back(alias->name()); - auto t = get_type_and_remove(stmt->_columns, alias); - if (t->is_counter()) { - throw exceptions::invalid_request_exception(sprint("counter type is not supported for PRIMARY KEY part %s", alias->text())); - } - if (_static_columns.count(alias) > 0) { - throw exceptions::invalid_request_exception(sprint("Static column %s cannot be part of the PRIMARY KEY", alias->text())); - } - key_types.emplace_back(t); - } - stmt->_partition_key_types = key_types; - - // Handle column aliases - if (_column_aliases.empty()) { - if (_properties.use_compact_storage()) { - // There should remain some column definition since it is a non-composite "static" CF - if (stmt->_columns.empty()) { - throw exceptions::invalid_request_exception("No definition found that is not part of the PRIMARY KEY"); - } - if (defined_multi_cell_collections) { - throw exceptions::invalid_request_exception("Non-frozen collection types are not supported with COMPACT STORAGE"); - } - } - stmt->_clustering_key_types = std::vector<data_type>{}; - } else { - // If we use compact storage and have only one alias, it is a - // standard "dynamic" CF, otherwise it's a composite - if (_properties.use_compact_storage() && _column_aliases.size() == 1) { - if (defined_multi_cell_collections) { - throw exceptions::invalid_request_exception("Collection types are not supported with COMPACT STORAGE"); - } - auto alias = _column_aliases[0]; - if (_static_columns.count(alias) > 0) { - throw exceptions::invalid_request_exception(sprint("Static column %s cannot be part of the PRIMARY KEY", alias->text())); - } - stmt->_column_aliases.emplace_back(alias->name()); - auto at = get_type_and_remove(stmt->_columns, alias); - if (at->is_counter()) { - throw exceptions::invalid_request_exception(sprint("counter type is not supported for PRIMARY KEY part %s", stmt->_column_aliases[0])); - } - stmt->_clustering_key_types.emplace_back(at); - } else { - std::vector<data_type> types; - for (auto&& t : _column_aliases) { - stmt->_column_aliases.emplace_back(t->name()); - auto type = get_type_and_remove(stmt->_columns, t); - if (type->is_counter()) { - throw exceptions::invalid_request_exception(sprint("counter type is not supported for PRIMARY KEY part %s", t->text())); - } - if (_static_columns.count(t) > 0) { - throw exceptions::invalid_request_exception(sprint("Static column %s cannot be part of the PRIMARY KEY", t->text())); - } - types.emplace_back(type); - } - - if (_properties.use_compact_storage()) { - if (defined_multi_cell_collections) { - throw exceptions::invalid_request_exception("Collection types are not supported with COMPACT STORAGE"); - } - stmt->_clustering_key_types = types; - } else { - stmt->_clustering_key_types = types; - } - } - } - - if (!_static_columns.empty()) { - // Only CQL3 tables can have static columns - if (_properties.use_compact_storage()) { - throw exceptions::invalid_request_exception("Static columns are not supported in COMPACT STORAGE tables"); - } - // Static columns only make sense if we have at least one clustering column. Otherwise everything is static anyway - if (_column_aliases.empty()) { - throw exceptions::invalid_request_exception("Static columns are only useful (and thus allowed) if the table has at least one clustering column"); - } - } - - if (_properties.use_compact_storage() && !stmt->_column_aliases.empty()) { - if (stmt->_columns.empty()) { -#if 0 - // The only value we'll insert will be the empty one, so the default validator don't matter - stmt.defaultValidator = BytesType.instance; - // We need to distinguish between - // * I'm upgrading from thrift so the valueAlias is null - // * I've defined my table with only a PK (and the column value will be empty) - // So, we use an empty valueAlias (rather than null) for the second case - stmt.valueAlias = ByteBufferUtil.EMPTY_BYTE_BUFFER; -#endif - } else { - if (stmt->_columns.size() > 1) { - throw exceptions::invalid_request_exception(sprint("COMPACT STORAGE with composite PRIMARY KEY allows no more than one column not part of the PRIMARY KEY (got: %s)", - ::join( ", ", stmt->_columns | boost::adaptors::map_keys))); - } -#if 0 - Map.Entry lastEntry = stmt.columns.entrySet().iterator().next(); - stmt.defaultValidator = lastEntry.getValue(); - stmt.valueAlias = lastEntry.getKey().bytes; - stmt.columns.remove(lastEntry.getKey()); -#endif - } - } else { - // For compact, we are in the "static" case, so we need at least one column defined. For non-compact however, having - // just the PK is fine since we have CQL3 row marker. - if (_properties.use_compact_storage() && stmt->_columns.empty()) { - throw exceptions::invalid_request_exception("COMPACT STORAGE with non-composite PRIMARY KEY requires one column not part of the PRIMARY KEY, none given"); - } -#if 0 - // There is no way to insert/access a column that is not defined for non-compact storage, so - // the actual validator don't matter much (except that we want to recognize counter CF as limitation apply to them). - stmt.defaultValidator = !stmt.columns.isEmpty() && (stmt.columns.values().iterator().next() instanceof CounterColumnType) - ?
CounterColumnType.instance - : BytesType.instance; -#endif - } - - // If we give a clustering order, we must explicitly do so for all aliases and in the order of the PK - if (!_properties.defined_ordering().empty()) { - if (_properties.defined_ordering().size() > _column_aliases.size()) { - throw exceptions::invalid_request_exception("Only clustering key columns can be defined in CLUSTERING ORDER directive"); - } - - int i = 0; - for (auto& pair: _properties.defined_ordering()){ - auto& id = pair.first; - auto& c = _column_aliases.at(i); - - if (!(*id == *c)) { - if (_properties.find_ordering_info(c)) { - throw exceptions::invalid_request_exception(sprint("The order of columns in the CLUSTERING ORDER directive must be the one of the clustering key (%s must appear before %s)", c, id)); - } else { - throw exceptions::invalid_request_exception(sprint("Missing CLUSTERING ORDER for column %s", c)); - } - } - ++i; - } - } - - return std::make_unique(stmt); -} - -data_type create_table_statement::raw_statement::get_type_and_remove(column_map_type& columns, ::shared_ptr t) -{ - auto it = columns.find(t); - if (it == columns.end()) { - throw exceptions::invalid_request_exception(sprint("Unknown definition %s referenced in PRIMARY KEY", t->text())); - } - auto type = it->second; - if (type->is_collection() && type->is_multi_cell()) { - throw exceptions::invalid_request_exception(sprint("Invalid collection type for PRIMARY KEY component %s", t->text())); - } - columns.erase(t); - - return _properties.get_reversable_type(t, type); -} - -void create_table_statement::raw_statement::add_definition(::shared_ptr def, ::shared_ptr type, bool is_static) { - _defined_names.emplace(def); - _definitions.emplace(def, type); - if (is_static) { - _static_columns.emplace(def); - } -} - -void create_table_statement::raw_statement::add_key_aliases(const std::vector<::shared_ptr> aliases) { - _key_aliases.emplace_back(aliases); -} - -void create_table_statement::raw_statement::add_column_alias(::shared_ptr alias) { - _column_aliases.emplace_back(alias); -} - -} - -} diff --git a/scylla/cql3/statements/create_table_statement.hh b/scylla/cql3/statements/create_table_statement.hh deleted file mode 100644 index 521ca2b..0000000 --- a/scylla/cql3/statements/create_table_statement.hh +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
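get_type_and_remove() above captures the PRIMARY KEY assembly contract of raw_statement::prepare(): each referenced column must exist, must not be a non-frozen (multi-cell) collection, and is consumed from the candidate map so that only regular columns remain afterwards. A minimal sketch with stand-in types (fake_type replaces data_type; reversed-type handling is omitted):

#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>

// Stand-in for data_type: only the property the check needs.
struct fake_type {
    bool multi_cell_collection;
};

// Mirrors the deleted get_type_and_remove(): look up the column, reject
// non-frozen collections, and erase the entry so the map ends up holding
// only the non-key ("regular") columns.
fake_type get_type_and_remove(std::unordered_map<std::string, fake_type>& columns,
                              const std::string& name) {
    auto it = columns.find(name);
    if (it == columns.end()) {
        throw std::invalid_argument("Unknown definition " + name + " referenced in PRIMARY KEY");
    }
    if (it->second.multi_cell_collection) {
        throw std::invalid_argument("Invalid collection type for PRIMARY KEY component " + name);
    }
    fake_type t = it->second;
    columns.erase(it);
    return t;
}

int main() {
    std::unordered_map<std::string, fake_type> cols{
        {"id", {false}}, {"tags", {true}}, {"v", {false}}};
    get_type_and_remove(cols, "id");       // consumes the key column
    std::cout << cols.size() << "\n";      // 2: the leftovers are non-key columns
    try {
        get_type_and_remove(cols, "tags"); // a non-frozen collection is rejected
    } catch (const std::invalid_argument& e) {
        std::cout << e.what() << "\n";
    }
    return 0;
}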
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/statements/schema_altering_statement.hh" -#include "cql3/statements/cf_prop_defs.hh" -#include "cql3/statements/cf_properties.hh" -#include "cql3/statements/raw/cf_statement.hh" -#include "cql3/cql3_type.hh" - -#include "service/migration_manager.hh" -#include "schema.hh" - -#include "core/shared_ptr.hh" - -#include -#include -#include -#include -#include -#include - -namespace cql3 { - -namespace statements { - -/** A CREATE TABLE parsed from a CQL query statement. */ -class create_table_statement : public schema_altering_statement { -#if 0 - private AbstractType defaultValidator; -#endif - std::vector _partition_key_types; - std::vector _clustering_key_types; - std::vector _key_aliases; - std::vector _column_aliases; -#if 0 - private ByteBuffer valueAlias; -#endif - bool _use_compact_storage; - - using column_map_type = - std::unordered_map<::shared_ptr, - data_type, - shared_ptr_value_hash, - shared_ptr_equal_by_value>; - using column_set_type = - std::unordered_set<::shared_ptr, - shared_ptr_value_hash, - shared_ptr_equal_by_value>; - column_map_type _columns; - column_set_type _static_columns; - const ::shared_ptr _properties; - const bool _if_not_exists; -public: - create_table_statement(::shared_ptr name, - ::shared_ptr properties, - bool if_not_exists, - column_set_type static_columns); - - virtual future<> check_access(const service::client_state& state) override; - - virtual void validate(distributed&, const service::client_state& state) override; - - virtual future> announce_migration(distributed& proxy, bool is_local_only) override; - - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; - - schema_ptr get_cf_meta_data(); - - class raw_statement; - - friend raw_statement; -private: - std::vector get_columns(); - - void apply_properties_to(schema_builder& builder); - - void add_column_metadata_from_aliases(schema_builder& builder, std::vector aliases, const std::vector& types, column_kind kind); -}; - -class create_table_statement::raw_statement : public raw::cf_statement { -private: - using defs_type = std::unordered_map<::shared_ptr, - ::shared_ptr, - shared_ptr_value_hash, - shared_ptr_equal_by_value>; - defs_type _definitions; - std::vector>> _key_aliases; - std::vector<::shared_ptr> _column_aliases; - create_table_statement::column_set_type _static_columns; - - std::multiset<::shared_ptr, - indirect_less<::shared_ptr, column_identifier::text_comparator>> _defined_names; - bool _if_not_exists; - cf_properties _properties; -public: - raw_statement(::shared_ptr name, bool if_not_exists); - - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; - - cf_properties& properties() { - return _properties; - } - - data_type get_type_and_remove(column_map_type& columns, ::shared_ptr t); - - void add_definition(::shared_ptr def, ::shared_ptr type, bool is_static); - - void add_key_aliases(const std::vector<::shared_ptr> aliases); - - void add_column_alias(::shared_ptr alias); -}; - -} - -} diff --git a/scylla/cql3/statements/create_type_statement.cc b/scylla/cql3/statements/create_type_statement.cc deleted file mode 100644 index ba89003..0000000 --- 
a/scylla/cql3/statements/create_type_statement.cc +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/statements/create_type_statement.hh" -#include "prepared_statement.hh" - -#include "service/migration_manager.hh" - -namespace cql3 { - -namespace statements { - -create_type_statement::create_type_statement(const ut_name& name, bool if_not_exists) - : _name{name} - , _if_not_exists{if_not_exists} -{ -} - -void create_type_statement::prepare_keyspace(const service::client_state& state) -{ - if (!_name.has_keyspace()) { - _name.set_keyspace(state.get_keyspace()); - } -} - -void create_type_statement::add_definition(::shared_ptr name, ::shared_ptr type) -{ - _column_names.emplace_back(name); - _column_types.emplace_back(type); -} - -future<> create_type_statement::check_access(const service::client_state& state) -{ - return state.has_keyspace_access(keyspace(), auth::permission::CREATE); -} - -inline bool create_type_statement::type_exists_in(::keyspace& ks) -{ - auto&& keyspace_types = ks.metadata()->user_types()->get_all_types(); - return keyspace_types.find(_name.get_user_type_name()) != keyspace_types.end(); -} - -void create_type_statement::validate(distributed& proxy, const service::client_state& state) -{ - try { - auto&& ks = proxy.local().get_db().local().find_keyspace(keyspace()); - if (type_exists_in(ks) && !_if_not_exists) { - throw exceptions::invalid_request_exception(sprint("A user type of name %s already exists", _name.to_string())); - } - } catch (no_such_keyspace& e) { - throw exceptions::invalid_request_exception(sprint("Cannot add type in unknown keyspace %s", keyspace())); - } - - for (auto&& type : _column_types) { - if (type->is_counter()) { - throw exceptions::invalid_request_exception(sprint("A user type cannot contain counters")); - } - } -} - -void create_type_statement::check_for_duplicate_names(user_type type) -{ - auto names = type->field_names(); - for (auto i = names.cbegin(); i < names.cend() - 1; ++i) { - for (auto j = i + 1; j < names.cend(); ++j) { - if (*i == *j) { - 
throw exceptions::invalid_request_exception( - sprint("Duplicate field name %s in type %s", to_hex(*i), type->get_name_as_string())); - } - } - } -} - -const sstring& create_type_statement::keyspace() const -{ - return _name.get_keyspace(); -} - -inline user_type create_type_statement::create_type(database& db) -{ - std::vector field_names; - std::vector field_types; - - for (auto&& column_name : _column_names) { - field_names.push_back(column_name->name()); - } - - for (auto&& column_type : _column_types) { - field_types.push_back(column_type->prepare(db, keyspace())->get_type()); - } - - return user_type_impl::get_instance(keyspace(), _name.get_user_type_name(), - std::move(field_names), std::move(field_types)); -} - -future> create_type_statement::announce_migration(distributed& proxy, bool is_local_only) -{ - auto&& db = proxy.local().get_db().local(); - - // Keyspace exists or we wouldn't have validated otherwise - auto&& ks = db.find_keyspace(keyspace()); - - // Can happen with if_not_exists - if (type_exists_in(ks)) { - return make_ready_future<::shared_ptr>(); - } - - auto type = create_type(db); - check_for_duplicate_names(type); - return service::get_local_migration_manager().announce_new_type(type, is_local_only).then([this] { - using namespace cql_transport; - - return make_shared( - event::schema_change::change_type::CREATED, - event::schema_change::target_type::TYPE, - keyspace(), - _name.get_string_type_name()); - }); -} - -std::unique_ptr -create_type_statement::prepare(database& db, cql_stats& stats) { - return std::make_unique(make_shared(*this)); -} - -} - -} diff --git a/scylla/cql3/statements/create_type_statement.hh b/scylla/cql3/statements/create_type_statement.hh deleted file mode 100644 index 28159b8..0000000 --- a/scylla/cql3/statements/create_type_statement.hh +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
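check_for_duplicate_names() above is a plain pairwise scan over the UDT field names, which is adequate for the handful of fields a user type carries in practice. A standalone version over strings:

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Pairwise duplicate scan, as in the deleted check_for_duplicate_names().
void check_for_duplicate_names(const std::vector<std::string>& names,
                               const std::string& type_name) {
    for (std::size_t i = 0; i + 1 < names.size(); ++i) {
        for (std::size_t j = i + 1; j < names.size(); ++j) {
            if (names[i] == names[j]) {
                throw std::invalid_argument(
                    "Duplicate field name " + names[i] + " in type " + type_name);
            }
        }
    }
}

int main() {
    check_for_duplicate_names({"street", "city"}, "address"); // fine
    try {
        check_for_duplicate_names({"a", "b", "a"}, "t");      // throws
    } catch (const std::invalid_argument& e) {
        std::cout << e.what() << "\n";
    }
    return 0;
}

Bounding the outer loop with i + 1 < names.size() also sidesteps the empty-vector edge case that the deleted code's cend() - 1 bound would hit.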
- */ - -#pragma once - -#include "cql3/statements/schema_altering_statement.hh" -#include "cql3/cql3_type.hh" -#include "cql3/ut_name.hh" - -namespace cql3 { - -namespace statements { - -class create_type_statement : public schema_altering_statement { - ut_name _name; - std::vector<::shared_ptr> _column_names; - std::vector<::shared_ptr> _column_types; - bool _if_not_exists; -public: - create_type_statement(const ut_name& name, bool if_not_exists); - - virtual void prepare_keyspace(const service::client_state& state) override; - - void add_definition(::shared_ptr name, ::shared_ptr type); - - virtual future<> check_access(const service::client_state& state) override; - - virtual void validate(distributed&, const service::client_state& state) override; - - virtual const sstring& keyspace() const override; - - virtual future> announce_migration(distributed& proxy, bool is_local_only) override; - - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; - - static void check_for_duplicate_names(user_type type); -private: - bool type_exists_in(::keyspace& ks); - user_type create_type(database& db); -}; - -} - -} diff --git a/scylla/cql3/statements/create_user_statement.cc b/scylla/cql3/statements/create_user_statement.cc deleted file mode 100644 index 2967a7b..0000000 --- a/scylla/cql3/statements/create_user_statement.cc +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "create_user_statement.hh" -#include "auth/auth.hh" -#include "auth/authenticator.hh" - -cql3::statements::create_user_statement::create_user_statement(sstring username, ::shared_ptr<user_options> opts, bool superuser, bool if_not_exists) - : _username(std::move(username)) - , _opts(std::move(opts)) - , _superuser(superuser) - , _if_not_exists(if_not_exists) -{} - -void cql3::statements::create_user_statement::validate(distributed<service::storage_proxy>& proxy, const service::client_state& state) { - if (_username.empty()) { - throw exceptions::invalid_request_exception("Username can't be an empty string"); - } - - _opts->validate(); - - // validate login here before checkAccess to avoid leaking user existence to anonymous users. - state.ensure_not_anonymous(); - - // cannot validate user existence compliant with _if_not_exists here, because - // we need to query -> continuation, and this is not a continuation method -} - -future<::shared_ptr<cql_transport::messages::result_message>> -cql3::statements::create_user_statement::execute(distributed<service::storage_proxy>& proxy, service::query_state& state, const query_options& options) { - return state.get_client_state().user()->is_super().then([this](bool is_super) { - if (!is_super) { - throw exceptions::unauthorized_exception("Only superusers are allowed to perform CREATE USER queries"); - } - return auth::auth::is_existing_user(_username).then([this](bool exists) { - if (exists && !_if_not_exists) { - throw exceptions::invalid_request_exception(sprint("User %s already exists", _username)); - } - if (exists && _if_not_exists) { - // IF NOT EXISTS on an existing user: succeed without re-creating it - return make_ready_future<::shared_ptr<cql_transport::messages::result_message>>(); - } - return auth::authenticator::get().create(_username, _opts->options()).then([this] { - return auth::auth::insert_user(_username, _superuser).then([] { - return make_ready_future<::shared_ptr<cql_transport::messages::result_message>>(); - }); - }); - }); - }); -} diff --git a/scylla/cql3/statements/create_user_statement.hh b/scylla/cql3/statements/create_user_statement.hh deleted file mode 100644 index 2ca13ae..0000000 --- a/scylla/cql3/statements/create_user_statement.hh +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details.
- * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "authentication_statement.hh" -#include "cql3/user_options.hh" - -namespace cql3 { - -namespace statements { - -class create_user_statement : public authentication_statement { -private: - sstring _username; - ::shared_ptr _opts; - bool _superuser; - bool _if_not_exists; -public: - - create_user_statement(sstring, ::shared_ptr, bool superuser, bool if_not_exists); - - void validate(distributed&, const service::client_state&) override; - - future<::shared_ptr> execute(distributed& - , service::query_state& - , const query_options&) override; -}; - -} - -} diff --git a/scylla/cql3/statements/create_view_statement.cc b/scylla/cql3/statements/create_view_statement.cc deleted file mode 100644 index 166fac1..0000000 --- a/scylla/cql3/statements/create_view_statement.cc +++ /dev/null @@ -1,353 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
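The file that follows enforces three per-column rules when building a materialized view's primary key (see validate_primary_key() below): no multi-cell columns, no static columns, and at most one column that is not part of the base table's primary key. A reduced sketch under those assumptions, with plain structs replacing column_definition and the IS NOT NULL restriction check left out:

#include <iostream>
#include <set>
#include <stdexcept>
#include <string>

// Stand-in for column_definition: just the flags the rules consult.
struct col {
    std::string name;
    bool multi_cell;
    bool is_static;
};

// Returns true when 'c' is a base column outside the base PK; the caller
// tolerates at most one such column across the whole view primary key.
bool validate_view_pk_column(const col& c,
                             const std::set<std::string>& base_pk,
                             bool already_have_non_pk_column) {
    if (c.multi_cell) {
        throw std::invalid_argument("Cannot use MultiCell column '" + c.name + "'");
    }
    if (c.is_static) {
        throw std::invalid_argument("Cannot use Static column '" + c.name + "'");
    }
    if (base_pk.count(c.name) == 0) {
        if (already_have_non_pk_column) {
            throw std::invalid_argument(
                "Cannot include more than one non-primary key column ('" + c.name + "')");
        }
        return true;
    }
    return false;
}

int main() {
    const std::set<std::string> base_pk{"id", "ck"};
    bool has_non_pk = false;
    has_non_pk |= validate_view_pk_column({"id", false, false}, base_pk, has_non_pk); // base PK column: ok
    has_non_pk |= validate_view_pk_column({"v", false, false}, base_pk, has_non_pk);  // first extra column: ok
    try {
        validate_view_pk_column({"w", false, false}, base_pk, has_non_pk);            // second extra: rejected
    } catch (const std::invalid_argument& e) {
        std::cout << e.what() << "\n";
    }
    return 0;
}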
- */ - -#include -#include - -#include -#include -#include -#include - -#include "cql3/column_identifier.hh" -#include "cql3/restrictions/statement_restrictions.hh" -#include "cql3/statements/create_view_statement.hh" -#include "cql3/statements/prepared_statement.hh" -#include "cql3/statements/select_statement.hh" -#include "cql3/statements/raw/select_statement.hh" -#include "cql3/selection/selectable.hh" -#include "cql3/selection/selectable_with_field_selection.hh" -#include "cql3/selection/selection.hh" -#include "cql3/selection/writetime_or_ttl.hh" -#include "cql3/util.hh" -#include "schema_builder.hh" -#include "service/storage_proxy.hh" -#include "validation.hh" -#include "db/config.hh" -#include "service/storage_service.hh" - -namespace cql3 { - -namespace statements { - -create_view_statement::create_view_statement( - ::shared_ptr view_name, - ::shared_ptr base_name, - std::vector<::shared_ptr> select_clause, - std::vector<::shared_ptr> where_clause, - std::vector<::shared_ptr> partition_keys, - std::vector<::shared_ptr> clustering_keys, - bool if_not_exists) - : schema_altering_statement{view_name} - , _base_name{base_name} - , _select_clause{select_clause} - , _where_clause{where_clause} - , _partition_keys{partition_keys} - , _clustering_keys{clustering_keys} - , _if_not_exists{if_not_exists} -{ - service::get_local_storage_proxy().get_db().local().get_config().check_experimental("Creating materialized views"); - if (!service::get_local_storage_service().cluster_supports_materialized_views()) { - throw exceptions::invalid_request_exception("Can't create materialized views until the whole cluster has been upgraded"); - } -} - -future<> create_view_statement::check_access(const service::client_state& state) { - return state.has_column_family_access(keyspace(), _base_name->get_column_family(), auth::permission::ALTER); -} - -void create_view_statement::validate(distributed&, const service::client_state& state) { - // validated in announceMigration() -} - -static const column_definition* get_column_definition(schema_ptr schema, column_identifier::raw& identifier) { - auto prepared = identifier.prepare(schema); - assert(dynamic_pointer_cast(prepared)); - auto id = static_pointer_cast(prepared); - return schema->get_column_definition(id->name()); -} - -static bool validate_primary_key( - schema_ptr schema, - const column_definition* def, - const std::unordered_set& base_pk, - bool has_non_pk_column, - const restrictions::statement_restrictions& restrictions) { - - if (def->type->is_multi_cell()) { - throw exceptions::invalid_request_exception(sprint( - "Cannot use MultiCell column '%s' in PRIMARY KEY of materialized view", def->name_as_text())); - } - if (def->is_static()) { - throw exceptions::invalid_request_exception(sprint( - "Cannot use Static column '%s' in PRIMARY KEY of materialized view", def->name_as_text())); - } - - if (base_pk.find(def) == base_pk.end()) { - if (has_non_pk_column) { - throw exceptions::invalid_request_exception(sprint( - "Cannot include more than one non-primary key column '%s' in materialized view primary key", def->name_as_text())); - } - return true; - } - - // We don't need to include the "IS NOT NULL" filter on a non-composite partition key - // because we will never allow a single partition key to be NULL - if (schema->partition_key_columns().size() > 1 && !restrictions.is_restricted(def)) { - throw exceptions::invalid_request_exception(sprint( - "Primary key column '%s' is required to be filtered by 'IS NOT NULL'", def->name_as_text())); - } - - 
return false; -} - -future> create_view_statement::announce_migration(distributed& proxy, bool is_local_only) { - // We need to make sure that: - // - primary key includes all columns in base table's primary key - // - make sure that the select statement does not have anything other than columns - // and their names match the base table's names - // - make sure that primary key does not include any collections - // - make sure there is no where clause in the select statement - // - make sure there is not currently a table or view - // - make sure base_table gc_grace_seconds > 0 - - _properties.validate(); - - if (_properties.use_compact_storage()) { - throw exceptions::invalid_request_exception(sprint( - "Cannot use 'COMPACT STORAGE' when defining a materialized view")); - } - - // View and base tables must be in the same keyspace, to ensure that RF - // is the same (because we assign a view replica to each base replica). - // If a keyspace was not specified for the base table name, it is assumed - // it is in the same keyspace as the view table being created (which - // itself might be the current USEd keyspace, or explicitly specified). - if (_base_name->get_keyspace().empty()) { - _base_name->set_keyspace(keyspace(), true); - } - if (_base_name->get_keyspace() != keyspace()) { - throw exceptions::invalid_request_exception(sprint( - "Cannot create a materialized view on a table in a separate keyspace ('%s' != '%s')", - _base_name->get_keyspace(), keyspace())); - } - - auto&& db = proxy.local().get_db().local(); - schema_ptr schema = validation::validate_column_family(db, _base_name->get_keyspace(), _base_name->get_column_family()); - - if (schema->is_counter()) { - throw exceptions::invalid_request_exception(sprint( - "Materialized views are not supported on counter tables")); - } - - if (schema->is_view()) { - throw exceptions::invalid_request_exception(sprint( - "Materialized views cannot be created against other materialized views")); - } - - if (schema->gc_grace_seconds().count() == 0) { - throw exceptions::invalid_request_exception(sprint( - "Cannot create materialized view '%s' for base table " - "'%s' with gc_grace_seconds of 0, since this value is " - "used to TTL undelivered updates. 
Setting gc_grace_seconds " - "too low might cause undelivered updates to expire " - "before being replayed.", column_family(), _base_name->get_column_family())); - } - - // Gather all included columns, as specified by the select clause - auto included = boost::copy_range>(_select_clause | boost::adaptors::transformed([&](auto&& selector) { - if (selector->alias) { - throw exceptions::invalid_request_exception(sprint( - "Cannot use alias when defining a materialized view")); - } - - auto selectable = selector->selectable_; - if (dynamic_pointer_cast(selectable)) { - throw exceptions::invalid_request_exception(sprint( - "Cannot select out a part of type when defining a materialized view")); - } - if (dynamic_pointer_cast(selectable)) { - throw exceptions::invalid_request_exception(sprint( - "Cannot use function when defining a materialized view")); - } - if (dynamic_pointer_cast(selectable)) { - throw exceptions::invalid_request_exception(sprint( - "Cannot use function when defining a materialized view")); - } - - assert(dynamic_pointer_cast(selectable)); - auto identifier = static_pointer_cast(selectable); - auto* def = get_column_definition(schema, *identifier); - if (!def) { - throw exceptions::invalid_request_exception(sprint( - "Unknown column name detected in CREATE MATERIALIZED VIEW statement: %s", identifier)); - } - return def; - })); - - if (!get_bound_variables()->empty()) { - throw exceptions::invalid_request_exception(sprint( - "Cannot use query parameters in CREATE MATERIALIZED VIEW statements")); - } - - auto parameters = ::make_shared(raw::select_statement::parameters::orderings_type(), false, true); - raw::select_statement raw_select(_base_name, std::move(parameters), _select_clause, _where_clause, nullptr); - raw_select.prepare_keyspace(keyspace()); - raw_select.set_bound_variables({}); - - cql_stats ignored; - auto prepared = raw_select.prepare(db, ignored, true); - auto restrictions = static_pointer_cast(prepared->statement)->get_restrictions(); - - auto base_primary_key_cols = boost::copy_range>( - boost::range::join(schema->partition_key_columns(), schema->clustering_key_columns()) - | boost::adaptors::transformed([](auto&& def) { return &def; })); - - if (_partition_keys.empty()) { - throw exceptions::invalid_request_exception(sprint("Must select at least one column for a materialized view")); - } - if (_clustering_keys.empty()) { - throw exceptions::invalid_request_exception(sprint("No columns are defined for the materialized view other than the primary key")); - } - - // Validate the primary key clause, ensuring only one non-PK base column is used in the view's PK; an illustrative sketch of this rule follows.
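To make the rule concrete: a view's primary key may include at most one column that is not part of the base table's primary key, since otherwise a single base row could map to several view rows and tombstones for the view could not be generated reliably. A minimal sketch of the check, using hypothetical simplified types rather than the schema classes used in this patch:

#include <stdexcept>
#include <string>
#include <unordered_set>
#include <vector>

// Sketch only: base_pk holds the base table's primary key column names,
// view_pk lists the view's primary key columns in declaration order.
inline void check_view_pk(const std::unordered_set<std::string>& base_pk,
                          const std::vector<std::string>& view_pk) {
    int non_base = 0;
    for (const auto& col : view_pk) {
        // only columns outside the base PK count toward the limit of one
        if (!base_pk.count(col) && ++non_base > 1) {
            throw std::invalid_argument(
                "Cannot include more than one non-primary key column '"
                + col + "' in materialized view primary key");
        }
    }
}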
 - bool has_non_pk_column = false; - std::unordered_set target_primary_keys; - std::vector target_partition_keys; - std::vector target_clustering_keys; - auto validate_pk = [&] (const std::vector<::shared_ptr>& keys, std::vector& target_keys) mutable { - for (auto&& identifier : keys) { - auto* def = get_column_definition(schema, *identifier); - if (!def) { - throw exceptions::invalid_request_exception(sprint( - "Unknown column name detected in CREATE MATERIALIZED VIEW statement: %s", identifier)); - } - if (!target_primary_keys.insert(def).second) { - throw exceptions::invalid_request_exception(sprint( - "Duplicate entry found in PRIMARY KEY: %s", identifier)); - } - target_keys.push_back(def); - has_non_pk_column |= validate_primary_key(schema, def, base_primary_key_cols, has_non_pk_column, *restrictions); - } - }; - validate_pk(_partition_keys, target_partition_keys); - validate_pk(_clustering_keys, target_clustering_keys); - - std::vector missing_pk_columns; - std::vector target_non_pk_columns; - - // We need to include all of the primary key columns from the base table in order to make sure that we do not - // overwrite values in the view. We cannot support "collapsing" the base table into a smaller number of rows in - // the view because if we need to generate a tombstone, we have no way of knowing which value is currently being - // used in the view and whether or not to generate a tombstone. In order to not surprise our users, we require - // that they include all of the columns. We provide them with a list of all of the columns left to include. - for (auto& def : schema->all_columns()) { - bool included_def = included.empty() || included.find(&def) != included.end(); - if (included_def && def.is_static()) { - throw exceptions::invalid_request_exception(sprint( - "Unable to include static column '%s' which would be included by Materialized View SELECT * statement", def)); - } - - bool def_in_target_pk = std::find(target_primary_keys.begin(), target_primary_keys.end(), &def) != target_primary_keys.end(); - if (included_def && !def_in_target_pk) { - target_non_pk_columns.push_back(&def); - } else if (def.is_primary_key() && !def_in_target_pk) { - missing_pk_columns.push_back(&def); - } - } - - if (!missing_pk_columns.empty()) { - auto column_names = ::join(", ", missing_pk_columns | boost::adaptors::transformed(std::mem_fn(&column_definition::name))); - throw exceptions::invalid_request_exception(sprint( - "Cannot create Materialized View %s without primary key columns from base %s (%s)", - column_family(), _base_name->get_column_family(), column_names)); - } - - schema_builder builder{keyspace(), column_family()}; - auto add_columns = [this, &builder] (std::vector& defs, column_kind kind) mutable { - for (auto* def : defs) { - auto&& type = _properties.get_reversable_type(def->column_specification->name, def->type); - builder.with_column(def->name(), type, kind); - } - }; - add_columns(target_partition_keys, column_kind::partition_key); - add_columns(target_clustering_keys, column_kind::clustering_key); - add_columns(target_non_pk_columns, column_kind::regular_column); - _properties.properties()->apply_to_builder(builder); - - if (builder.default_time_to_live().count() > 0) { - throw exceptions::invalid_request_exception( - "Cannot set or alter default_time_to_live for a materialized view.
" - "Data in a materialized view always expire at the same time than " - "the corresponding data in the parent table."); - } - - auto where_clause_text = util::relations_to_where_clause(_where_clause); - builder.with_view_info(schema->id(), schema->cf_name(), included.empty(), std::move(where_clause_text)); - - return make_ready_future<>().then([definition = view_ptr(builder.build()), is_local_only]() mutable { - return service::get_local_migration_manager().announce_new_view(definition, is_local_only); - }).then_wrapped([this] (auto&& f) { - try { - f.get(); - using namespace cql_transport; - return make_shared( - event::schema_change::change_type::CREATED, - event::schema_change::target_type::TABLE, - this->keyspace(), - this->column_family()); - } catch (const exceptions::already_exists_exception& e) { - if (_if_not_exists) { - return ::shared_ptr(); - } - throw e; - } - }); -} - -std::unique_ptr -create_view_statement::prepare(database& db, cql_stats& stats) { - return std::make_unique(make_shared(*this)); -} - -} - -} diff --git a/scylla/cql3/statements/create_view_statement.hh b/scylla/cql3/statements/create_view_statement.hh deleted file mode 100644 index 0ae7762..0000000 --- a/scylla/cql3/statements/create_view_statement.hh +++ /dev/null @@ -1,78 +0,0 @@ -/* - * This file is part of Scylla. - * Copyright (C) 2016 ScyllaDB - * - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/statements/schema_altering_statement.hh" -#include "cql3/statements/cf_prop_defs.hh" -#include "cql3/statements/cf_properties.hh" -#include "cql3/cql3_type.hh" -#include "cql3/selection/raw_selector.hh" -#include "cql3/relation.hh" -#include "cql3/cf_name.hh" - -#include "service/migration_manager.hh" -#include "schema.hh" - -#include "core/shared_ptr.hh" - -#include -#include -#include - -namespace cql3 { - -namespace statements { - -/** A CREATE MATERIALIZED VIEW parsed from a CQL query statement. 
*/ -class create_view_statement : public schema_altering_statement { -private: - ::shared_ptr _base_name; - std::vector<::shared_ptr> _select_clause; - std::vector<::shared_ptr> _where_clause; - std::vector<::shared_ptr> _partition_keys; - std::vector<::shared_ptr> _clustering_keys; - cf_properties _properties; - bool _if_not_exists; - -public: - create_view_statement( - ::shared_ptr view_name, - ::shared_ptr base_name, - std::vector<::shared_ptr> select_clause, - std::vector<::shared_ptr> where_clause, - std::vector<::shared_ptr> partition_keys, - std::vector<::shared_ptr> clustering_keys, - bool if_not_exists); - - auto& properties() { - return _properties; - } - - // Functions we need to override to subclass schema_altering_statement - virtual future<> check_access(const service::client_state& state) override; - virtual void validate(distributed&, const service::client_state& state) override; - virtual future> announce_migration(distributed& proxy, bool is_local_only) override; - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; - - // FIXME: continue here. See create_table_statement.hh and CreateViewStatement.java -}; - -} -} diff --git a/scylla/cql3/statements/delete_statement.cc b/scylla/cql3/statements/delete_statement.cc deleted file mode 100644 index 24aa53d..0000000 --- a/scylla/cql3/statements/delete_statement.cc +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "delete_statement.hh" -#include "raw/delete_statement.hh" - -namespace cql3 { - -namespace statements { - -delete_statement::delete_statement(statement_type type, uint32_t bound_terms, schema_ptr s, std::unique_ptr attrs, cql_stats& stats) - : modification_statement{type, bound_terms, std::move(s), std::move(attrs), &stats.deletes} -{ } - -bool delete_statement::require_full_clustering_key() const { - return false; -} - -bool delete_statement::allow_clustering_key_slices() const { - return true; -} - -void delete_statement::add_update_for_key(mutation& m, const query::clustering_range& range, const update_parameters& params) { - if (_column_operations.empty()) { - if (s->clustering_key_size() == 0 || range.is_full()) { - m.partition().apply(params.make_tombstone()); - } else if (range.is_singular()) { - m.partition().apply_delete(*s, range.start()->value(), params.make_tombstone()); - } else { - auto bvs = bound_view::from_range(range); - m.partition().apply_delete(*s, range_tombstone(bvs.first, bvs.second, params.make_tombstone())); - } - return; - } - - for (auto&& op : _column_operations) { - op->execute(m, range.start() ? std::move(range.start()->value()) : clustering_key_prefix::make_empty(), params); - } -} - -namespace raw { - -::shared_ptr -delete_statement::prepare_internal(database& db, schema_ptr schema, shared_ptr bound_names, - std::unique_ptr attrs, cql_stats& stats) { - auto stmt = ::make_shared(statement_type::DELETE, bound_names->size(), schema, std::move(attrs), stats); - - for (auto&& deletion : _deletions) { - auto&& id = deletion->affected_column()->prepare_column_identifier(schema); - auto def = get_column_definition(schema, *id); - if (!def) { - throw exceptions::invalid_request_exception(sprint("Unknown identifier %s", *id)); - } - - // For compact, we only have one value except the key, so the only form of DELETE that make sense is without a column - // list. However, we support having the value name for coherence with the static/sparse case - if (def->is_primary_key()) { - throw exceptions::invalid_request_exception(sprint("Invalid identifier %s for deletion (should not be a PRIMARY KEY part)", def->name_as_text())); - } - - auto&& op = deletion->prepare(db, schema->ks_name(), *def); - op->collect_marker_specification(bound_names); - stmt->add_operation(op); - } - - stmt->process_where_clause(db, _where_clause, std::move(bound_names)); - if (!stmt->restrictions()->get_clustering_columns_restrictions()->has_bound(bound::START) - || !stmt->restrictions()->get_clustering_columns_restrictions()->has_bound(bound::END)) { - throw exceptions::invalid_request_exception("A range deletion operation needs to specify both bounds"); - } - return stmt; -} - -delete_statement::delete_statement(::shared_ptr name, - ::shared_ptr attrs, - std::vector<::shared_ptr> deletions, - std::vector<::shared_ptr> where_clause, - conditions_vector conditions, - bool if_exists) - : raw::modification_statement(std::move(name), std::move(attrs), std::move(conditions), false, if_exists) - , _deletions(std::move(deletions)) - , _where_clause(std::move(where_clause)) -{ } - -} - -} - -} diff --git a/scylla/cql3/statements/delete_statement.hh b/scylla/cql3/statements/delete_statement.hh deleted file mode 100644 index 35b2609..0000000 --- a/scylla/cql3/statements/delete_statement.hh +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/statements/modification_statement.hh" -#include "cql3/statements/raw/modification_statement.hh" -#include "cql3/attributes.hh" -#include "cql3/operation.hh" -#include "database_fwd.hh" - -namespace cql3 { - -namespace statements { - -/** -* A DELETE parsed from a CQL query statement. -*/ -class delete_statement : public modification_statement { -public: - delete_statement(statement_type type, uint32_t bound_terms, schema_ptr s, std::unique_ptr attrs, cql_stats& stats); - - virtual bool require_full_clustering_key() const override; - - virtual bool allow_clustering_key_slices() const override; - - virtual void add_update_for_key(mutation& m, const query::clustering_range& range, const update_parameters& params) override; - -#if 0 - protected void validateWhereClauseForConditions() throws InvalidRequestException - { - Iterator iterator = Iterators.concat(cfm.partitionKeyColumns().iterator(), cfm.clusteringColumns().iterator()); - while (iterator.hasNext()) - { - ColumnDefinition def = iterator.next(); - Restriction restriction = processedKeys.get(def.name); - if (restriction == null || !(restriction.isEQ() || restriction.isIN())) - { - throw new InvalidRequestException( - String.format("DELETE statements must restrict all PRIMARY KEY columns with equality relations in order " + - "to use IF conditions, but column '%s' is not restricted", def.name)); - } - } - - } -#endif -}; - -} - -} diff --git a/scylla/cql3/statements/drop_index_statement.cc b/scylla/cql3/statements/drop_index_statement.cc deleted file mode 100644 index 2431cb8..0000000 --- a/scylla/cql3/statements/drop_index_statement.cc +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2017 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/statements/drop_index_statement.hh" -#include "cql3/statements/prepared_statement.hh" -#include "service/migration_manager.hh" -#include "service/storage_service.hh" -#include "schema_builder.hh" - -namespace cql3 { - -namespace statements { - -drop_index_statement::drop_index_statement(::shared_ptr index_name, bool if_exists) - : schema_altering_statement{index_name->get_cf_name()} - , _index_name{index_name->get_idx()} - , _if_exists{if_exists} -{ -} - -const sstring& drop_index_statement::column_family() const -{ - auto cfm = lookup_indexed_table(); - assert(cfm); - return cfm->cf_name(); -} - -future<> drop_index_statement::check_access(const service::client_state& state) -{ - auto cfm = lookup_indexed_table(); - if (!cfm) { - return make_ready_future<>(); - } - return state.has_column_family_access(cfm->ks_name(), cfm->cf_name(), auth::permission::ALTER); -} - -void drop_index_statement::validate(distributed&, const service::client_state& state) -{ - // validated in lookup_indexed_table() -} - -future> drop_index_statement::announce_migration(distributed& proxy, bool is_local_only) -{ - if (!service::get_local_storage_service().cluster_supports_indexes()) { - throw exceptions::invalid_request_exception("Index support is not enabled"); - } - auto cfm = lookup_indexed_table(); - if (!cfm) { - return make_ready_future<::shared_ptr>(nullptr); - } - auto builder = schema_builder(cfm); - builder.without_index(_index_name); - return service::get_local_migration_manager().announce_column_family_update(builder.build(), false, {}, is_local_only).then([cfm] { - // Dropping an index is akin to updating the CF - // Note that we shouldn't call columnFamily() at this point because the index has been dropped and the call to lookupIndexedTable() - // in that method would now throw. 
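As the comment above notes, an index is part of its base table's schema, so dropping one is announced as an update to that table rather than as a drop. A toy model of that idea (hypothetical names, not the real schema_builder API):

#include <set>
#include <string>

// Sketch only: an index lives inside its base table's schema, so dropping
// the index produces a new version of the same table.
struct table_schema {
    std::string name;
    std::set<std::string> indexes;
};

inline table_schema drop_index(table_schema s, const std::string& idx) {
    s.indexes.erase(idx); // the table survives; only its schema changes
    return s;             // announced as an UPDATED-table schema change
}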
- using namespace cql_transport; - return make_shared(event::schema_change::change_type::UPDATED, - event::schema_change::target_type::TABLE, - cfm->ks_name(), - cfm->cf_name()); - }); -} - -std::unique_ptr -drop_index_statement::prepare(database& db, cql_stats& stats) { - return std::make_unique(make_shared(*this)); -} - -schema_ptr drop_index_statement::lookup_indexed_table() const -{ - auto& db = service::get_local_storage_proxy().get_db().local(); - if (!db.has_keyspace(keyspace())) { - throw exceptions::keyspace_not_defined_exception(sprint("Keyspace %s does not exist", keyspace())); - } - auto cfm = db.find_indexed_table(keyspace(), _index_name); - if (cfm) { - return cfm; - } - if (_if_exists) { - return nullptr; - } - throw exceptions::invalid_request_exception( - sprint("Index '%s' could not be found in any of the tables of keyspace '%s'", _index_name, keyspace())); -} - -} - -} diff --git a/scylla/cql3/statements/drop_index_statement.hh b/scylla/cql3/statements/drop_index_statement.hh deleted file mode 100644 index 386760c..0000000 --- a/scylla/cql3/statements/drop_index_statement.hh +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2017 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "cql3/statements/schema_altering_statement.hh" -#include "cql3/index_name.hh" - -#include -#include - -#include - -namespace cql3 { - -namespace statements { - -class drop_index_statement : public schema_altering_statement { - sstring _index_name; - bool _if_exists; -public: - drop_index_statement(::shared_ptr index_name, bool if_exists); - - virtual const sstring& column_family() const override; - - virtual future<> check_access(const service::client_state& state) override; - - virtual void validate(distributed&, const service::client_state& state) override; - - virtual future> announce_migration(distributed& proxy, bool is_local_only) override; - - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; -private: - schema_ptr lookup_indexed_table() const; -}; - -} - -} \ No newline at end of file diff --git a/scylla/cql3/statements/drop_keyspace_statement.cc b/scylla/cql3/statements/drop_keyspace_statement.cc deleted file mode 100644 index 5d0c52e..0000000 --- a/scylla/cql3/statements/drop_keyspace_statement.cc +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "cql3/statements/drop_keyspace_statement.hh" -#include "cql3/statements/prepared_statement.hh" - -#include "service/migration_manager.hh" -#include "transport/event.hh" - -namespace cql3 { - -namespace statements { - -drop_keyspace_statement::drop_keyspace_statement(const sstring& keyspace, bool if_exists) - : _keyspace{keyspace} - , _if_exists{if_exists} -{ -} - -future<> drop_keyspace_statement::check_access(const service::client_state& state) -{ - return state.has_keyspace_access(keyspace(), auth::permission::DROP); -} - -void drop_keyspace_statement::validate(distributed&, const service::client_state& state) -{ - warn(unimplemented::cause::VALIDATION); -#if 0 - ThriftValidation.validateKeyspaceNotSystem(keyspace); -#endif -} - -const sstring& drop_keyspace_statement::keyspace() const -{ - return _keyspace; -} - -future> drop_keyspace_statement::announce_migration(distributed& proxy, bool is_local_only) -{ - return make_ready_future<>().then([this, is_local_only] { - return service::get_local_migration_manager().announce_keyspace_drop(_keyspace, is_local_only); - }).then_wrapped([this] (auto&& f) { - try { - f.get(); - using namespace cql_transport; - return make_shared( - event::schema_change::change_type::DROPPED, - this->keyspace()); - } catch (const exceptions::configuration_exception& e) { - if (_if_exists) { - return ::shared_ptr(); - } - throw e; - } - }); -} - -std::unique_ptr -drop_keyspace_statement::prepare(database& db, cql_stats& stats) { - return std::make_unique(make_shared(*this)); -} - -} - -} diff --git a/scylla/cql3/statements/drop_keyspace_statement.hh b/scylla/cql3/statements/drop_keyspace_statement.hh deleted file mode 100644 index bde0978..0000000 --- a/scylla/cql3/statements/drop_keyspace_statement.hh +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "cql3/statements/schema_altering_statement.hh" - -namespace cql3 { - -namespace statements { - -class drop_keyspace_statement : public schema_altering_statement { - sstring _keyspace; - bool _if_exists; -public: - drop_keyspace_statement(const sstring& keyspace, bool if_exists); - - virtual future<> check_access(const service::client_state& state) override; - - virtual void validate(distributed&, const service::client_state& state) override; - - virtual const sstring& keyspace() const override; - - virtual future> announce_migration(distributed& proxy, bool is_local_only) override; - - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; -}; - -} - -} diff --git a/scylla/cql3/statements/drop_table_statement.cc b/scylla/cql3/statements/drop_table_statement.cc deleted file mode 100644 index 9668fc5..0000000 --- a/scylla/cql3/statements/drop_table_statement.cc +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/statements/drop_table_statement.hh" -#include "cql3/statements/prepared_statement.hh" - -#include "service/migration_manager.hh" - -namespace cql3 { - -namespace statements { - -drop_table_statement::drop_table_statement(::shared_ptr cf_name, bool if_exists) - : schema_altering_statement{std::move(cf_name)} - , _if_exists{if_exists} -{ -} - -future<> drop_table_statement::check_access(const service::client_state& state) -{ - // invalid_request_exception is only thrown synchronously. 
- try { - return state.has_column_family_access(keyspace(), column_family(), auth::permission::DROP); - } catch (exceptions::invalid_request_exception&) { - if (!_if_exists) { - throw; - } - return make_ready_future(); - } -} - -void drop_table_statement::validate(distributed&, const service::client_state& state) -{ - // validated in announce_migration() -} - -future> drop_table_statement::announce_migration(distributed& proxy, bool is_local_only) -{ - return make_ready_future<>().then([this, is_local_only] { - return service::get_local_migration_manager().announce_column_family_drop(keyspace(), column_family(), is_local_only); - }).then_wrapped([this] (auto&& f) { - try { - f.get(); - using namespace cql_transport; - return make_shared( - event::schema_change::change_type::DROPPED, - event::schema_change::target_type::TABLE, - this->keyspace(), - this->column_family()); - } catch (const exceptions::configuration_exception& e) { - if (_if_exists) { - return ::shared_ptr(); - } - throw e; - } - }); -} - -std::unique_ptr -drop_table_statement::prepare(database& db, cql_stats& stats) { - return std::make_unique(make_shared(*this)); -} - -} - -} diff --git a/scylla/cql3/statements/drop_table_statement.hh b/scylla/cql3/statements/drop_table_statement.hh deleted file mode 100644 index 1e1d686..0000000 --- a/scylla/cql3/statements/drop_table_statement.hh +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "cql3/statements/schema_altering_statement.hh" - -#include "cql3/cf_name.hh" - -namespace cql3 { - -namespace statements { - -class drop_table_statement : public schema_altering_statement { - bool _if_exists; -public: - drop_table_statement(::shared_ptr cf_name, bool if_exists); - - virtual future<> check_access(const service::client_state& state) override; - - virtual void validate(distributed&, const service::client_state& state) override; - - virtual future> announce_migration(distributed& proxy, bool is_local_only) override; - - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; -}; - -} - -} diff --git a/scylla/cql3/statements/drop_type_statement.cc b/scylla/cql3/statements/drop_type_statement.cc deleted file mode 100644 index 4e9c5b1..0000000 --- a/scylla/cql3/statements/drop_type_statement.cc +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "cql3/statements/drop_type_statement.hh" -#include "cql3/statements/prepared_statement.hh" - -#include "boost/range/adaptor/map.hpp" - -#include "service/migration_manager.hh" - -namespace cql3 { - -namespace statements { - -drop_type_statement::drop_type_statement(const ut_name& name, bool if_exists) - : _name{name} - , _if_exists{if_exists} -{ -} - -void drop_type_statement::prepare_keyspace(const service::client_state& state) -{ - if (!_name.has_keyspace()) { - _name.set_keyspace(state.get_keyspace()); - } -} - -future<> drop_type_statement::check_access(const service::client_state& state) -{ - return state.has_keyspace_access(keyspace(), auth::permission::DROP); -} - -void drop_type_statement::validate(distributed& proxy, const service::client_state& state) -{ - try { - auto&& ks = proxy.local().get_db().local().find_keyspace(keyspace()); - auto&& all_types = ks.metadata()->user_types()->get_all_types(); - auto old = all_types.find(_name.get_user_type_name()); - if (old == all_types.end()) { - if (_if_exists) { - return; - } else { - throw exceptions::invalid_request_exception(sprint("No user type named %s exists.", _name.to_string())); - } - } - - // We don't want to drop a type unless it's not used anymore (mainly because - // if someone drops a type and recreates one with the same name but different - // definition with the previous name still in use, things can get messy). - // We have two places to check: 1) other user type that can nest the one - // we drop and 2) existing tables referencing the type (maybe in a nested - // way). - - // This code is moved from schema_keyspace (akin to origin) because we cannot - // delay this check to until after we've applied the mutations. If a type or - // table references the type we're dropping, we will a.) get exceptions parsing - // (can be translated to invalid_request, but...) and more importantly b.) - // we will leave those types/tables in a broken state. - // We managed to get through this before because we neither enforced hard - // cross reference between types when loading them, nor did we in fact - // probably ever run the scenario of dropping a referenced type and then - // actually using the referee. - // - // Now, this has a giant flaw. We are succeptible to race conditions here, - // since we could have a drop at the same time as a create type that references - // the dropped one, but we complete the check before the create is done, - // yet apply the drop mutations after -> inconsistent data! - // This problem is the same in origin, and I see no good way around it - // as long as the atomicity of schema modifications are based on - // actual appy of mutations, because unlike other drops, this one isn't - // benevolent. - // I guess this is one case where user need beware, and don't mess with types - // concurrently! 
- - auto&& type = old->second; - auto&& keyspace = type->_keyspace; - auto&& name = type->_name; - - for (auto&& ut : all_types | boost::adaptors::map_values) { - if (ut->_keyspace == keyspace && ut->_name == name) { - continue; - } - - if (ut->references_user_type(keyspace, name)) { - throw exceptions::invalid_request_exception(sprint("Cannot drop user type %s.%s as it is still used by user type %s", keyspace, type->get_name_as_string(), ut->get_name_as_string())); - } - } - - for (auto&& cfm : ks.metadata()->cf_meta_data() | boost::adaptors::map_values) { - for (auto&& col : cfm->all_columns()) { - if (col.type->references_user_type(keyspace, name)) { - throw exceptions::invalid_request_exception(sprint("Cannot drop user type %s.%s as it is still used by table %s.%s", keyspace, type->get_name_as_string(), cfm->ks_name(), cfm->cf_name())); - } - } - } - - } catch (no_such_keyspace& e) { - throw exceptions::invalid_request_exception(sprint("Cannot drop type in unknown keyspace %s", keyspace())); - } -} - -const sstring& drop_type_statement::keyspace() const -{ - return _name.get_keyspace(); -} - -future> drop_type_statement::announce_migration(distributed& proxy, bool is_local_only) -{ - auto&& db = proxy.local().get_db().local(); - - // Keyspace exists or we wouldn't have validated otherwise - auto&& ks = db.find_keyspace(keyspace()); - - auto&& all_types = ks.metadata()->user_types()->get_all_types(); - auto to_drop = all_types.find(_name.get_user_type_name()); - - // Can happen with if_exists - if (to_drop == all_types.end()) { - return make_ready_future<::shared_ptr>(); - } - - return service::get_local_migration_manager().announce_type_drop(to_drop->second, is_local_only).then([this] { - using namespace cql_transport; - - return make_shared( - event::schema_change::change_type::DROPPED, - event::schema_change::target_type::TYPE, - keyspace(), - _name.get_string_type_name()); - }); -} - -std::unique_ptr -drop_type_statement::prepare(database& db, cql_stats& stats) { - return std::make_unique(make_shared(*this)); -} - -} - -} diff --git a/scylla/cql3/statements/drop_type_statement.hh b/scylla/cql3/statements/drop_type_statement.hh deleted file mode 100644 index 9bf8d4b..0000000 --- a/scylla/cql3/statements/drop_type_statement.hh +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/statements/schema_altering_statement.hh" -#include "cql3/cql3_type.hh" -#include "cql3/ut_name.hh" - -namespace cql3 { - -namespace statements { - -class drop_type_statement : public schema_altering_statement { - ut_name _name; - bool _if_exists; -public: - drop_type_statement(const ut_name& name, bool if_exists); - - virtual void prepare_keyspace(const service::client_state& state) override; - - virtual future<> check_access(const service::client_state& state) override; - - virtual void validate(distributed&, const service::client_state& state) override; - - virtual const sstring& keyspace() const override; - - virtual future> announce_migration(distributed& proxy, bool is_local_only) override; - - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; -}; - -} - -} diff --git a/scylla/cql3/statements/drop_user_statement.cc b/scylla/cql3/statements/drop_user_statement.cc deleted file mode 100644 index 5470130..0000000 --- a/scylla/cql3/statements/drop_user_statement.cc +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include - -#include "drop_user_statement.hh" -#include "auth/auth.hh" -#include "auth/authenticator.hh" -#include "auth/authorizer.hh" - -cql3::statements::drop_user_statement::drop_user_statement(sstring username, bool if_exists) - : _username(std::move(username)) - , _if_exists(if_exists) -{} - -void cql3::statements::drop_user_statement::validate(distributed& proxy, const service::client_state& state) { - // validate login here before checkAccess to avoid leaking user existence to anonymous users. 
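The ordering stated in the comment above matters: anonymous callers must be rejected before anything that could reveal whether a user exists, so an unauthenticated client cannot probe accounts. A sketch with hypothetical names and messages:

#include <stdexcept>
#include <string>

// Sketch only: reject anonymous callers before any user-existence lookup.
inline void validate_drop_user(bool anonymous, const std::string& caller,
                               const std::string& target) {
    if (anonymous) {
        throw std::runtime_error("You have to be logged in to drop a user");
    }
    if (caller == target) {
        throw std::runtime_error("Users aren't allowed to DROP themselves");
    }
    // user existence is checked later, asynchronously, in execute()
}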
- state.ensure_not_anonymous(); - - // cannot validate user existence here, because - // we need to query -> continuation, and this is not a continuation method - - if (state.user()->name() == _username) { - throw exceptions::invalid_request_exception("Users aren't allowed to DROP themselves"); - } -} - -future<::shared_ptr> -cql3::statements::drop_user_statement::execute(distributed& proxy, service::query_state& state, const query_options& options) { - return state.get_client_state().user()->is_super().then([this](bool is_super) { - if (!is_super) { - throw exceptions::unauthorized_exception("Only superusers are allowed to perform DROP USER queries"); - } - - return auth::auth::is_existing_user(_username).then([this](bool exists) { - if (!_if_exists && !exists) { - throw exceptions::invalid_request_exception(sprint("User %s doesn't exist", _username)); - } - if (_if_exists && !exists) { - return make_ready_future<::shared_ptr>(); - } - - // clean up permissions after the dropped user. - return auth::authorizer::get().revoke_all(_username).then([this] { - return auth::auth::delete_user(_username).then([this] { - return auth::authenticator::get().drop(_username); - }); - }).then([] { - return make_ready_future<::shared_ptr>(); - }); - }); - }); -} - diff --git a/scylla/cql3/statements/drop_user_statement.hh b/scylla/cql3/statements/drop_user_statement.hh deleted file mode 100644 index 5754f6a..0000000 --- a/scylla/cql3/statements/drop_user_statement.hh +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "authentication_statement.hh" - -namespace cql3 { - -namespace statements { - -class drop_user_statement : public authentication_statement { -private: - sstring _username; - bool _if_exists; -public: - drop_user_statement(sstring, bool if_exists); - - void validate(distributed&, const service::client_state&) override; - - future<::shared_ptr> execute(distributed& - , service::query_state& - , const query_options&) override; -}; - -} - -} diff --git a/scylla/cql3/statements/drop_view_statement.cc b/scylla/cql3/statements/drop_view_statement.cc deleted file mode 100644 index 2b1ae0b..0000000 --- a/scylla/cql3/statements/drop_view_statement.cc +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/statements/drop_view_statement.hh" -#include "cql3/statements/prepared_statement.hh" -#include "service/migration_manager.hh" -#include "view_info.hh" - -namespace cql3 { - -namespace statements { - -drop_view_statement::drop_view_statement(::shared_ptr view_name, bool if_exists) - : schema_altering_statement{std::move(view_name)} - , _if_exists{if_exists} -{ -} - -future<> drop_view_statement::check_access(const service::client_state& state) -{ - try { - auto&& s = service::get_local_storage_proxy().get_db().local().find_schema(keyspace(), column_family()); - if (s->is_view()) { - return state.has_column_family_access(keyspace(), s->view_info()->base_name(), auth::permission::ALTER); - } - } catch (const no_such_column_family& e) { - // Will be validated afterwards. 
- } - return make_ready_future<>(); -} - -void drop_view_statement::validate(distributed&, const service::client_state& state) -{ - // validated in migration_manager::announce_view_drop() -} - -future> drop_view_statement::announce_migration(distributed& proxy, bool is_local_only) -{ - return make_ready_future<>().then([this, is_local_only] { - return service::get_local_migration_manager().announce_view_drop(keyspace(), column_family(), is_local_only); - }).then_wrapped([this] (auto&& f) { - try { - f.get(); - using namespace cql_transport; - - return make_shared(event::schema_change::change_type::DROPPED, - event::schema_change::target_type::TABLE, - this->keyspace(), - this->column_family()); - } catch (const exceptions::configuration_exception& e) { - if (_if_exists) { - return ::shared_ptr(); - } - throw e; - } - }); -} - -std::unique_ptr -drop_view_statement::prepare(database& db, cql_stats& stats) { - return std::make_unique(make_shared(*this)); -} - -} - -} diff --git a/scylla/cql3/statements/drop_view_statement.hh b/scylla/cql3/statements/drop_view_statement.hh deleted file mode 100644 index 5caa0aa..0000000 --- a/scylla/cql3/statements/drop_view_statement.hh +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -#include "cql3/statements/schema_altering_statement.hh" -#include "cql3/cf_name.hh" - -#include "database.hh" - -namespace cql3 { - -namespace statements { - -/** A DROP MATERIALIZED VIEW parsed from a CQL query statement. 
*/ -class drop_view_statement : public schema_altering_statement { -private: - bool _if_exists; -public: - drop_view_statement(::shared_ptr view_name, bool if_exists); - - virtual future<> check_access(const service::client_state& state) override; - - virtual void validate(distributed&, const service::client_state& state) override; - - virtual future> announce_migration(distributed& proxy, bool is_local_only) override; - - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; -}; - -} -} diff --git a/scylla/cql3/statements/grant_statement.cc b/scylla/cql3/statements/grant_statement.cc deleted file mode 100644 index 69d39aa..0000000 --- a/scylla/cql3/statements/grant_statement.cc +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "grant_statement.hh" -#include "auth/authorizer.hh" - -future<::shared_ptr> -cql3::statements::grant_statement::execute(distributed& proxy, service::query_state& state, const query_options& options) { - return auth::authorizer::get().grant(state.get_client_state().user(), _permissions, _resource, _username).then([] { - return make_ready_future<::shared_ptr>(); - }); -} diff --git a/scylla/cql3/statements/grant_statement.hh b/scylla/cql3/statements/grant_statement.hh deleted file mode 100644 index ef8f14e..0000000 --- a/scylla/cql3/statements/grant_statement.hh +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "permission_altering_statement.hh" - -namespace cql3 { - -namespace statements { - -class grant_statement : public permission_altering_statement { -public: - using permission_altering_statement::permission_altering_statement; - - future<::shared_ptr> execute(distributed& - , service::query_state& - , const query_options&) override; -}; - -} - -} diff --git a/scylla/cql3/statements/index_prop_defs.cc b/scylla/cql3/statements/index_prop_defs.cc deleted file mode 100644 index a828712..0000000 --- a/scylla/cql3/statements/index_prop_defs.cc +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include -#include "index_prop_defs.hh" -#include "db/index/secondary_index.hh" - -void cql3::statements::index_prop_defs::validate() { - static std::set<sstring> keywords({ sstring(KW_OPTIONS) }); - - property_definitions::validate(keywords, std::set<sstring>()); - - if (is_custom && !custom_class) { - throw exceptions::invalid_request_exception("CUSTOM index requires specifying the index class"); - } - - if (!is_custom && custom_class) { - throw exceptions::invalid_request_exception("Cannot specify index class for a non-CUSTOM index"); - } - if (!is_custom && !_properties.empty()) { - throw exceptions::invalid_request_exception("Cannot specify options for a non-CUSTOM index"); - } - if (get_raw_options().count( - db::index::secondary_index::custom_index_option_name)) { - throw exceptions::invalid_request_exception( - sprint("Cannot specify %s as a CUSTOM option", - db::index::secondary_index::custom_index_option_name)); - } -} - -index_options_map -cql3::statements::index_prop_defs::get_raw_options() { - auto options = get_map(KW_OPTIONS); - return !options ? std::unordered_map<sstring, sstring>() : std::unordered_map<sstring, sstring>(options->begin(), options->end()); -} - -index_options_map -cql3::statements::index_prop_defs::get_options() { - auto options = get_raw_options(); - options.emplace(db::index::secondary_index::custom_index_option_name, *custom_class); - return options; -} diff --git a/scylla/cql3/statements/index_prop_defs.hh b/scylla/cql3/statements/index_prop_defs.hh deleted file mode 100644 index 85c6a01..0000000 --- a/scylla/cql3/statements/index_prop_defs.hh +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see .
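The three checks in validate() above form a small decision table: a CUSTOM index must name an index class, while a non-CUSTOM index may carry neither a class nor options. A standalone restatement over plain std:: types (check_index_props is an illustrative name; the real code additionally rejects the reserved custom_index_option_name key):

    #include <cassert>
    #include <map>
    #include <optional>
    #include <stdexcept>
    #include <string>

    // Mirrors the rules enforced by index_prop_defs::validate() above.
    void check_index_props(bool is_custom,
                           const std::optional<std::string>& custom_class,
                           const std::map<std::string, std::string>& options) {
        if (is_custom && !custom_class) {
            throw std::invalid_argument("CUSTOM index requires specifying the index class");
        }
        if (!is_custom && custom_class) {
            throw std::invalid_argument("Cannot specify index class for a non-CUSTOM index");
        }
        if (!is_custom && !options.empty()) {
            throw std::invalid_argument("Cannot specify options for a non-CUSTOM index");
        }
    }

    int main() {
        // A custom index with a class and options passes.
        check_index_props(true, std::string("MyIndexClass"), {{"mode", "CONTAINS"}});
        // A custom index without a class is rejected.
        try {
            check_index_props(true, std::nullopt, {});
            assert(false);
        } catch (const std::invalid_argument&) {}
    }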
- */ - -#pragma once - -#include "property_definitions.hh" -#include "core/sstring.hh" - -#include -#include - -typedef std::unordered_map<sstring, sstring> index_options_map; - -namespace cql3 { - -namespace statements { - -class index_prop_defs : public property_definitions { -public: - static constexpr auto KW_OPTIONS = "options"; - - bool is_custom = false; - std::experimental::optional<sstring> custom_class; - - void validate(); - index_options_map get_raw_options(); - index_options_map get_options(); -}; - -} -} - diff --git a/scylla/cql3/statements/index_target.cc b/scylla/cql3/statements/index_target.cc deleted file mode 100644 index 05ab4ab..0000000 --- a/scylla/cql3/statements/index_target.cc +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see .
- */ - -#include -#include "index_target.hh" -#include "db/index/secondary_index.hh" - -namespace cql3 { - -namespace statements { - -using db::index::secondary_index; - -const sstring index_target::target_option_name = "target"; -const sstring index_target::custom_index_option_name = "class_name"; - -sstring index_target::as_cql_string(schema_ptr schema) const { - if (!schema->get_column_definition(column->name())->type->is_collection()) { - return column->to_cql_string(); - } - return sprint("%s(%s)", to_sstring(type), column->to_cql_string()); -} - -sstring index_target::index_option(target_type type) { - switch (type) { - case target_type::keys: return secondary_index::index_keys_option_name; - case target_type::keys_and_values: return secondary_index::index_entries_option_name; - case target_type::values: return secondary_index::index_values_option_name; - default: throw std::invalid_argument("should not reach"); - } -} - -::shared_ptr -index_target::raw::values_of(::shared_ptr c) { - return ::make_shared(c, target_type::values); -} - -::shared_ptr -index_target::raw::keys_of(::shared_ptr c) { - return ::make_shared(c, target_type::keys); -} - -::shared_ptr -index_target::raw::keys_and_values_of(::shared_ptr c) { - return ::make_shared(c, target_type::keys_and_values); -} - -::shared_ptr -index_target::raw::full_collection(::shared_ptr c) { - return ::make_shared(c, target_type::full); -} - -::shared_ptr -index_target::raw::prepare(schema_ptr schema) { - return ::make_shared(column->prepare_column_identifier(schema), type); -} - -sstring to_sstring(index_target::target_type type) -{ - switch (type) { - case index_target::target_type::keys: return "keys"; - case index_target::target_type::keys_and_values: return "entries"; - case index_target::target_type::values: return "values"; - case index_target::target_type::full: return "full"; - } - return ""; -} - -} - -} diff --git a/scylla/cql3/statements/index_target.hh b/scylla/cql3/statements/index_target.hh deleted file mode 100644 index 12fd59c..0000000 --- a/scylla/cql3/statements/index_target.hh +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
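as_cql_string()/to_sstring() above encode how an index target is spelled back out in CQL: collection targets render as keys(c), entries(c), values(c) or full(c), while a scalar target is just the bare column name. A standalone sketch of that mapping, with std::string in place of sstring (as_cql is an illustrative name):

    #include <cassert>
    #include <string>

    enum class target_type { values, keys, keys_and_values, full };

    // Same enum -> CQL spelling as to_sstring() above; note that
    // keys_and_values is spelled "entries" in CQL.
    std::string to_string(target_type t) {
        switch (t) {
        case target_type::keys:            return "keys";
        case target_type::keys_and_values: return "entries";
        case target_type::values:          return "values";
        case target_type::full:            return "full";
        }
        return "";
    }

    // Only collection columns get the function-call form.
    std::string as_cql(const std::string& column, target_type t, bool is_collection) {
        return is_collection ? to_string(t) + "(" + column + ")" : column;
    }

    int main() {
        assert(as_cql("m", target_type::keys_and_values, true) == "entries(m)");
        assert(as_cql("v", target_type::values, false) == "v");
    }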
- * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "core/shared_ptr.hh" -#include "cql3/column_identifier.hh" -#include "db/index/secondary_index.hh" - -namespace cql3 { - -namespace statements { - -struct index_target { - static const sstring target_option_name; - static const sstring custom_index_option_name; - - enum class target_type { - values, keys, keys_and_values, full - }; - - const ::shared_ptr column; - const target_type type; - - index_target(::shared_ptr c, target_type t) - : column(c), type(t) { - } - - sstring as_cql_string(schema_ptr schema) const; - - static sstring index_option(target_type type); - static target_type from_column_definition(const column_definition& cd); - - class raw { - public: - const ::shared_ptr column; - const target_type type; - - raw(::shared_ptr c, target_type t) - : column(c), type(t) - {} - - static ::shared_ptr values_of(::shared_ptr c); - static ::shared_ptr keys_of(::shared_ptr c); - static ::shared_ptr keys_and_values_of(::shared_ptr c); - static ::shared_ptr full_collection(::shared_ptr c); - ::shared_ptr prepare(schema_ptr); - }; -}; - -sstring to_sstring(index_target::target_type type); - -} -} diff --git a/scylla/cql3/statements/ks_prop_defs.cc b/scylla/cql3/statements/ks_prop_defs.cc deleted file mode 100644 index c33a657..0000000 --- a/scylla/cql3/statements/ks_prop_defs.cc +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "cql3/statements/ks_prop_defs.hh" - -namespace cql3 { - -namespace statements { - -void ks_prop_defs::validate() { - // Skip validation if the strategy class is already set as it means we've already - // prepared (and redoing it would set strategyClass back to null, which we don't want) - if (_strategy_class) { - return; - } - - static std::set<sstring> keywords({ sstring(KW_DURABLE_WRITES), sstring(KW_REPLICATION) }); - property_definitions::validate(keywords, std::set<sstring>()); - - auto replication_options = get_replication_options(); - if (replication_options.count(REPLICATION_STRATEGY_CLASS_KEY)) { - _strategy_class = replication_options[REPLICATION_STRATEGY_CLASS_KEY]; - } -} - -std::map<sstring, sstring> ks_prop_defs::get_replication_options() const { - auto replication_options = get_map(KW_REPLICATION); - if (replication_options) { - return replication_options.value(); - } - return std::map<sstring, sstring>{}; -} - -std::experimental::optional<sstring> ks_prop_defs::get_replication_strategy_class() const { - return _strategy_class; -} - -lw_shared_ptr<keyspace_metadata> ks_prop_defs::as_ks_metadata(sstring ks_name) { - auto options = get_replication_options(); - options.erase(REPLICATION_STRATEGY_CLASS_KEY); - return keyspace_metadata::new_keyspace(ks_name, get_replication_strategy_class().value(), options, get_boolean(KW_DURABLE_WRITES, true)); -} - -lw_shared_ptr<keyspace_metadata> ks_prop_defs::as_ks_metadata_update(lw_shared_ptr<keyspace_metadata> old) { - auto options = get_replication_options(); - options.erase(REPLICATION_STRATEGY_CLASS_KEY); - auto sc = get_replication_strategy_class(); - if (!sc) { - sc = old->strategy_name(); - options = old->strategy_options(); - } - return keyspace_metadata::new_keyspace(old->name(), *sc, options, get_boolean(KW_DURABLE_WRITES, true)); -} - - -} - -} diff --git a/scylla/cql3/statements/ks_prop_defs.hh b/scylla/cql3/statements/ks_prop_defs.hh deleted file mode 100644 index f7df831..0000000 --- a/scylla/cql3/statements/ks_prop_defs.hh +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see .
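The net effect of validate()/as_ks_metadata() above is to split the CQL 'replication' map into the strategy class (stored under the 'class' key) and the remaining strategy options. A standalone sketch of that split, with std:: types in place of sstring and keyspace_metadata (parse_replication and parsed_replication are illustrative names):

    #include <cassert>
    #include <map>
    #include <optional>
    #include <string>

    static constexpr auto k_class = "class"; // REPLICATION_STRATEGY_CLASS_KEY

    struct parsed_replication {
        std::optional<std::string> strategy_class;
        std::map<std::string, std::string> options;
    };

    parsed_replication parse_replication(std::map<std::string, std::string> m) {
        parsed_replication r;
        if (auto it = m.find(k_class); it != m.end()) {
            r.strategy_class = it->second;
            m.erase(it); // as_ks_metadata() likewise erases the class key
        }
        r.options = std::move(m);
        return r;
    }

    int main() {
        auto r = parse_replication({{"class", "SimpleStrategy"},
                                    {"replication_factor", "3"}});
        assert(*r.strategy_class == "SimpleStrategy");
        assert(r.options.at("replication_factor") == "3");
        assert(!r.options.count("class"));
    }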
- */ - -#pragma once - -#include "cql3/statements/property_definitions.hh" - -#include "database.hh" - -#include -#include -#include - -namespace cql3 { - -namespace statements { - -class ks_prop_defs : public property_definitions { -public: - static constexpr auto KW_DURABLE_WRITES = "durable_writes"; - static constexpr auto KW_REPLICATION = "replication"; - - static constexpr auto REPLICATION_STRATEGY_CLASS_KEY = "class"; -private: - std::experimental::optional _strategy_class; -public: - void validate(); - std::map get_replication_options() const; - std::experimental::optional get_replication_strategy_class() const; - lw_shared_ptr as_ks_metadata(sstring ks_name); - lw_shared_ptr as_ks_metadata_update(lw_shared_ptr old); - -#if 0 - public KSMetaData asKSMetadataUpdate(KSMetaData old) throws RequestValidationException - { - String sClass = strategyClass; - Map sOptions = getReplicationOptions(); - if (sClass == null) - { - sClass = old.strategyClass.getName(); - sOptions = old.strategyOptions; - } - return KSMetaData.newKeyspace(old.name, sClass, sOptions, getBoolean(KW_DURABLE_WRITES, old.durableWrites)); - } -#endif -}; - -} - -} diff --git a/scylla/cql3/statements/list_permissions_statement.cc b/scylla/cql3/statements/list_permissions_statement.cc deleted file mode 100644 index 1a9d29c..0000000 --- a/scylla/cql3/statements/list_permissions_statement.cc +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include -#include - -#include "list_permissions_statement.hh" -#include "auth/authorizer.hh" -#include "auth/auth.hh" -#include "cql3/result_set.hh" -#include "transport/messages/result_message.hh" - -cql3::statements::list_permissions_statement::list_permissions_statement( - auth::permission_set permissions, - std::experimental::optional resource, - std::experimental::optional username, bool recursive) - : _permissions(permissions), _resource(std::move(resource)), _username( - std::move(username)), _recursive(recursive) { -} - -void cql3::statements::list_permissions_statement::validate(distributed& proxy, const service::client_state& state) { - // require authentication here, so the user-existence check in check_access() doesn't leak whether a user exists. - state.ensure_not_anonymous(); -} - -future<> cql3::statements::list_permissions_statement::check_access(const service::client_state& state) { - auto f = make_ready_future(); - if (_username) { - f = auth::auth::is_existing_user(*_username).then([this](bool exists) { - if (!exists) { - throw exceptions::invalid_request_exception(sprint("User %s doesn't exist", *_username)); - } - }); - } - return f.then([this, &state] { - if (_resource) { - maybe_correct_resource(*_resource, state); - if (!_resource->exists()) { - throw exceptions::invalid_request_exception(sprint("%s doesn't exist", *_resource)); - } - } - }); -} - - -future<::shared_ptr> -cql3::statements::list_permissions_statement::execute(distributed& proxy, service::query_state& state, const query_options& options) { - static auto make_column = [](sstring name) { - return ::make_shared(auth::auth::AUTH_KS, "permissions", ::make_shared(std::move(name), true), utf8_type); - }; - static thread_local const std::vector<::shared_ptr> metadata({ - make_column("username"), make_column("resource"), make_column("permission") - }); - - typedef std::experimental::optional<auth::data_resource> opt_resource; - - std::vector<opt_resource> resources; - - auto r = _resource; - for (;;) { - resources.emplace_back(r); - if (!r || !r->has_parent() || !_recursive) { - break; - } - r = r->get_parent(); - } - - return map_reduce(resources, [&state, this](opt_resource r) { - return auth::authorizer::get().list(state.get_client_state().user(), _permissions, std::move(r), _username); - }, std::vector<auth::permission_details>(), [](std::vector<auth::permission_details> details, std::vector<auth::permission_details> pd) { - details.insert(details.end(), pd.begin(), pd.end()); - return std::move(details); - }).then([this](std::vector<auth::permission_details> details) { - std::sort(details.begin(), details.end()); - - auto rs = std::make_unique(metadata); - - for (auto& v : details) { - // Make sure names are sorted. - auto names = auth::permissions::to_strings(v.permissions); - for (auto& p : std::set<sstring>(names.begin(), names.end())) { - rs->add_row( - std::vector { utf8_type->decompose( - v.user), utf8_type->decompose( - v.resource.to_string()), - utf8_type->decompose(p), }); - } - } - - auto rows = ::make_shared(std::move(rs)); - return ::shared_ptr(std::move(rows)); - }); -} diff --git a/scylla/cql3/statements/list_permissions_statement.hh b/scylla/cql3/statements/list_permissions_statement.hh deleted file mode 100644 index f3239e4..0000000 --- a/scylla/cql3/statements/list_permissions_statement.hh +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership.
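The for(;;) loop in execute() above implements recursive listing: with RECURSIVE set, permissions are collected for the resource and every ancestor up its parent chain; without it, only for the resource itself. A standalone sketch of that walk (resource and chain are illustrative names; the real code carries optional data_resource values and feeds each one to authorizer::list):

    #include <cassert>
    #include <string>
    #include <vector>

    struct resource {
        std::string name;
        const resource* parent = nullptr; // null at the root
    };

    // Collect the resource and, when recursive, all of its ancestors.
    std::vector<const resource*> chain(const resource* r, bool recursive) {
        std::vector<const resource*> out;
        for (; r; r = recursive ? r->parent : nullptr) {
            out.push_back(r);
        }
        return out;
    }

    int main() {
        resource root{"data"};
        resource ks{"data/ks", &root};
        resource cf{"data/ks/cf", &ks};
        assert(chain(&cf, true).size() == 3);  // table -> keyspace -> all data
        assert(chain(&cf, false).size() == 1); // just the table
    }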
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -#include "authorization_statement.hh" -#include "auth/permission.hh" -#include "auth/data_resource.hh" - -namespace cql3 { - -namespace statements { - -class list_permissions_statement : public authorization_statement { -private: - auth::permission_set _permissions; - std::experimental::optional _resource; - std::experimental::optional _username; - bool _recursive; - -public: - list_permissions_statement(auth::permission_set, std::experimental::optional, std::experimental::optional, bool); - - void validate(distributed&, const service::client_state&) override; - future<> check_access(const service::client_state&) override; - future<::shared_ptr> execute(distributed& - , service::query_state& - , const query_options&) override; -}; - -} - -} diff --git a/scylla/cql3/statements/list_users_statement.cc b/scylla/cql3/statements/list_users_statement.cc deleted file mode 100644 index 838b32d..0000000 --- a/scylla/cql3/statements/list_users_statement.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "list_users_statement.hh" -#include "cql3/query_processor.hh" -#include "cql3/query_options.hh" -#include "auth/auth.hh" - -void cql3::statements::list_users_statement::validate(distributed& proxy, const service::client_state& state) { -} - -future<> cql3::statements::list_users_statement::check_access(const service::client_state& state) { - state.ensure_not_anonymous(); - return make_ready_future(); -} - -future<::shared_ptr> -cql3::statements::list_users_statement::execute(distributed& proxy, service::query_state& state, const query_options& options) { - auto is = std::make_unique(service::client_state::for_internal_calls()); - auto io = std::make_unique(db::consistency_level::QUORUM, std::vector{}); - auto f = get_local_query_processor().process( - sprint("SELECT * FROM %s.%s", auth::auth::AUTH_KS, - auth::auth::USERS_CF), *is, *io); - return f.finally([is = std::move(is), io = std::move(io)] {}); -} diff --git a/scylla/cql3/statements/list_users_statement.hh b/scylla/cql3/statements/list_users_statement.hh deleted file mode 100644 index aa409c6..0000000 --- a/scylla/cql3/statements/list_users_statement.hh +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
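execute() above answers LIST USERS by running an internal CQL SELECT over the whole users table in the auth keyspace at QUORUM, keeping the internally-built query_state and query_options alive via the finally() capture until the query resolves. A trivial standalone sketch of the query string it builds; 'system_auth' and 'users' are the conventional names and are assumptions here, since the AUTH_KS/USERS_CF values do not appear in this patch:

    #include <cassert>
    #include <string>

    // Mirrors the sprint("SELECT * FROM %s.%s", AUTH_KS, USERS_CF) above.
    std::string list_users_query(const std::string& auth_ks,
                                 const std::string& users_cf) {
        return "SELECT * FROM " + auth_ks + "." + users_cf;
    }

    int main() {
        assert(list_users_query("system_auth", "users") ==
               "SELECT * FROM system_auth.users");
    }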
- */ - -#pragma once - -#include "authentication_statement.hh" -#include "cql3/user_options.hh" - -namespace cql3 { - -namespace statements { - -class list_users_statement : public authentication_statement { -public: - void validate(distributed&, const service::client_state&) override; - future<> check_access(const service::client_state&) override; - future<::shared_ptr> execute(distributed& - , service::query_state& - , const query_options&) override; -}; - -} - -} diff --git a/scylla/cql3/statements/modification_statement.cc b/scylla/cql3/statements/modification_statement.cc deleted file mode 100644 index 2bb1ee6..0000000 --- a/scylla/cql3/statements/modification_statement.cc +++ /dev/null @@ -1,667 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "cql3/statements/modification_statement.hh" -#include "cql3/statements/raw/modification_statement.hh" -#include "cql3/statements/prepared_statement.hh" -#include "cql3/restrictions/single_column_restriction.hh" -#include "validation.hh" -#include "core/shared_ptr.hh" -#include "query-result-reader.hh" -#include -#include -#include -#include -#include -#include "service/storage_service.hh" -#include - -namespace cql3 { - -namespace statements { - -thread_local const ::shared_ptr modification_statement::CAS_RESULT_COLUMN = ::make_shared("[applied]", false); - -modification_statement::modification_statement(statement_type type_, uint32_t bound_terms, schema_ptr schema_, std::unique_ptr attrs_, uint64_t* cql_stats_counter_ptr) - : type{type_} - , _bound_terms{bound_terms} - , s{schema_} - , attrs{std::move(attrs_)} - , _column_operations{} - , _cql_modification_counter_ptr(cql_stats_counter_ptr) -{ } - -bool modification_statement::uses_function(const sstring& ks_name, const sstring& function_name) const { - if (attrs->uses_function(ks_name, function_name)) { - return true; - } - if (_restrictions->uses_function(ks_name, function_name)) { - return true; - } - for (auto&& operation : _column_operations) { - if (operation && operation->uses_function(ks_name, function_name)) { - return true; - } - } - for (auto&& condition : _column_conditions) { - if (condition && condition->uses_function(ks_name, function_name)) { - return true; - } - } - for (auto&& condition : _static_conditions) { - if (condition && condition->uses_function(ks_name, function_name)) { - return true; - } - } - return false; -} - -uint32_t modification_statement::get_bound_terms() { - return _bound_terms; -} - -const sstring& modification_statement::keyspace() const { - return s->ks_name(); -} - -const sstring& modification_statement::column_family() const { - return s->cf_name(); -} - -bool modification_statement::is_counter() const { - return s->is_counter(); -} - -bool modification_statement::is_view() const { - return s->is_view(); -} - -int64_t modification_statement::get_timestamp(int64_t now, const query_options& options) const { - return attrs->get_timestamp(now, options); -} - -bool modification_statement::is_timestamp_set() const { - return attrs->is_timestamp_set(); -} - -gc_clock::duration modification_statement::get_time_to_live(const query_options& options) const { - return gc_clock::duration(attrs->get_time_to_live(options)); -} - -future<> modification_statement::check_access(const service::client_state& state) { - auto f = state.has_column_family_access(keyspace(), column_family(), auth::permission::MODIFY); - if (has_conditions()) { - f = f.then([this, &state] { - return state.has_column_family_access(keyspace(), column_family(), auth::permission::SELECT); - }); - } - // MV updates need to get the current state from the table, and might update the views - // Require Permission.SELECT on the base table, and Permission.MODIFY on the views - auto& db = service::get_local_storage_service().db().local(); - auto&& views = db.find_column_family(keyspace(), column_family()).views(); - if (!views.empty()) { - f = f.then([this, &state] { - return state.has_column_family_access(keyspace(), column_family(), auth::permission::SELECT); - }).then([this, &state, views = std::move(views)] { - return parallel_for_each(views, [this, &state] (auto&& view) { - return state.has_column_family_access(this->keyspace(), view->cf_name(), auth::permission::MODIFY); - }); - }); - } - return f; -} - -future> 
-modification_statement::get_mutations(distributed& proxy, const query_options& options, bool local, int64_t now, tracing::trace_state_ptr trace_state) { - auto keys = make_lw_shared(build_partition_keys(options)); - auto ranges = make_lw_shared(create_clustering_ranges(options)); - return make_update_parameters(proxy, keys, ranges, options, local, now, std::move(trace_state)).then( - [this, keys, ranges, now] (auto params_ptr) { - std::vector mutations; - mutations.reserve(keys->size()); - for (auto key : *keys) { - // We know key.start() must be defined since we only allow EQ relations on the partition key. - mutations.emplace_back(std::move(*key.start()->value().key()), s); - auto& m = mutations.back(); - for (auto&& r : *ranges) { - this->add_update_for_key(m, r, *params_ptr); - } - } - return make_ready_future(std::move(mutations)); - }); -} - -future> -modification_statement::make_update_parameters( - distributed& proxy, - lw_shared_ptr keys, - lw_shared_ptr ranges, - const query_options& options, - bool local, - int64_t now, - tracing::trace_state_ptr trace_state) { - return read_required_rows(proxy, *keys, std::move(ranges), local, options.get_consistency(), std::move(trace_state)).then( - [this, &options, now] (auto rows) { - return make_ready_future>( - std::make_unique(s, options, - this->get_timestamp(now, options), - this->get_time_to_live(options), - std::move(rows))); - }); -} - - -// Implements ResultVisitor concept from query.hh -class prefetch_data_builder { - update_parameters::prefetch_data& _data; - const query::partition_slice& _ps; - schema_ptr _schema; - std::experimental::optional _pkey; -private: - void add_cell(update_parameters::prefetch_data::row& cells, const column_definition& def, const std::experimental::optional& cell) { - if (cell) { - auto ctype = static_pointer_cast(def.type); - if (!ctype->is_multi_cell()) { - throw std::logic_error(sprint("cannot prefetch frozen collection: %s", def.name_as_text())); - } - auto map_type = map_type_impl::get_instance(ctype->name_comparator(), ctype->value_comparator(), true); - update_parameters::prefetch_data::cell_list list; - // FIXME: Iterate over a range instead of fully exploded collection - auto dv = map_type->deserialize(*cell); - for (auto&& el : value_cast(dv)) { - list.emplace_back(update_parameters::prefetch_data::cell{el.first.serialize(), el.second.serialize()}); - } - cells.emplace(def.id, std::move(list)); - } - }; -public: - prefetch_data_builder(schema_ptr s, update_parameters::prefetch_data& data, const query::partition_slice& ps) - : _data(data) - , _ps(ps) - , _schema(std::move(s)) - { } - - void accept_new_partition(const partition_key& key, uint32_t row_count) { - _pkey = key; - } - - void accept_new_partition(uint32_t row_count) { - assert(0); - } - - void accept_new_row(const clustering_key& key, const query::result_row_view& static_row, - const query::result_row_view& row) { - update_parameters::prefetch_data::row cells; - - auto row_iterator = row.iterator(); - for (auto&& id : _ps.regular_columns) { - add_cell(cells, _schema->regular_column_at(id), row_iterator.next_collection_cell()); - } - - _data.rows.emplace(std::make_pair(*_pkey, key), std::move(cells)); - } - - void accept_new_row(const query::result_row_view& static_row, const query::result_row_view& row) { - assert(0); - } - - void accept_partition_end(const query::result_row_view& static_row) { - update_parameters::prefetch_data::row cells; - - auto static_row_iterator = static_row.iterator(); - for (auto&& id : _ps.static_columns) 
{ - add_cell(cells, _schema->static_column_at(id), static_row_iterator.next_collection_cell()); - } - - _data.rows.emplace(std::make_pair(*_pkey, clustering_key_prefix::make_empty()), std::move(cells)); - } -}; - -future -modification_statement::read_required_rows( - distributed& proxy, - dht::partition_range_vector keys, - lw_shared_ptr ranges, - bool local, - db::consistency_level cl, - tracing::trace_state_ptr trace_state) { - if (!requires_read()) { - return make_ready_future( - update_parameters::prefetched_rows_type{}); - } - try { - validate_for_read(keyspace(), cl); - } catch (exceptions::invalid_request_exception& e) { - throw exceptions::invalid_request_exception(sprint("Write operation requires a read but consistency %s is not supported on reads", cl)); - } - - static auto is_collection = [] (const column_definition& def) { - return def.type->is_collection(); - }; - - // FIXME: we read all collection columns, but could be enhanced just to read the list(s) being RMWed - std::vector<column_id> static_cols; - boost::range::push_back(static_cols, s->static_columns() - | boost::adaptors::filtered(is_collection) | boost::adaptors::transformed([] (auto&& col) { return col.id; })); - std::vector<column_id> regular_cols; - boost::range::push_back(regular_cols, s->regular_columns() - | boost::adaptors::filtered(is_collection) | boost::adaptors::transformed([] (auto&& col) { return col.id; })); - query::partition_slice ps( - *ranges, - std::move(static_cols), - std::move(regular_cols), - query::partition_slice::option_set::of< - query::partition_slice::option::send_partition_key, - query::partition_slice::option::send_clustering_key, - query::partition_slice::option::collections_as_maps>()); - query::read_command cmd(s->id(), s->version(), ps, std::numeric_limits<uint32_t>::max()); - // FIXME: ignoring "local" - return proxy.local().query(s, make_lw_shared(std::move(cmd)), std::move(keys), cl, std::move(trace_state)).then([this, ps] (auto result) { - return query::result_view::do_with(*result, [&] (query::result_view v) { - auto prefetched_rows = update_parameters::prefetched_rows_type({update_parameters::prefetch_data(s)}); - v.consume(ps, prefetch_data_builder(s, prefetched_rows.value(), ps)); - return prefetched_rows; - }); - }); -} - -std::vector -modification_statement::create_clustering_ranges(const query_options& options) { - // If the only updated/deleted columns are static, then we don't need clustering columns. - // And in fact, unless it is an INSERT, we reject if clustering columns are provided as that - // suggests something unintended. For instance, given: - // CREATE TABLE t (k int, v int, s int static, PRIMARY KEY (k, v)) - // it can make sense to do: - // INSERT INTO t(k, v, s) VALUES (0, 1, 2) - // but both - // UPDATE t SET s = 3 WHERE k = 0 AND v = 1 - // DELETE v FROM t WHERE k = 0 AND v = 1 - // sounds like you don't really understand what you are doing.
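The comment block above pins down the rule that the code below enforces: when a statement modifies only static columns, a restriction on clustering columns is accepted only for INSERT; UPDATE/DELETE with such a restriction is rejected, and with no restriction an open-ended clustering range is used. A standalone sketch of just that decision (stmt_kind and static_only_allows are illustrative names; std::invalid_argument stands in for invalid_request_exception):

    #include <cassert>
    #include <stdexcept>

    enum class stmt_kind { insert, update, del };

    // Returns true when an open-ended clustering range may be used,
    // false when INSERT must build a proper clustering prefix; throws
    // for the UPDATE/DELETE misuse case described above.
    bool static_only_allows(stmt_kind k, bool has_clustering_restrictions) {
        if (!has_clustering_restrictions) {
            return true;                 // no clustering prefix needed at all
        }
        if (k != stmt_kind::insert) {
            throw std::invalid_argument(
                "clustering restriction on a statement that modifies only static columns");
        }
        return false;                    // INSERT: build the proper prefix
    }

    int main() {
        assert(static_only_allows(stmt_kind::update, false));
        try {
            static_only_allows(stmt_kind::del, true);
            assert(false);
        } catch (const std::invalid_argument&) {}
    }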
- if (applies_only_to_static_columns()) { - // If we set no non-static columns, then it's fine not to have clustering columns - if (!_restrictions->has_clustering_columns_restriction()) { - return { query::clustering_range::make_open_ended_both_sides() }; - } - - // If we do have clustering columns however, then either it's an INSERT and the query is valid - // but we still need to build a proper prefix, or it's not an INSERT, and then we want to reject - // (see above) - if (!type.is_insert()) { - if (_restrictions->has_clustering_columns_restriction()) { - throw exceptions::invalid_request_exception(sprint( - "Invalid restriction on clustering column %s since the %s statement modifies only static columns", - _restrictions->get_clustering_columns_restrictions()->get_column_defs().front()->name_as_text(), type)); - } - - // we should never get there, as it would contradict !_restrictions->has_clustering_columns_restriction() - throw std::logic_error("contradicts !_restrictions->has_clustering_columns_restriction()"); - } - } - - return _restrictions->get_clustering_bounds(options); -} - -dht::partition_range_vector -modification_statement::build_partition_keys(const query_options& options) { - auto keys = _restrictions->get_partition_key_restrictions()->bounds_ranges(options); - for (auto&& k : keys) { - validation::validate_cql_key(s, *k.start()->value().key()); - } - return keys; -} - -struct modification_statement_executor { - static auto get() { return &modification_statement::do_execute; } -}; -static thread_local auto modify_stage = seastar::make_execution_stage("cql3_modification", modification_statement_executor::get()); - -future<::shared_ptr> -modification_statement::execute(distributed& proxy, service::query_state& qs, const query_options& options) { - return modify_stage(this, seastar::ref(proxy), seastar::ref(qs), seastar::cref(options)); - } - -future<::shared_ptr> -modification_statement::do_execute(distributed& proxy, service::query_state& qs, const query_options& options) { - if (has_conditions() && options.get_protocol_version() == 1) { - throw exceptions::invalid_request_exception("Conditional updates are not supported by the protocol version in use.
You need to upgrade to a driver using the native protocol v2."); - } - - tracing::add_table_name(qs.get_trace_state(), keyspace(), column_family()); - - if (has_conditions()) { - return execute_with_condition(proxy, qs, options); - } - - inc_cql_stats(); - - return execute_without_condition(proxy, qs, options).then([] { - return make_ready_future<::shared_ptr>( - ::shared_ptr{}); - }); -} - -future<> -modification_statement::execute_without_condition(distributed& proxy, service::query_state& qs, const query_options& options) { - auto cl = options.get_consistency(); - if (is_counter()) { - db::validate_counter_for_write(s, cl); - } else { - db::validate_for_write(s->ks_name(), cl); - } - - return get_mutations(proxy, options, false, options.get_timestamp(qs), qs.get_trace_state()).then([this, cl, &proxy, &qs] (auto mutations) { - if (mutations.empty()) { - return now(); - } - - return proxy.local().mutate_with_triggers(std::move(mutations), cl, false, qs.get_trace_state(), this->is_raw_counter_shard_write()); - }); -} - -future<::shared_ptr> -modification_statement::execute_with_condition(distributed& proxy, service::query_state& qs, const query_options& options) { - fail(unimplemented::cause::LWT); -#if 0 - List keys = buildPartitionKeyNames(options); - // We don't support IN for CAS operation so far - if (keys.size() > 1) - throw new InvalidRequestException("IN on the partition key is not supported with conditional updates"); - - ByteBuffer key = keys.get(0); - long now = options.getTimestamp(queryState); - Composite prefix = createClusteringPrefix(options); - - CQL3CasRequest request = new CQL3CasRequest(cfm, key, false); - addConditions(prefix, request, options); - request.addRowUpdate(prefix, this, options, now); - - ColumnFamily result = StorageProxy.cas(keyspace(), - columnFamily(), - key, - request, - options.getSerialConsistency(), - options.getConsistency(), - queryState.getClientState()); - return new ResultMessage.Rows(buildCasResultSet(key, result, options)); -#endif -} - -future<::shared_ptr> -modification_statement::execute_internal(distributed& proxy, service::query_state& qs, const query_options& options) { - if (has_conditions()) { - throw exceptions::unsupported_operation_exception(); - } - - tracing::add_table_name(qs.get_trace_state(), keyspace(), column_family()); - - inc_cql_stats(); - - return get_mutations(proxy, options, true, options.get_timestamp(qs), qs.get_trace_state()).then( - [&proxy] (auto mutations) { - return proxy.local().mutate_locally(std::move(mutations)); - }).then( - [] { - return make_ready_future<::shared_ptr>( - ::shared_ptr {}); - }); -} - -void -modification_statement::process_where_clause(database& db, std::vector where_clause, ::shared_ptr names) { - _restrictions = ::make_shared( - db, s, type, where_clause, std::move(names), applies_only_to_static_columns(), _sets_a_collection, false); - if (_restrictions->get_partition_key_restrictions()->is_on_token()) { - throw exceptions::invalid_request_exception(sprint("The token function cannot be used in WHERE clauses for UPDATE and DELETE statements: %s", - _restrictions->get_partition_key_restrictions()->to_string())); - } - if (!_restrictions->get_non_pk_restriction().empty()) { - auto column_names = ::join(", ", _restrictions->get_non_pk_restriction() - | boost::adaptors::map_keys - | boost::adaptors::indirected - | boost::adaptors::transformed(std::mem_fn(&column_definition::name))); - throw exceptions::invalid_request_exception(sprint("Invalid where clause contains non PRIMARY KEY columns: 
%s", column_names)); - } - auto ck_restrictions = _restrictions->get_clustering_columns_restrictions(); - if (ck_restrictions->is_slice() && !allow_clustering_key_slices()) { - throw exceptions::invalid_request_exception(sprint("Invalid operator in where clause %s", ck_restrictions->to_string())); - } - if (_restrictions->has_unrestricted_clustering_columns() && !applies_only_to_static_columns() && !s->is_dense()) { - // Tomek: Origin had "&& s->comparator->is_composite()" in the condition below. - // Comparator is a thrift concept, not CQL concept, and we want to avoid - // using thrift concepts here. I think it's safe to drop this here because the only - // case in which we would get a non-composite comparator here would be if the cell - // name type is SimpleSparse, which means: - // (a) CQL compact table without clustering columns - // (b) thrift static CF with non-composite comparator - // Those tables don't have clustering columns so we wouldn't reach this code, thus - // the check seems redundant. - if (require_full_clustering_key()) { - auto& col = s->column_at(column_kind::clustering_key, ck_restrictions->size()); - throw exceptions::invalid_request_exception(sprint("Missing mandatory PRIMARY KEY part %s", col.name_as_text())); - } - // In general, we can't modify specific columns if not all clustering columns have been specified. - // However, if we modify only static columns, it's fine since we won't really use the prefix anyway. - if (!ck_restrictions->is_slice()) { - auto& col = s->column_at(column_kind::clustering_key, ck_restrictions->size()); - for (auto&& op : _column_operations) { - if (!op->column.is_static()) { - throw exceptions::invalid_request_exception(sprint( - "Primary key column '%s' must be specified in order to modify column '%s'", - col.name_as_text(), op->column.name_as_text())); - } - } - } - } - if (_restrictions->has_partition_key_unrestricted_components()) { - auto& col = s->column_at(column_kind::partition_key, _restrictions->get_partition_key_restrictions()->size()); - throw exceptions::invalid_request_exception(sprint("Missing mandatory PRIMARY KEY part %s", col.name_as_text())); - } -} - -namespace raw { - -std::unique_ptr -modification_statement::modification_statement::prepare(database& db, cql_stats& stats) { - schema_ptr schema = validation::validate_column_family(db, keyspace(), column_family()); - auto bound_names = get_bound_variables(); - auto statement = prepare(db, bound_names, stats); - auto partition_key_bind_indices = bound_names->get_partition_key_bind_indexes(schema); - return std::make_unique(std::move(statement), *bound_names, std::move(partition_key_bind_indices)); -} - -::shared_ptr -modification_statement::prepare(database& db, ::shared_ptr bound_names, cql_stats& stats) { - schema_ptr schema = validation::validate_column_family(db, keyspace(), column_family()); - - auto prepared_attributes = _attrs->prepare(db, keyspace(), column_family()); - prepared_attributes->collect_marker_specification(bound_names); - - ::shared_ptr stmt = prepare_internal(db, schema, bound_names, std::move(prepared_attributes), stats); - - if (_if_not_exists || _if_exists || !_conditions.empty()) { - if (stmt->is_counter()) { - throw exceptions::invalid_request_exception("Conditional updates are not supported on counter tables"); - } - if (_attrs->timestamp) { - throw exceptions::invalid_request_exception("Cannot provide custom timestamp for conditional updates"); - } - - if (_if_not_exists) { - // To have both 'IF NOT EXISTS' and some other conditions 
doesn't make sense. - // So far this is enforced by the parser, but let's assert it for sanity if ever the parse changes. - assert(_conditions.empty()); - assert(!_if_exists); - stmt->set_if_not_exist_condition(); - } else if (_if_exists) { - assert(_conditions.empty()); - assert(!_if_not_exists); - stmt->set_if_exist_condition(); - } else { - for (auto&& entry : _conditions) { - auto id = entry.first->prepare_column_identifier(schema); - const column_definition* def = get_column_definition(schema, *id); - if (!def) { - throw exceptions::invalid_request_exception(sprint("Unknown identifier %s", *id)); - } - - auto condition = entry.second->prepare(db, keyspace(), *def); - condition->collect_marker_specificaton(bound_names); - - if (def->is_primary_key()) { - throw exceptions::invalid_request_exception(sprint("PRIMARY KEY column '%s' cannot have IF conditions", *id)); - } - stmt->add_condition(condition); - } - } - stmt->validate_where_clause_for_conditions(); - } - return stmt; -} - -} - -void -modification_statement::validate(distributed&, const service::client_state& state) { - if (has_conditions() && attrs->is_timestamp_set()) { - throw exceptions::invalid_request_exception("Cannot provide custom timestamp for conditional updates"); - } - - if (is_counter() && attrs->is_timestamp_set() && !is_raw_counter_shard_write()) { - throw exceptions::invalid_request_exception("Cannot provide custom timestamp for counter updates"); - } - - if (is_counter() && attrs->is_time_to_live_set()) { - throw exceptions::invalid_request_exception("Cannot provide custom TTL for counter updates"); - } - - if (is_view()) { - throw exceptions::invalid_request_exception("Cannot directly modify a materialized view"); - } -} - -bool modification_statement::depends_on_keyspace(const sstring& ks_name) const { - return keyspace() == ks_name; -} - -bool modification_statement::depends_on_column_family(const sstring& cf_name) const { - return column_family() == cf_name; -} - -void modification_statement::add_operation(::shared_ptr op) { - if (op->column.is_static()) { - _sets_static_columns = true; - } else { - _sets_regular_columns = true; - _sets_a_collection |= op->column.type->is_collection(); - } - - if (op->column.is_counter()) { - auto is_raw_counter_shard_write = op->is_raw_counter_shard_write(); - if (_is_raw_counter_shard_write && _is_raw_counter_shard_write != is_raw_counter_shard_write) { - throw exceptions::invalid_request_exception("Cannot mix regular and raw counter updates"); - } - _is_raw_counter_shard_write = is_raw_counter_shard_write; - } - - _column_operations.push_back(std::move(op)); -} - -void modification_statement::add_condition(::shared_ptr cond) { - if (cond->column.is_static()) { - _sets_static_columns = true; - _static_conditions.emplace_back(std::move(cond)); - } else { - _sets_regular_columns = true; - _sets_a_collection |= cond->column.type->is_collection(); - _column_conditions.emplace_back(std::move(cond)); - } -} - -void modification_statement::set_if_not_exist_condition() { - _if_not_exists = true; -} - -bool modification_statement::has_if_not_exist_condition() const { - return _if_not_exists; -} - -void modification_statement::set_if_exist_condition() { - _if_exists = true; -} - -bool modification_statement::has_if_exist_condition() const { - return _if_exists; -} - -bool modification_statement::requires_read() { - return std::any_of(_column_operations.begin(), _column_operations.end(), [] (auto&& op) { - return op->requires_read(); - }); -} - -bool 
-modification_statement::has_conditions() {
-    return _if_not_exists || _if_exists || !_column_conditions.empty() || !_static_conditions.empty();
-}
-
-void modification_statement::validate_where_clause_for_conditions() {
-    // no-op by default
-}
-
-const statement_type statement_type::INSERT = statement_type(statement_type::type::insert);
-const statement_type statement_type::UPDATE = statement_type(statement_type::type::update);
-const statement_type statement_type::DELETE = statement_type(statement_type::type::del);
-const statement_type statement_type::SELECT = statement_type(statement_type::type::select);
-
-namespace raw {
-
-modification_statement::modification_statement(::shared_ptr<cf_name> name, ::shared_ptr<attributes::raw> attrs, conditions_vector conditions, bool if_not_exists, bool if_exists)
-    : cf_statement{std::move(name)}
-    , _attrs{std::move(attrs)}
-    , _conditions{std::move(conditions)}
-    , _if_not_exists{if_not_exists}
-    , _if_exists{if_exists}
-{ }
-
-}
-
-}
-
-}
diff --git a/scylla/cql3/statements/modification_statement.hh b/scylla/cql3/statements/modification_statement.hh
deleted file mode 100644
index 7442231..0000000
--- a/scylla/cql3/statements/modification_statement.hh
+++ /dev/null
@@ -1,376 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */ - -#pragma once - -#include "cql3/restrictions/restriction.hh" -#include "cql3/statements/raw/cf_statement.hh" -#include "cql3/statements/bound.hh" -#include "cql3/column_identifier.hh" -#include "cql3/update_parameters.hh" -#include "cql3/column_condition.hh" -#include "cql3/cql_statement.hh" -#include "cql3/attributes.hh" -#include "cql3/operation.hh" -#include "cql3/relation.hh" -#include "cql3/restrictions/statement_restrictions.hh" -#include "cql3/single_column_relation.hh" -#include "cql3/statements/statement_type.hh" - -#include "db/consistency_level.hh" - -#include "core/shared_ptr.hh" -#include "core/future-util.hh" - -#include "unimplemented.hh" -#include "validation.hh" -#include "service/storage_proxy.hh" - -#include -#include - -namespace cql3 { - -namespace statements { - - -namespace raw { class modification_statement; } - -/* - * Abstract parent class of individual modifications, i.e. INSERT, UPDATE and DELETE. - */ -class modification_statement : public cql_statement_no_metadata { -private: - static thread_local const ::shared_ptr CAS_RESULT_COLUMN; - -public: - const statement_type type; - -private: - const uint32_t _bound_terms; - -public: - const schema_ptr s; - const std::unique_ptr attrs; - -protected: - std::vector<::shared_ptr> _column_operations; -private: - // Separating normal and static conditions makes things somewhat easier - std::vector<::shared_ptr> _column_conditions; - std::vector<::shared_ptr> _static_conditions; - - bool _if_not_exists = false; - bool _if_exists = false; - - bool _sets_static_columns = false; - bool _sets_regular_columns = false; - bool _sets_a_collection = false; - std::experimental::optional _is_raw_counter_shard_write; - - const std::function)> get_column_for_condition = - [](::shared_ptr cond) -> const column_definition& { - return cond->column; - }; - - uint64_t* _cql_modification_counter_ptr = nullptr; - - ::shared_ptr _restrictions; -public: - modification_statement(statement_type type_, uint32_t bound_terms, schema_ptr schema_, std::unique_ptr attrs_, uint64_t* cql_stats_counter_ptr); - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override; - - virtual bool require_full_clustering_key() const = 0; - - virtual bool allow_clustering_key_slices() const = 0; - - virtual void add_update_for_key(mutation& m, const query::clustering_range& range, const update_parameters& params) = 0; - - virtual uint32_t get_bound_terms() override; - - virtual const sstring& keyspace() const; - - virtual const sstring& column_family() const; - - virtual bool is_counter() const; - - virtual bool is_view() const; - - int64_t get_timestamp(int64_t now, const query_options& options) const; - - bool is_timestamp_set() const; - - gc_clock::duration get_time_to_live(const query_options& options) const; - - virtual future<> check_access(const service::client_state& state) override; - - void validate(distributed&, const service::client_state& state) override; - - virtual bool depends_on_keyspace(const sstring& ks_name) const override; - - virtual bool depends_on_column_family(const sstring& cf_name) const override; - - void add_operation(::shared_ptr op); - -#if 0 - public Iterable getColumnsWithConditions() - { - if (ifNotExists || ifExists) - return null; - - return Iterables.concat(columnConditions == null ? Collections.emptyList() : Iterables.transform(columnConditions, getColumnForCondition), - staticConditions == null ? 
Collections.emptyList() : Iterables.transform(staticConditions, getColumnForCondition)); - } -#endif - - void inc_cql_stats() { - ++(*_cql_modification_counter_ptr); - } - - const ::shared_ptr& restrictions() const { - return _restrictions; - } -public: - void add_condition(::shared_ptr cond); - - void set_if_not_exist_condition(); - - bool has_if_not_exist_condition() const; - - void set_if_exist_condition(); - - bool has_if_exist_condition() const; - - bool is_raw_counter_shard_write() const { - return _is_raw_counter_shard_write.value_or(false); - } - - void process_where_clause(database& db, std::vector where_clause, ::shared_ptr names); - -private: - dht::partition_range_vector build_partition_keys(const query_options& options); - query::clustering_row_ranges create_clustering_ranges(const query_options& options); - - bool applies_only_to_static_columns() const { - return _sets_static_columns && !_sets_regular_columns; - } -public: - bool requires_read(); - -protected: - future read_required_rows( - distributed& proxy, - dht::partition_range_vector keys, - lw_shared_ptr ranges, - bool local, - db::consistency_level cl, - tracing::trace_state_ptr trace_state); -private: - future<::shared_ptr> - do_execute(distributed& proxy, service::query_state& qs, const query_options& options); - friend class modification_statement_executor; -public: - bool has_conditions(); - - virtual future<::shared_ptr> - execute(distributed& proxy, service::query_state& qs, const query_options& options) override; - - virtual future<::shared_ptr> - execute_internal(distributed& proxy, service::query_state& qs, const query_options& options) override; - -private: - future<> - execute_without_condition(distributed& proxy, service::query_state& qs, const query_options& options); - - future<::shared_ptr> - execute_with_condition(distributed& proxy, service::query_state& qs, const query_options& options); - -#if 0 - public void addConditions(Composite clusteringPrefix, CQL3CasRequest request, QueryOptions options) throws InvalidRequestException - { - if (ifNotExists) - { - // If we use ifNotExists, if the statement applies to any non static columns, then the condition is on the row of the non-static - // columns and the prefix should be the clusteringPrefix. But if only static columns are set, then the ifNotExists apply to the existence - // of any static columns and we should use the prefix for the "static part" of the partition. 
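-            // Illustrative only (the table and column names below are made up, not from the sources):
-            //   CREATE TABLE t (pk int, ck int, s int STATIC, v int, PRIMARY KEY (pk, ck));
-            //   UPDATE t SET v = 1 WHERE pk = 0 AND ck = 0 IF NOT EXISTS;  -- conditions on the clustering row (0, 0)
-            //   UPDATE t SET s = 1 WHERE pk = 0 IF NOT EXISTS;             -- conditions on the static row of partition 0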
-            request.addNotExist(clusteringPrefix);
-        }
-        else if (ifExists)
-        {
-            request.addExist(clusteringPrefix);
-        }
-        else
-        {
-            if (columnConditions != null)
-                request.addConditions(clusteringPrefix, columnConditions, options);
-            if (staticConditions != null)
-                request.addConditions(cfm.comparator.staticPrefix(), staticConditions, options);
-        }
-    }
-
-    private ResultSet buildCasResultSet(ByteBuffer key, ColumnFamily cf, QueryOptions options) throws InvalidRequestException
-    {
-        return buildCasResultSet(keyspace(), key, columnFamily(), cf, getColumnsWithConditions(), false, options);
-    }
-
-    public static ResultSet buildCasResultSet(String ksName, ByteBuffer key, String cfName, ColumnFamily cf, Iterable<ColumnDefinition> columnsWithConditions, boolean isBatch, QueryOptions options)
-    throws InvalidRequestException
-    {
-        boolean success = cf == null;
-
-        ColumnSpecification spec = new ColumnSpecification(ksName, cfName, CAS_RESULT_COLUMN, BooleanType.instance);
-        ResultSet.Metadata metadata = new ResultSet.Metadata(Collections.singletonList(spec));
-        List<List<ByteBuffer>> rows = Collections.singletonList(Collections.singletonList(BooleanType.instance.decompose(success)));
-
-        ResultSet rs = new ResultSet(metadata, rows);
-        return success ? rs : merge(rs, buildCasFailureResultSet(key, cf, columnsWithConditions, isBatch, options));
-    }
-
-    private static ResultSet merge(ResultSet left, ResultSet right)
-    {
-        if (left.size() == 0)
-            return right;
-        else if (right.size() == 0)
-            return left;
-
-        assert left.size() == 1;
-        int size = left.metadata.names.size() + right.metadata.names.size();
-        List<ColumnSpecification> specs = new ArrayList<ColumnSpecification>(size);
-        specs.addAll(left.metadata.names);
-        specs.addAll(right.metadata.names);
-        List<List<ByteBuffer>> rows = new ArrayList<>(right.size());
-        for (int i = 0; i < right.size(); i++)
-        {
-            List<ByteBuffer> row = new ArrayList<ByteBuffer>(size);
-            row.addAll(left.rows.get(0));
-            row.addAll(right.rows.get(i));
-            rows.add(row);
-        }
-        return new ResultSet(new ResultSet.Metadata(specs), rows);
-    }
-
-    private static ResultSet buildCasFailureResultSet(ByteBuffer key, ColumnFamily cf, Iterable<ColumnDefinition> columnsWithConditions, boolean isBatch, QueryOptions options)
-    throws InvalidRequestException
-    {
-        CFMetaData cfm = cf.metadata();
-        Selection selection;
-        if (columnsWithConditions == null)
-        {
-            selection = Selection.wildcard(cfm);
-        }
-        else
-        {
-            // We can have multiple conditions on the same columns (for collections) so use a set
-            // to avoid duplicates, but preserve the order just so it follows the order of IF in the query in general
-            Set<ColumnDefinition> defs = new LinkedHashSet<>();
-            // Adding the partition key for batches to disambiguate if the conditions span multiple rows (we don't add them outside
-            // of batches for compatibility's sake).
- if (isBatch) - { - defs.addAll(cfm.partitionKeyColumns()); - defs.addAll(cfm.clusteringColumns()); - } - for (ColumnDefinition def : columnsWithConditions) - defs.add(def); - selection = Selection.forColumns(cfm, new ArrayList<>(defs)); - - } - - long now = System.currentTimeMillis(); - Selection.ResultSetBuilder builder = selection.resultSetBuilder(now); - SelectStatement.forSelection(cfm, selection).processColumnFamily(key, cf, options, now, builder); - - return builder.build(options.getProtocolVersion()); - } - - public ResultMessage executeInternal(QueryState queryState, QueryOptions options) throws RequestValidationException, RequestExecutionException - { - if (hasConditions()) - throw new UnsupportedOperationException(); - - for (IMutation mutation : getMutations(options, true, queryState.getTimestamp())) - { - // We don't use counters internally. - assert mutation instanceof Mutation; - - ((Mutation) mutation).apply(); - } - return null; - } -#endif - -public: - /** - * Convert statement into a list of mutations to apply on the server - * - * @param options value for prepared statement markers - * @param local if true, any requests (for collections) performed by getMutation should be done locally only. - * @param now the current timestamp in microseconds to use if no timestamp is user provided. - * - * @return vector of the mutations - * @throws invalid_request_exception on invalid requests - */ - future> get_mutations(distributed& proxy, const query_options& options, bool local, int64_t now, tracing::trace_state_ptr trace_state); - -public: - future> make_update_parameters( - distributed& proxy, - lw_shared_ptr keys, - lw_shared_ptr ranges, - const query_options& options, - bool local, - int64_t now, - tracing::trace_state_ptr trace_state); - -protected: - /** - * If there are conditions on the statement, this is called after the where clause and conditions have been - * processed to check that they are compatible. - * @throws InvalidRequestException - */ - virtual void validate_where_clause_for_conditions(); - friend class raw::modification_statement; -}; - -} - -} diff --git a/scylla/cql3/statements/parsed_statement.cc b/scylla/cql3/statements/parsed_statement.cc deleted file mode 100644 index 3a13920..0000000 --- a/scylla/cql3/statements/parsed_statement.cc +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2014 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "raw/parsed_statement.hh" - -#include "prepared_statement.hh" - -namespace cql3 { - -namespace statements { - -namespace raw { - -parsed_statement::~parsed_statement() -{ } - -shared_ptr parsed_statement::get_bound_variables() { - return _variables; -} - -// Used by the parser and preparable statement -void parsed_statement::set_bound_variables(const std::vector<::shared_ptr>& bound_names) { - _variables = ::make_shared(bound_names); -} - -bool parsed_statement::uses_function(const sstring& ks_name, const sstring& function_name) const { - return false; -} - -} - -prepared_statement::prepared_statement(::shared_ptr statement_, std::vector<::shared_ptr> bound_names_, std::vector partition_key_bind_indices) - : statement(std::move(statement_)) - , bound_names(std::move(bound_names_)) - , partition_key_bind_indices(std::move(partition_key_bind_indices)) -{ } - -prepared_statement::prepared_statement(::shared_ptr statement_, const variable_specifications& names, const std::vector& partition_key_bind_indices) - : prepared_statement(statement_, names.get_specifications(), partition_key_bind_indices) -{ } - -prepared_statement::prepared_statement(::shared_ptr statement_, variable_specifications&& names, std::vector&& partition_key_bind_indices) - : prepared_statement(statement_, std::move(names).get_specifications(), std::move(partition_key_bind_indices)) -{ } - -prepared_statement::prepared_statement(::shared_ptr&& statement_) - : prepared_statement(statement_, std::vector<::shared_ptr>(), std::vector()) -{ } - -} - -} diff --git a/scylla/cql3/statements/permission_altering_statement.cc b/scylla/cql3/statements/permission_altering_statement.cc deleted file mode 100644 index b5cd84e..0000000 --- a/scylla/cql3/statements/permission_altering_statement.cc +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include
-
-#include "permission_altering_statement.hh"
-#include "cql3/query_processor.hh"
-#include "cql3/query_options.hh"
-#include "cql3/selection/selection.hh"
-#include "auth/auth.hh"
-
-cql3::statements::permission_altering_statement::permission_altering_statement(
-        auth::permission_set permissions, auth::data_resource resource,
-        sstring username)
-    : _permissions(permissions), _resource(std::move(resource)), _username(
-            std::move(username)) {
-}
-
-void cql3::statements::permission_altering_statement::validate(distributed<service::storage_proxy>& proxy, const service::client_state& state) {
-    // require an authenticated user, so that this statement can't be used
-    // anonymously to probe whether a given user exists.
-    state.ensure_not_anonymous();
-}
-
-future<> cql3::statements::permission_altering_statement::check_access(const service::client_state& state) {
-    return auth::auth::is_existing_user(_username).then([this, &state](bool exists) {
-        if (!exists) {
-            throw exceptions::invalid_request_exception(sprint("User %s doesn't exist", _username));
-        }
-        maybe_correct_resource(_resource, state);
-        if (!_resource.exists()) {
-            throw exceptions::invalid_request_exception(sprint("%s doesn't exist", _resource));
-        }
-
-        // check that the user has AUTHORIZE permission on the resource or its parents, otherwise reject GRANT/REVOKE.
-        return state.ensure_has_permission(auth::permission::AUTHORIZE, _resource).then([this, &state] {
-            static auto perm_list = {
-                auth::permission::READ,
-                auth::permission::WRITE,
-                auth::permission::CREATE,
-                auth::permission::ALTER,
-                auth::permission::DROP,
-                auth::permission::SELECT,
-                auth::permission::MODIFY,
-            };
-            return do_for_each(perm_list, [this, &state](auth::permission p) {
-                // TODO: how about we re-write the access check to check a set
-                // right away. Might need some tweaking of enum_set to make it
-                // neat/transparent, but still...
-                // This is not critical code however.
-                if (_permissions.contains(p)) {
-                    return state.ensure_has_permission(p, _resource);
-                }
-                return make_ready_future();
-            });
-        });
-    });
-}
-
diff --git a/scylla/cql3/statements/permission_altering_statement.hh b/scylla/cql3/statements/permission_altering_statement.hh
deleted file mode 100644
index f8c5e36..0000000
--- a/scylla/cql3/statements/permission_altering_statement.hh
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright 2016 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "authorization_statement.hh" -#include "auth/permission.hh" -#include "auth/data_resource.hh" - -namespace cql3 { - -namespace statements { - -class permission_altering_statement : public authorization_statement { -protected: - auth::permission_set _permissions; - auth::data_resource _resource; - sstring _username; - -public: - permission_altering_statement(auth::permission_set, auth::data_resource, sstring); - - void validate(distributed&, const service::client_state&) override; - future<> check_access(const service::client_state&) override; -}; - -} - -} diff --git a/scylla/cql3/statements/prepared_statement.hh b/scylla/cql3/statements/prepared_statement.hh deleted file mode 100644 index 1856534..0000000 --- a/scylla/cql3/statements/prepared_statement.hh +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "cql3/variable_specifications.hh" -#include "cql3/column_specification.hh" -#include "cql3/column_identifier.hh" -#include "cql3/cql_statement.hh" - -#include "core/shared_ptr.hh" - -#include -#include -#include -#include - -namespace cql3 { - -namespace statements { - -struct invalidated_prepared_usage_attempt { - void operator()() const { - throw exceptions::invalidated_prepared_usage_attempt_exception(); - } -}; - -class prepared_statement : public weakly_referencable { -public: - typedef seastar::checked_ptr> checked_weak_ptr; - -public: - sstring raw_cql_statement; - const ::shared_ptr statement; - const std::vector<::shared_ptr> bound_names; - std::vector partition_key_bind_indices; - - prepared_statement(::shared_ptr statement_, std::vector<::shared_ptr> bound_names_, std::vector partition_key_bind_indices); - - prepared_statement(::shared_ptr statement_, const variable_specifications& names, const std::vector& partition_key_bind_indices); - - prepared_statement(::shared_ptr statement_, variable_specifications&& names, std::vector&& partition_key_bind_indices); - - prepared_statement(::shared_ptr&& statement_); - - checked_weak_ptr checked_weak_from_this() { - return checked_weak_ptr(this->weak_from_this()); - } -}; - -} - -} diff --git a/scylla/cql3/statements/property_definitions.cc b/scylla/cql3/statements/property_definitions.cc deleted file mode 100644 index fc45a01..0000000 --- a/scylla/cql3/statements/property_definitions.cc +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "cql3/statements/property_definitions.hh" - -namespace cql3 { - -namespace statements { - -property_definitions::property_definitions() - : _properties{} -{ } - -void property_definitions::add_property(const sstring& name, sstring value) { - auto it = _properties.find(name); - if (it != _properties.end()) { - throw exceptions::syntax_exception(sprint("Multiple definition for property '%s'", name)); - } - _properties.emplace(name, value); -} - -void property_definitions::add_property(const sstring& name, const std::map& value) { - auto it = _properties.find(name); - if (it != _properties.end()) { - throw exceptions::syntax_exception(sprint("Multiple definition for property '%s'", name)); - } - _properties.emplace(name, value); -} - -void property_definitions::validate(const std::set& keywords, const std::set& obsolete) { - for (auto&& kv : _properties) { - auto&& name = kv.first; - if (keywords.count(name)) { - continue; - } - if (obsolete.count(name)) { -#if 0 - logger.warn("Ignoring obsolete property {}", name); -#endif - } else { - throw exceptions::syntax_exception(sprint("Unknown property '%s'", name)); - } - } -} - -std::experimental::optional property_definitions::get_simple(const sstring& name) const { - auto it = _properties.find(name); - if (it == _properties.end()) { - return std::experimental::nullopt; - } - try { - return boost::any_cast(it->second); - } catch (const boost::bad_any_cast& e) { - throw exceptions::syntax_exception(sprint("Invalid value for property '%s'. It should be a string", name)); - } -} - -std::experimental::optional> property_definitions::get_map(const sstring& name) const { - auto it = _properties.find(name); - if (it == _properties.end()) { - return std::experimental::nullopt; - } - try { - return boost::any_cast>(it->second); - } catch (const boost::bad_any_cast& e) { - throw exceptions::syntax_exception(sprint("Invalid value for property '%s'. 
It should be a map.", name)); - } -} - -bool property_definitions::has_property(const sstring& name) const { - return _properties.find(name) != _properties.end(); -} - -sstring property_definitions::get_string(sstring key, sstring default_value) const { - auto value = get_simple(key); - if (value) { - return value.value(); - } else { - return default_value; - } -} - -// Return a property value, typed as a Boolean -bool property_definitions::get_boolean(sstring key, bool default_value) const { - auto value = get_simple(key); - if (value) { - std::string s{value.value()}; - std::transform(s.begin(), s.end(), s.begin(), ::tolower); - return s == "1" || s == "true" || s == "yes"; - } else { - return default_value; - } -} - -// Return a property value, typed as a double -double property_definitions::get_double(sstring key, double default_value) const { - auto value = get_simple(key); - return to_double(key, value, default_value); -} - -double property_definitions::to_double(sstring key, std::experimental::optional value, double default_value) { - if (value) { - auto val = value.value(); - try { - return std::stod(val); - } catch (const std::exception& e) { - throw exceptions::syntax_exception(sprint("Invalid double value %s for '%s'", val, key)); - } - } else { - return default_value; - } -} - -// Return a property value, typed as an Integer -int32_t property_definitions::get_int(sstring key, int32_t default_value) const { - auto value = get_simple(key); - return to_int(key, value, default_value); -} - -int32_t property_definitions::to_int(sstring key, std::experimental::optional value, int32_t default_value) { - if (value) { - auto val = value.value(); - try { - return std::stoi(val); - } catch (const std::exception& e) { - throw exceptions::syntax_exception(sprint("Invalid integer value %s for '%s'", val, key)); - } - } else { - return default_value; - } -} - -long property_definitions::to_long(sstring key, std::experimental::optional value, long default_value) { - if (value) { - auto val = value.value(); - try { - return std::stol(val); - } catch (const std::exception& e) { - throw exceptions::syntax_exception(sprint("Invalid long value %s for '%s'", val, key)); - } - } else { - return default_value; - } -} - -void property_definitions::remove_from_map_if_exists(const sstring& name, const sstring& key) -{ - auto it = _properties.find(name); - if (it == _properties.end()) { - return; - } - try { - auto map = boost::any_cast>(it->second); - map.erase(key); - _properties[name] = map; - } catch (const boost::bad_any_cast& e) { - throw exceptions::syntax_exception(sprint("Invalid value for property '%s'. It should be a map.", name)); - } -} - -} - -} diff --git a/scylla/cql3/statements/property_definitions.hh b/scylla/cql3/statements/property_definitions.hh deleted file mode 100644 index c99968a..0000000 --- a/scylla/cql3/statements/property_definitions.hh +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "exceptions/exceptions.hh" -#include "core/print.hh" -#include "core/sstring.hh" - -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace cql3 { - -namespace statements { - -class property_definitions { -protected: -#if 0 - protected static final Logger logger = LoggerFactory.getLogger(PropertyDefinitions.class); -#endif - std::unordered_map _properties; - - property_definitions(); -public: - void add_property(const sstring& name, sstring value); - - void add_property(const sstring& name, const std::map& value); - - void validate(const std::set& keywords, const std::set& obsolete); - -protected: - std::experimental::optional get_simple(const sstring& name) const; - - std::experimental::optional> get_map(const sstring& name) const; - - void remove_from_map_if_exists(const sstring& name, const sstring& key); -public: - bool has_property(const sstring& name) const; - - sstring get_string(sstring key, sstring default_value) const; - - // Return a property value, typed as a Boolean - bool get_boolean(sstring key, bool default_value) const; - - // Return a property value, typed as a double - double get_double(sstring key, double default_value) const; - - static double to_double(sstring key, std::experimental::optional value, double default_value); - - // Return a property value, typed as an Integer - int32_t get_int(sstring key, int32_t default_value) const; - - static int32_t to_int(sstring key, std::experimental::optional value, int32_t default_value); - - static long to_long(sstring key, std::experimental::optional value, long default_value); -}; - -} - -} diff --git a/scylla/cql3/statements/raw/batch_statement.hh b/scylla/cql3/statements/raw/batch_statement.hh deleted file mode 100644 index ae8e1dd..0000000 --- a/scylla/cql3/statements/raw/batch_statement.hh +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/cql_statement.hh" -#include "modification_statement.hh" -#include "service/storage_proxy.hh" -#include "transport/messages/result_message.hh" -#include "timestamp.hh" -#include "log.hh" -#include "to_string.hh" -#include -#include -#include -#include -#include - -#pragma once - -namespace cql3 { - -namespace statements { - -namespace raw { - -class batch_statement : public raw::cf_statement { -public: - enum class type { - LOGGED, UNLOGGED, COUNTER - }; -private: - type _type; - shared_ptr _attrs; - std::vector> _parsed_statements; -public: - batch_statement( - type type_, - shared_ptr attrs, - std::vector> parsed_statements) - : cf_statement(nullptr) - , _type(type_) - , _attrs(std::move(attrs)) - , _parsed_statements(std::move(parsed_statements)) { - } - - virtual void prepare_keyspace(const service::client_state& state) override { - for (auto&& s : _parsed_statements) { - s->prepare_keyspace(state); - } - } - - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; -}; - -} - -} -} diff --git a/scylla/cql3/statements/raw/cf_statement.hh b/scylla/cql3/statements/raw/cf_statement.hh deleted file mode 100644 index 03528bb..0000000 --- a/scylla/cql3/statements/raw/cf_statement.hh +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2014 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. 
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/cf_name.hh" - -#include - -#include "parsed_statement.hh" - -namespace service { class client_state; } - -namespace cql3 { - -namespace statements { - -namespace raw { - -/** - * Abstract class for statements that apply on a given column family. - */ -class cf_statement : public parsed_statement { -protected: - ::shared_ptr _cf_name; - - cf_statement(::shared_ptr cf_name); -public: - virtual void prepare_keyspace(const service::client_state& state); - - // Only for internal calls, use the version with ClientState for user queries - virtual void prepare_keyspace(sstring keyspace); - - virtual const sstring& keyspace() const; - - virtual const sstring& column_family() const; -}; - -} - -} - -} diff --git a/scylla/cql3/statements/raw/delete_statement.hh b/scylla/cql3/statements/raw/delete_statement.hh deleted file mode 100644 index 84838bd..0000000 --- a/scylla/cql3/statements/raw/delete_statement.hh +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "cql3/statements/modification_statement.hh" -#include "cql3/statements/raw/modification_statement.hh" -#include "cql3/attributes.hh" -#include "cql3/operation.hh" -#include "database_fwd.hh" - -namespace cql3 { - -namespace statements { - -namespace raw { - -class delete_statement : public modification_statement { -private: - std::vector<::shared_ptr> _deletions; - std::vector<::shared_ptr> _where_clause; -public: - delete_statement(::shared_ptr name, - ::shared_ptr attrs, - std::vector<::shared_ptr> deletions, - std::vector<::shared_ptr> where_clause, - conditions_vector conditions, - bool if_exists); -protected: - virtual ::shared_ptr prepare_internal(database& db, schema_ptr schema, - ::shared_ptr bound_names, std::unique_ptr attrs, cql_stats& stats); -}; - -} - -} - -} diff --git a/scylla/cql3/statements/raw/insert_statement.hh b/scylla/cql3/statements/raw/insert_statement.hh deleted file mode 100644 index b5be04c..0000000 --- a/scylla/cql3/statements/raw/insert_statement.hh +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/statements/modification_statement.hh" -#include "cql3/statements/raw/modification_statement.hh" -#include "cql3/column_identifier.hh" -#include "cql3/term.hh" - -#include "database_fwd.hh" - -#include -#include "unimplemented.hh" - -namespace cql3 { - -namespace statements { - -namespace raw { - -class insert_statement : public raw::modification_statement { -private: - const std::vector<::shared_ptr> _column_names; - const std::vector<::shared_ptr> _column_values; -public: - /** - * A parsed INSERT statement. 
- * - * @param name column family being operated on - * @param columnNames list of column names - * @param columnValues list of column values (corresponds to names) - * @param attrs additional attributes for statement (CL, timestamp, timeToLive) - */ - insert_statement(::shared_ptr name, - ::shared_ptr attrs, - std::vector<::shared_ptr> column_names, - std::vector<::shared_ptr> column_values, - bool if_not_exists); - - virtual ::shared_ptr prepare_internal(database& db, schema_ptr schema, - ::shared_ptr bound_names, std::unique_ptr attrs, cql_stats& stats) override; - -}; - -} - -} - -} diff --git a/scylla/cql3/statements/raw/modification_statement.hh b/scylla/cql3/statements/raw/modification_statement.hh deleted file mode 100644 index 56e8ec2..0000000 --- a/scylla/cql3/statements/raw/modification_statement.hh +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "cql3/restrictions/restriction.hh" -#include "cql3/statements/raw/cf_statement.hh" -#include "cql3/column_identifier.hh" -#include "cql3/update_parameters.hh" -#include "cql3/column_condition.hh" -#include "cql3/cql_statement.hh" -#include "cql3/attributes.hh" -#include "cql3/operation.hh" -#include "cql3/relation.hh" - -#include "db/consistency_level.hh" - -#include "core/shared_ptr.hh" -#include "core/future-util.hh" - -#include "unimplemented.hh" -#include "validation.hh" -#include "service/storage_proxy.hh" - -#include - -namespace cql3 { - -namespace statements { - -class modification_statement; - -namespace raw { - -class modification_statement : public cf_statement { -public: - using conditions_vector = std::vector, ::shared_ptr>>; -protected: - const ::shared_ptr _attrs; - const std::vector, ::shared_ptr>> _conditions; -private: - const bool _if_not_exists; - const bool _if_exists; -protected: - modification_statement(::shared_ptr name, ::shared_ptr attrs, conditions_vector conditions, bool if_not_exists, bool if_exists); - -public: - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; - ::shared_ptr prepare(database& db, ::shared_ptr bound_names, cql_stats& stats); -protected: - virtual ::shared_ptr prepare_internal(database& db, schema_ptr schema, - ::shared_ptr bound_names, std::unique_ptr attrs, cql_stats& stats) = 0; -}; - -} - -} - -} diff --git a/scylla/cql3/statements/raw/parsed_statement.hh b/scylla/cql3/statements/raw/parsed_statement.hh deleted file mode 100644 index 68d8470..0000000 --- a/scylla/cql3/statements/raw/parsed_statement.hh +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "cql3/variable_specifications.hh" -#include "cql3/column_specification.hh" -#include "cql3/column_identifier.hh" -#include "cql3/stats.hh" - -#include - -#include -#include - -namespace cql3 { - -namespace statements { - -class prepared_statement; - -namespace raw { - -class parsed_statement { -private: - ::shared_ptr _variables; - -public: - using prepared = statements::prepared_statement; - virtual ~parsed_statement(); - - shared_ptr get_bound_variables(); - - void set_bound_variables(const std::vector<::shared_ptr>& bound_names); - - virtual std::unique_ptr prepare(database& db, cql_stats& stats) = 0; - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const; -}; - -} - -} - -} diff --git a/scylla/cql3/statements/raw/select_statement.hh b/scylla/cql3/statements/raw/select_statement.hh deleted file mode 100644 index 677918d..0000000 --- a/scylla/cql3/statements/raw/select_statement.hh +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/statements/raw/cf_statement.hh" -#include "cql3/statements/prepared_statement.hh" -#include "cql3/cql_statement.hh" -#include "cql3/selection/selection.hh" -#include "cql3/selection/raw_selector.hh" -#include "cql3/restrictions/statement_restrictions.hh" -#include "cql3/result_set.hh" -#include "exceptions/unrecognized_entity_exception.hh" -#include "service/client_state.hh" -#include "core/shared_ptr.hh" -#include "core/distributed.hh" -#include "validation.hh" - -namespace cql3 { - -namespace statements { - -namespace raw { - -/** - * Encapsulates a completely parsed SELECT query, including the target - * column family, expression, result count, and ordering clause. 
- * - */ -class select_statement : public cf_statement -{ -public: - class parameters final { - public: - using orderings_type = std::vector, bool>>; - private: - const orderings_type _orderings; - const bool _is_distinct; - const bool _allow_filtering; - public: - parameters(); - parameters(orderings_type orderings, - bool is_distinct, - bool allow_filtering); - bool is_distinct(); - bool allow_filtering(); - orderings_type const& orderings(); - }; - template - using compare_fn = std::function; - - using result_row_type = std::vector; - using ordering_comparator_type = compare_fn; -private: - ::shared_ptr _parameters; - std::vector<::shared_ptr> _select_clause; - std::vector<::shared_ptr> _where_clause; - ::shared_ptr _limit; -public: - select_statement(::shared_ptr cf_name, - ::shared_ptr parameters, - std::vector<::shared_ptr> select_clause, - std::vector<::shared_ptr> where_clause, - ::shared_ptr limit); - - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override { - return prepare(db, stats, false); - } - std::unique_ptr prepare(database& db, cql_stats& stats, bool for_view); -private: - ::shared_ptr prepare_restrictions( - database& db, - schema_ptr schema, - ::shared_ptr bound_names, - ::shared_ptr selection, - bool for_view = false); - - /** Returns a ::shared_ptr for the limit or null if no limit is set */ - ::shared_ptr prepare_limit(database& db, ::shared_ptr bound_names); - - static void verify_ordering_is_allowed(::shared_ptr restrictions); - - static void validate_distinct_selection(schema_ptr schema, - ::shared_ptr selection, - ::shared_ptr restrictions); - - void handle_unrecognized_ordering_column(::shared_ptr column); - - select_statement::ordering_comparator_type get_ordering_comparator(schema_ptr schema, - ::shared_ptr selection, - ::shared_ptr restrictions); - - bool is_reversed(schema_ptr schema); - - /** If ALLOW FILTERING was not specified, this verifies that it is not needed */ - void check_needs_filtering(::shared_ptr restrictions); - - bool contains_alias(::shared_ptr name); - - ::shared_ptr limit_receiver(); - -#if 0 - public: - virtual sstring to_string() override { - return sstring("raw_statement(") - + "name=" + cf_name->to_string() - + ", selectClause=" + to_string(_select_clause) - + ", whereClause=" + to_string(_where_clause) - + ", isDistinct=" + to_string(_parameters->is_distinct()) - + ")"; - } - }; -#endif -}; - -} - -} - -} diff --git a/scylla/cql3/statements/raw/update_statement.hh b/scylla/cql3/statements/raw/update_statement.hh deleted file mode 100644 index 9ae2ee5..0000000 --- a/scylla/cql3/statements/raw/update_statement.hh +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/statements/modification_statement.hh" -#include "cql3/statements/raw/modification_statement.hh" -#include "cql3/column_identifier.hh" -#include "cql3/term.hh" - -#include "database_fwd.hh" - -#include -#include "unimplemented.hh" - -namespace cql3 { - -namespace statements { - -class update_statement; - -namespace raw { - -class update_statement : public raw::modification_statement { -private: - // Provided for an UPDATE - std::vector, ::shared_ptr>> _updates; - std::vector _where_clause; -public: - /** - * Creates a new UpdateStatement from a column family name, columns map, consistency - * level, and key term. - * - * @param name column family being operated on - * @param attrs additional attributes for statement (timestamp, timeToLive) - * @param updates a map of column operations to perform - * @param whereClause the where clause - */ - update_statement(::shared_ptr name, - ::shared_ptr attrs, - std::vector, ::shared_ptr>> updates, - std::vector where_clause, - conditions_vector conditions); -protected: - virtual ::shared_ptr prepare_internal(database& db, schema_ptr schema, - ::shared_ptr bound_names, std::unique_ptr attrs, cql_stats& stats); -}; - -} - -} - -} diff --git a/scylla/cql3/statements/raw/use_statement.hh b/scylla/cql3/statements/raw/use_statement.hh deleted file mode 100644 index e0595bb..0000000 --- a/scylla/cql3/statements/raw/use_statement.hh +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2014 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/statements/raw/parsed_statement.hh" - -namespace cql3 { - -namespace statements { - -class prepared_statement; - -namespace raw { - -class use_statement : public parsed_statement { -private: - const sstring _keyspace; - -public: - use_statement(sstring keyspace); - - virtual std::unique_ptr prepare(database& db, cql_stats& stats) override; -}; - -} - -} - -} diff --git a/scylla/cql3/statements/request_validations.hh b/scylla/cql3/statements/request_validations.hh deleted file mode 100644 index 24408ae..0000000 --- a/scylla/cql3/statements/request_validations.hh +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Modified by ScyllaDB - * - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "exceptions/exceptions.hh" -#include "core/print.hh" - -#include - -namespace cql3 { -namespace statements { - -/** - * Utility methods use to perform request validation. - */ -namespace request_validations { - -template -exceptions::invalid_request_exception -invalid_request(const char* message_template, const MessageArgs&... message_args); - - - /** - * Checks that the specified expression is true. If not an InvalidRequestException will - * be thrown. - * - * @param expression the expression to test - * @param message_template the template used to build the error message - * @param message_args the message arguments - * @throws InvalidRequestException if the specified expression is false. - */ - template - void check_true(bool expression, - const char* message_template, - const MessageArgs&... 
message_args) {
-        if (!expression) {
-            throw exceptions::invalid_request_exception(sprint(message_template, message_args...));
-        }
-    }
-
-    /**
-     * Checks that the specified list does not contain duplicates.
-     *
-     * @param list the list to test
-     * @param message the error message
-     * @throws InvalidRequestException if the specified list contains duplicates.
-     */
-    template<typename E>
-    void check_contains_no_duplicates(const std::vector<E>& list, const char* message) {
-        if (std::set<E>(list.begin(), list.end()).size() != list.size()) {
-            throw invalid_request(message);
-        }
-    }
-
-    /**
-     * Checks that the specified list contains only the specified elements.
-     *
-     * @param list the list to test
-     * @param expected_elements the expected elements
-     * @param message the error message
-     * @throws InvalidRequestException if the specified list contains an element
-     * that is not among the expected elements.
-     */
-    template<typename E>
-    void check_contains_only(const std::vector<E>& list,
-                             const std::vector<E>& expected_elements,
-                             const char* message) {
-        if (boost::count_if(list, [&] (const E& e) { return !boost::count(expected_elements, e); })) {
-            throw invalid_request(message);
-        }
-    }
-
-    /**
-     * Checks that the specified expression is false. If it is not, an
-     * InvalidRequestException will be thrown.
-     *
-     * @param expression the expression to test
-     * @param message_template the template used to build the error message
-     * @param message_args the message arguments
-     * @throws InvalidRequestException if the specified expression is true.
-     */
-    template<typename... MessageArgs>
-    void check_false(bool expression,
-                     const char* message_template,
-                     const MessageArgs&... message_args) {
-        check_true(!expression, message_template, message_args...);
-    }
-
-    /**
-     * Checks that the specified object is NOT null.
-     * If it is, an InvalidRequestException will be thrown.
-     *
-     * @param object the object to test
-     * @param message_template the template used to build the error message
-     * @param message_args the message arguments
-     * @return the object
-     * @throws InvalidRequestException if the specified object is null.
-     */
-    template<typename T, typename... MessageArgs>
-    T check_not_null(T object, const char* message_template, const MessageArgs&... message_args) {
-        check_true(bool(object), message_template, message_args...);
-        return object;
-    }
-
-    /**
-     * Checks that the specified object is null.
-     * If it is not, an InvalidRequestException will be thrown.
-     *
-     * @param object the object to test
-     * @param message_template the template used to build the error message
-     * @param message_args the message arguments
-     * @return the object
-     * @throws InvalidRequestException if the specified object is not null.
-     */
-    template<typename T, typename... MessageArgs>
-    T check_null(T object, const char* message_template, const MessageArgs&... message_args) {
-        check_true(!bool(object), message_template, message_args...);
-        return object;
-    }
-
-    /**
-     * Returns an InvalidRequestException with the specified message.
-     *
-     * @param message_template the template used to build the error message
-     * @param message_args the message arguments
-     * @return an InvalidRequestException with the specified message.
-     */
-    template<typename... MessageArgs>
-    exceptions::invalid_request_exception
-    invalid_request(const char* message_template, const MessageArgs&...
message_args) { - return exceptions::invalid_request_exception(sprint(message_template, message_args...)); - } -} - -} -} diff --git a/scylla/cql3/statements/revoke_statement.cc b/scylla/cql3/statements/revoke_statement.cc deleted file mode 100644 index 0590cd7..0000000 --- a/scylla/cql3/statements/revoke_statement.cc +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "revoke_statement.hh" -#include "auth/authorizer.hh" - -future<::shared_ptr> -cql3::statements::revoke_statement::execute(distributed& proxy, service::query_state& state, const query_options& options) { - return auth::authorizer::get().revoke(state.get_client_state().user(), _permissions, _resource, _username).then([] { - return make_ready_future<::shared_ptr>(); - }); -} diff --git a/scylla/cql3/statements/revoke_statement.hh b/scylla/cql3/statements/revoke_statement.hh deleted file mode 100644 index 0e9019c..0000000 --- a/scylla/cql3/statements/revoke_statement.hh +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. 
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "permission_altering_statement.hh" - -namespace cql3 { - -namespace statements { - -class revoke_statement : public permission_altering_statement { -public: - using permission_altering_statement::permission_altering_statement; - - future<::shared_ptr> execute(distributed& - , service::query_state& - , const query_options&) override; -}; - -} - -} diff --git a/scylla/cql3/statements/schema_altering_statement.cc b/scylla/cql3/statements/schema_altering_statement.cc deleted file mode 100644 index 60b131a..0000000 --- a/scylla/cql3/statements/schema_altering_statement.cc +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "cql3/statements/schema_altering_statement.hh" - -#include "transport/messages/result_message.hh" - -namespace cql3 { - -namespace statements { - -schema_altering_statement::schema_altering_statement() - : cf_statement{::shared_ptr{}} - , _is_column_family_level{false} -{ -} - -schema_altering_statement::schema_altering_statement(::shared_ptr name) - : cf_statement{std::move(name)} - , _is_column_family_level{true} -{ -} - -bool schema_altering_statement::uses_function(const sstring& ks_name, const sstring& function_name) const -{ - return cf_statement::uses_function(ks_name, function_name); -} - -bool schema_altering_statement::depends_on_keyspace(const sstring& ks_name) const -{ - return false; -} - -bool schema_altering_statement::depends_on_column_family(const sstring& cf_name) const -{ - return false; -} - -uint32_t schema_altering_statement::get_bound_terms() -{ - return 0; -} - -void schema_altering_statement::prepare_keyspace(const service::client_state& state) -{ - if (_is_column_family_level) { - cf_statement::prepare_keyspace(state); - } -} - -future<::shared_ptr> -schema_altering_statement::execute0(distributed& proxy, service::query_state& state, const query_options& options, bool is_local_only) { - // If an IF [NOT] EXISTS clause was used, this may not result in an actual schema change. To avoid doing - // extra work in the drivers to handle schema changes, we return an empty message in this case. (CASSANDRA-7600) - return announce_migration(proxy, is_local_only).then([this] (auto ce) { - ::shared_ptr result; - if (!ce) { - result = ::make_shared(); - } else { - result = ::make_shared(ce); - } - return make_ready_future<::shared_ptr>(result); - }); -} - -future<::shared_ptr> -schema_altering_statement::execute(distributed& proxy, service::query_state& state, const query_options& options) { - return execute0(proxy, state, options, false); -} - -future<::shared_ptr> -schema_altering_statement::execute_internal(distributed& proxy, service::query_state& state, const query_options& options) { - return execute0(proxy, state, options, true); -} - -} - -} diff --git a/scylla/cql3/statements/schema_altering_statement.hh b/scylla/cql3/statements/schema_altering_statement.hh deleted file mode 100644 index 99e9e35..0000000 --- a/scylla/cql3/statements/schema_altering_statement.hh +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2014 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. 
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "transport/messages_fwd.hh" -#include "transport/event.hh" - -#include "cql3/statements/raw/cf_statement.hh" -#include "cql3/cql_statement.hh" - -#include "core/shared_ptr.hh" - -#include - -namespace cql3 { - -namespace statements { - -namespace messages = cql_transport::messages; - -/** - * Abstract class for statements that alter the schema. - */ -class schema_altering_statement : public raw::cf_statement, public cql_statement_no_metadata { -private: - const bool _is_column_family_level; - - future<::shared_ptr> - execute0(distributed& proxy, service::query_state& state, const query_options& options, bool); -protected: - schema_altering_statement(); - - schema_altering_statement(::shared_ptr name); - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override; - - virtual bool depends_on_keyspace(const sstring& ks_name) const override; - - virtual bool depends_on_column_family(const sstring& cf_name) const override; - - virtual uint32_t get_bound_terms() override; - - virtual void prepare_keyspace(const service::client_state& state) override; - - virtual future<::shared_ptr> announce_migration(distributed& proxy, bool is_local_only) = 0; - - virtual future<::shared_ptr> - execute(distributed& proxy, service::query_state& state, const query_options& options) override; - - virtual future<::shared_ptr> - execute_internal(distributed&, service::query_state& state, const query_options& options) override; -}; - -} - -} diff --git a/scylla/cql3/statements/select_statement.cc b/scylla/cql3/statements/select_statement.cc deleted file mode 100644 index def4e3f..0000000 --- a/scylla/cql3/statements/select_statement.cc +++ /dev/null @@ -1,675 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "cql3/statements/select_statement.hh"
-#include "cql3/statements/raw/select_statement.hh"
-
-#include "transport/messages/result_message.hh"
-#include "cql3/selection/selection.hh"
-#include "cql3/util.hh"
-#include "core/shared_ptr.hh"
-#include "query-result-reader.hh"
-#include "query_result_merger.hh"
-#include "service/pager/query_pagers.hh"
-#include
-#include "view_info.hh"
-
-namespace cql3 {
-
-namespace statements {
-
-thread_local const ::shared_ptr<select_statement::parameters> select_statement::_default_parameters = ::make_shared<select_statement::parameters>();
-
-select_statement::parameters::parameters()
-    : _is_distinct{false}
-    , _allow_filtering{false}
-{ }
-
-select_statement::parameters::parameters(orderings_type orderings,
-                                         bool is_distinct,
-                                         bool allow_filtering)
-    : _orderings{std::move(orderings)}
-    , _is_distinct{is_distinct}
-    , _allow_filtering{allow_filtering}
-{ }
-
-bool select_statement::parameters::is_distinct() {
-    return _is_distinct;
-}
-
-bool select_statement::parameters::allow_filtering() {
-    return _allow_filtering;
-}
-
-select_statement::parameters::orderings_type const& select_statement::parameters::orderings() {
-    return _orderings;
-}
-
-select_statement::select_statement(schema_ptr schema,
-                                   uint32_t bound_terms,
-                                   ::shared_ptr<parameters> parameters,
-                                   ::shared_ptr<selection::selection> selection,
-                                   ::shared_ptr<restrictions::statement_restrictions> restrictions,
-                                   bool is_reversed,
-                                   ordering_comparator_type ordering_comparator,
-                                   ::shared_ptr<term> limit,
-                                   cql_stats& stats)
-    : _schema(schema)
-    , _bound_terms(bound_terms)
-    , _parameters(std::move(parameters))
-    , _selection(std::move(selection))
-    , _restrictions(std::move(restrictions))
-    , _is_reversed(is_reversed)
-    , _limit(std::move(limit))
-    , _ordering_comparator(std::move(ordering_comparator))
-    , _stats(stats)
-{
-    _opts = _selection->get_query_options();
-}
-
-bool select_statement::uses_function(const sstring& ks_name, const sstring& function_name) const {
-    return _selection->uses_function(ks_name, function_name)
-        || _restrictions->uses_function(ks_name, function_name)
-        || (_limit && _limit->uses_function(ks_name, function_name));
-}
-
-::shared_ptr<const cql3::metadata> select_statement::get_result_metadata() const {
-    // FIXME: COUNT needs special result metadata handling.
-    return _selection->get_result_metadata();
-}
-
-uint32_t select_statement::get_bound_terms() {
-    return _bound_terms;
-}
-
-future<> select_statement::check_access(const service::client_state& state) {
-    try {
-        auto&& s = service::get_local_storage_proxy().get_db().local().find_schema(keyspace(), column_family());
-        auto& cf_name = s->is_view() ? s->view_info()->base_name() : column_family();
-        return state.has_column_family_access(keyspace(), cf_name, auth::permission::SELECT);
-    } catch (const no_such_column_family& e) {
-        // Will be validated afterwards.
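-        // Editor's note (added commentary, not in the original source): an
-        // unknown table is deliberately not reported as an access failure
-        // here; the statement is let through so the regular validation path
-        // can surface the proper error to the client.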
-        return make_ready_future<>();
-    }
-}
-
-void select_statement::validate(distributed<service::storage_proxy>&, const service::client_state& state) {
-    // Nothing to do, all validation has been done by raw_statement::prepare()
-}
-
-bool select_statement::depends_on_keyspace(const sstring& ks_name) const {
-    return keyspace() == ks_name;
-}
-
-bool select_statement::depends_on_column_family(const sstring& cf_name) const {
-    return column_family() == cf_name;
-}
-
-const sstring& select_statement::keyspace() const {
-    return _schema->ks_name();
-}
-
-const sstring& select_statement::column_family() const {
-    return _schema->cf_name();
-}
-
-query::partition_slice
-select_statement::make_partition_slice(const query_options& options)
-{
-    std::vector<column_id> static_columns;
-    std::vector<column_id> regular_columns;
-
-    if (_selection->contains_static_columns()) {
-        static_columns.reserve(_selection->get_column_count());
-    }
-
-    regular_columns.reserve(_selection->get_column_count());
-
-    for (auto&& col : _selection->get_columns()) {
-        if (col->is_static()) {
-            static_columns.push_back(col->id);
-        } else if (col->is_regular()) {
-            regular_columns.push_back(col->id);
-        }
-    }
-
-    if (_parameters->is_distinct()) {
-        _opts.set(query::partition_slice::option::distinct);
-        return query::partition_slice({ query::clustering_range::make_open_ended_both_sides() },
-            std::move(static_columns), {}, _opts, nullptr, options.get_cql_serialization_format());
-    }
-
-    auto bounds = _restrictions->get_clustering_bounds(options);
-    if (_is_reversed) {
-        _opts.set(query::partition_slice::option::reversed);
-        std::reverse(bounds.begin(), bounds.end());
-    }
-    return query::partition_slice(std::move(bounds),
-        std::move(static_columns), std::move(regular_columns), _opts, nullptr, options.get_cql_serialization_format());
-}
-
-int32_t select_statement::get_limit(const query_options& options) const {
-    if (!_limit) {
-        return std::numeric_limits<int32_t>::max();
-    }
-
-    auto val = _limit->bind_and_get(options);
-    if (val.is_null()) {
-        throw exceptions::invalid_request_exception("Invalid null value of limit");
-    }
-    if (val.is_unset_value()) {
-        return std::numeric_limits<int32_t>::max();
-    }
-    try {
-        int32_type->validate(*val);
-        auto l = value_cast<int32_t>(int32_type->deserialize(*val));
-        if (l <= 0) {
-            throw exceptions::invalid_request_exception("LIMIT must be strictly positive");
-        }
-        return l;
-    } catch (const marshal_exception& e) {
-        throw exceptions::invalid_request_exception("Invalid limit value");
-    }
-}
-
-bool select_statement::needs_post_query_ordering() const {
-    // We need post-query ordering only for queries with IN on the partition key and an ORDER BY.
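-    // Editor's note (illustrative example, not in the original source):
-    //
-    //     SELECT v FROM t WHERE pk IN (1, 2, 3) ORDER BY ck DESC;
-    //
-    // reads three partitions, and rows arrive grouped per partition, so the
-    // coordinator must merge and re-sort the combined rows after the
-    // per-partition reads return (see process_results() below).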
- return _restrictions->key_is_in_relation() && !_parameters->orderings().empty(); -} - -struct select_statement_executor { - static auto get() { return &select_statement::do_execute; } -}; -static thread_local auto select_stage = seastar::make_execution_stage("cql3_select", select_statement_executor::get()); - -future> -select_statement::execute(distributed& proxy, - service::query_state& state, - const query_options& options) -{ - return select_stage(this, seastar::ref(proxy), seastar::ref(state), seastar::cref(options)); -} - -future> -select_statement::do_execute(distributed& proxy, - service::query_state& state, - const query_options& options) -{ - tracing::add_table_name(state.get_trace_state(), keyspace(), column_family()); - - auto cl = options.get_consistency(); - - validate_for_read(_schema->ks_name(), cl); - - int32_t limit = get_limit(options); - auto now = gc_clock::now(); - - ++_stats.reads; - - auto command = ::make_lw_shared(_schema->id(), _schema->version(), - make_partition_slice(options), limit, now, tracing::make_trace_info(state.get_trace_state()), query::max_partitions, options.get_timestamp(state)); - - int32_t page_size = options.get_page_size(); - - // An aggregation query will never be paged for the user, but we always page it internally to avoid OOM. - // If we user provided a page_size we'll use that to page internally (because why not), otherwise we use our default - // Note that if there are some nodes in the cluster with a version less than 2.0, we can't use paging (CASSANDRA-6707). - auto aggregate = _selection->is_aggregate(); - if (aggregate && page_size <= 0) { - page_size = DEFAULT_COUNT_PAGE_SIZE; - } - - auto key_ranges = _restrictions->get_partition_key_ranges(options); - - if (!aggregate && (page_size <= 0 - || !service::pager::query_pagers::may_need_paging(page_size, - *command, key_ranges))) { - return execute(proxy, command, std::move(key_ranges), state, options, now); - } - - command->slice.options.set(); - auto p = service::pager::query_pagers::pager(_schema, _selection, - state, options, command, std::move(key_ranges)); - - if (aggregate) { - return do_with( - cql3::selection::result_set_builder(*_selection, now, - options.get_cql_serialization_format()), - [p, page_size, now](auto& builder) { - return do_until([p] {return p->is_exhausted();}, - [p, &builder, page_size, now] { - return p->fetch_page(builder, page_size, now); - } - ).then([&builder] { - auto rs = builder.build(); - auto msg = ::make_shared(std::move(rs)); - return make_ready_future>(std::move(msg)); - }); - }); - } - - if (needs_post_query_ordering()) { - throw exceptions::invalid_request_exception( - "Cannot page queries with both ORDER BY and a IN restriction on the partition key;" - " you must either remove the ORDER BY or the IN and sort client side, or disable paging for this query"); - } - - return p->fetch_page(page_size, now).then( - [this, p, &options, limit, now](std::unique_ptr rs) { - - if (!p->is_exhausted()) { - rs->get_metadata().set_has_more_pages(p->state()); - } - - auto msg = ::make_shared(std::move(rs)); - return make_ready_future>(std::move(msg)); - }); -} - -future> -select_statement::execute(distributed& proxy, - lw_shared_ptr cmd, - dht::partition_range_vector&& partition_ranges, - service::query_state& state, - const query_options& options, - gc_clock::time_point now) -{ - // If this is a query with IN on partition key, ORDER BY clause and LIMIT - // is specified we need to get "limit" rows from each partition since there - // is no way to tell which of 
these rows belong to the query result before - // doing post-query ordering. - if (needs_post_query_ordering() && _limit) { - return do_with(std::forward(partition_ranges), [this, &proxy, &state, &options, cmd](auto prs) { - assert(cmd->partition_limit == query::max_partitions); - query::result_merger merger(cmd->row_limit * prs.size(), query::max_partitions); - return map_reduce(prs.begin(), prs.end(), [this, &proxy, &state, &options, cmd] (auto pr) { - dht::partition_range_vector prange { pr }; - auto command = ::make_lw_shared(*cmd); - return proxy.local().query(_schema, command, std::move(prange), options.get_consistency(), state.get_trace_state()); - }, std::move(merger)); - }).then([this, &options, now, cmd] (auto result) { - return this->process_results(std::move(result), cmd, options, now); - }); - } else { - return proxy.local().query(_schema, cmd, std::move(partition_ranges), options.get_consistency(), state.get_trace_state()) - .then([this, &options, now, cmd] (auto result) { - return this->process_results(std::move(result), cmd, options, now); - }); - } -} - -future<::shared_ptr> -select_statement::execute_internal(distributed& proxy, - service::query_state& state, - const query_options& options) -{ - int32_t limit = get_limit(options); - auto now = gc_clock::now(); - auto command = ::make_lw_shared(_schema->id(), _schema->version(), - make_partition_slice(options), limit, now, std::experimental::nullopt, query::max_partitions, options.get_timestamp(state)); - auto partition_ranges = _restrictions->get_partition_key_ranges(options); - - tracing::add_table_name(state.get_trace_state(), keyspace(), column_family()); - - ++_stats.reads; - - if (needs_post_query_ordering() && _limit) { - return do_with(std::move(partition_ranges), [this, &proxy, &state, command] (auto prs) { - assert(command->partition_limit == query::max_partitions); - query::result_merger merger(command->row_limit * prs.size(), query::max_partitions); - return map_reduce(prs.begin(), prs.end(), [this, &proxy, &state, command] (auto pr) { - dht::partition_range_vector prange { pr }; - auto cmd = ::make_lw_shared(*command); - return proxy.local().query(_schema, cmd, std::move(prange), db::consistency_level::ONE, state.get_trace_state()); - }, std::move(merger)); - }).then([command, this, &options, now] (auto result) { - return this->process_results(std::move(result), command, options, now); - }).finally([command] { }); - } else { - return proxy.local().query(_schema, command, std::move(partition_ranges), db::consistency_level::ONE, state.get_trace_state()).then([command, this, &options, now] (auto result) { - return this->process_results(std::move(result), command, options, now); - }).finally([command] {}); - } -} - -shared_ptr -select_statement::process_results(foreign_ptr> results, - lw_shared_ptr cmd, - const query_options& options, - gc_clock::time_point now) -{ - cql3::selection::result_set_builder builder(*_selection, now, - options.get_cql_serialization_format()); - query::result_view::consume(*results, cmd->slice, - cql3::selection::result_set_builder::visitor(builder, *_schema, - *_selection)); - auto rs = builder.build(); - - if (needs_post_query_ordering()) { - rs->sort(_ordering_comparator); - if (_is_reversed) { - rs->reverse(); - } - rs->trim(cmd->row_limit); - } - return ::make_shared(std::move(rs)); -} - -::shared_ptr select_statement::get_restrictions() const { - return _restrictions; -} - -namespace raw { - -select_statement::select_statement(::shared_ptr cf_name, - ::shared_ptr parameters, - 
std::vector<::shared_ptr> select_clause, - std::vector<::shared_ptr> where_clause, - ::shared_ptr limit) - : cf_statement(std::move(cf_name)) - , _parameters(std::move(parameters)) - , _select_clause(std::move(select_clause)) - , _where_clause(std::move(where_clause)) - , _limit(std::move(limit)) -{ } - -std::unique_ptr select_statement::prepare(database& db, cql_stats& stats, bool for_view) { - schema_ptr schema = validation::validate_column_family(db, keyspace(), column_family()); - auto bound_names = get_bound_variables(); - - auto selection = _select_clause.empty() - ? selection::selection::wildcard(schema) - : selection::selection::from_selectors(db, schema, _select_clause); - - auto restrictions = prepare_restrictions(db, schema, bound_names, selection, for_view); - - if (_parameters->is_distinct()) { - validate_distinct_selection(schema, selection, restrictions); - } - - select_statement::ordering_comparator_type ordering_comparator; - bool is_reversed_ = false; - - if (!_parameters->orderings().empty()) { - assert(!for_view); - verify_ordering_is_allowed(restrictions); - ordering_comparator = get_ordering_comparator(schema, selection, restrictions); - is_reversed_ = is_reversed(schema); - } - - check_needs_filtering(restrictions); - - auto stmt = ::make_shared(schema, - bound_names->size(), - _parameters, - std::move(selection), - std::move(restrictions), - is_reversed_, - std::move(ordering_comparator), - prepare_limit(db, bound_names), - stats); - - auto partition_key_bind_indices = bound_names->get_partition_key_bind_indexes(schema); - - return std::make_unique(std::move(stmt), std::move(*bound_names), std::move(partition_key_bind_indices)); -} - -::shared_ptr -select_statement::prepare_restrictions(database& db, - schema_ptr schema, - ::shared_ptr bound_names, - ::shared_ptr selection, - bool for_view) -{ - try { - return ::make_shared(db, schema, statement_type::SELECT, std::move(_where_clause), bound_names, - selection->contains_only_static_columns(), selection->contains_a_collection(), for_view); - } catch (const exceptions::unrecognized_entity_exception& e) { - if (contains_alias(e.entity)) { - throw exceptions::invalid_request_exception(sprint("Aliases aren't allowed in the where clause ('%s')", e.relation->to_string())); - } - throw; - } -} - -/** Returns a ::shared_ptr for the limit or null if no limit is set */ -::shared_ptr -select_statement::prepare_limit(database& db, ::shared_ptr bound_names) -{ - if (!_limit) { - return {}; - } - - auto prep_limit = _limit->prepare(db, keyspace(), limit_receiver()); - prep_limit->collect_marker_specification(bound_names); - return prep_limit; -} - -void select_statement::verify_ordering_is_allowed(::shared_ptr restrictions) -{ - if (restrictions->uses_secondary_indexing()) { - throw exceptions::invalid_request_exception("ORDER BY with 2ndary indexes is not supported."); - } - if (restrictions->is_key_range()) { - throw exceptions::invalid_request_exception("ORDER BY is only supported when the partition key is restricted by an EQ or an IN."); - } -} - -void select_statement::validate_distinct_selection(schema_ptr schema, - ::shared_ptr selection, - ::shared_ptr restrictions) -{ - for (auto&& def : selection->get_columns()) { - if (!def->is_partition_key() && !def->is_static()) { - throw exceptions::invalid_request_exception(sprint( - "SELECT DISTINCT queries must only request partition key columns and/or static columns (not %s)", - def->name_as_text())); - } - } - - // If it's a key range, we require that all partition key columns 
are selected so we don't have to bother - // with post-query grouping. - if (!restrictions->is_key_range()) { - return; - } - - for (auto&& def : schema->partition_key_columns()) { - if (!selection->has_column(def)) { - throw exceptions::invalid_request_exception(sprint( - "SELECT DISTINCT queries must request all the partition key columns (missing %s)", def.name_as_text())); - } - } -} - -void select_statement::handle_unrecognized_ordering_column(::shared_ptr column) -{ - if (contains_alias(column)) { - throw exceptions::invalid_request_exception(sprint("Aliases are not allowed in order by clause ('%s')", *column)); - } - throw exceptions::invalid_request_exception(sprint("Order by on unknown column %s", *column)); -} - -select_statement::ordering_comparator_type -select_statement::get_ordering_comparator(schema_ptr schema, - ::shared_ptr selection, - ::shared_ptr restrictions) -{ - if (!restrictions->key_is_in_relation()) { - return {}; - } - - std::vector> sorters; - sorters.reserve(_parameters->orderings().size()); - - // If we order post-query (see orderResults), the sorted column needs to be in the ResultSet for sorting, - // even if we don't - // ultimately ship them to the client (CASSANDRA-4911). - for (auto&& e : _parameters->orderings()) { - auto&& raw = e.first; - ::shared_ptr column = raw->prepare_column_identifier(schema); - const column_definition* def = schema->get_column_definition(column->name()); - if (!def) { - handle_unrecognized_ordering_column(column); - } - auto index = selection->index_of(*def); - if (index < 0) { - index = selection->add_column_for_ordering(*def); - } - - sorters.emplace_back(index, def->type); - } - - return [sorters = std::move(sorters)] (const result_row_type& r1, const result_row_type& r2) mutable { - for (auto&& e : sorters) { - auto& c1 = r1[e.first]; - auto& c2 = r2[e.first]; - auto type = e.second; - - if (bool(c1) != bool(c2)) { - return bool(c2); - } - if (c1) { - int result = type->compare(*c1, *c2); - if (result != 0) { - return result < 0; - } - } - } - return false; - }; -} - -bool select_statement::is_reversed(schema_ptr schema) { - assert(_parameters->orderings().size() > 0); - parameters::orderings_type::size_type i = 0; - bool is_reversed_ = false; - bool relation_order_unsupported = false; - - for (auto&& e : _parameters->orderings()) { - ::shared_ptr column = e.first->prepare_column_identifier(schema); - bool reversed = e.second; - - auto def = schema->get_column_definition(column->name()); - if (!def) { - handle_unrecognized_ordering_column(column); - } - - if (!def->is_clustering_key()) { - throw exceptions::invalid_request_exception(sprint( - "Order by is currently only supported on the clustered columns of the PRIMARY KEY, got %s", *column)); - } - - if (i != def->component_index()) { - throw exceptions::invalid_request_exception( - "Order by currently only support the ordering of columns following their declared order in the PRIMARY KEY"); - } - - bool current_reverse_status = (reversed != def->type->is_reversed()); - - if (i == 0) { - is_reversed_ = current_reverse_status; - } - - if (is_reversed_ != current_reverse_status) { - relation_order_unsupported = true; - } - ++i; - } - - if (relation_order_unsupported) { - throw exceptions::invalid_request_exception("Unsupported order by relation"); - } - - return is_reversed_; -} - -/** If ALLOW FILTERING was not specified, this verifies that it is not needed */ -void select_statement::check_needs_filtering(::shared_ptr restrictions) -{ - // non-key-range non-indexed queries 
cannot involve filtering underneath - if (!_parameters->allow_filtering() && (restrictions->is_key_range() || restrictions->uses_secondary_indexing())) { - // We will potentially filter data if either: - // - Have more than one IndexExpression - // - Have no index expression and the column filter is not the identity - if (restrictions->need_filtering()) { - throw exceptions::invalid_request_exception( - "Cannot execute this query as it might involve data filtering and " - "thus may have unpredictable performance. If you want to execute " - "this query despite the performance unpredictability, use ALLOW FILTERING"); - } - } -} - -bool select_statement::contains_alias(::shared_ptr name) { - return std::any_of(_select_clause.begin(), _select_clause.end(), [name] (auto raw) { - return raw->alias && *name == *raw->alias; - }); -} - -::shared_ptr select_statement::limit_receiver() { - return ::make_shared(keyspace(), column_family(), ::make_shared("[limit]", true), - int32_type); -} - -} - -} - -namespace util { - -shared_ptr build_select_statement( - const sstring_view& cf_name, - const sstring_view& where_clause, - std::vector included_columns) { - std::ostringstream out; - out << "SELECT "; - if (included_columns.empty()) { - out << "*"; - } else { - out << join(", ", included_columns); - } - out << " FROM " << cf_name << " WHERE " << where_clause << " ALLOW FILTERING"; - return do_with_parser(out.str(), std::mem_fn(&cql3_parser::CqlParser::selectStatement)); -} - -} - -} diff --git a/scylla/cql3/statements/select_statement.hh b/scylla/cql3/statements/select_statement.hh deleted file mode 100644 index 32ab909..0000000 --- a/scylla/cql3/statements/select_statement.hh +++ /dev/null @@ -1,425 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "cql3/statements/raw/cf_statement.hh" -#include "cql3/statements/raw/select_statement.hh" -#include "cql3/cql_statement.hh" -#include "cql3/selection/selection.hh" -#include "cql3/selection/raw_selector.hh" -#include "cql3/restrictions/statement_restrictions.hh" -#include "cql3/result_set.hh" -#include "exceptions/unrecognized_entity_exception.hh" -#include "service/client_state.hh" -#include "core/shared_ptr.hh" -#include "core/distributed.hh" -#include "validation.hh" - -namespace cql3 { - -namespace statements { - -/** - * Encapsulates a completely parsed SELECT query, including the target - * column family, expression, result count, and ordering clause. - * - */ -class select_statement : public cql_statement { -public: - using parameters = raw::select_statement::parameters; -private: - static constexpr int DEFAULT_COUNT_PAGE_SIZE = 10000; - static thread_local const ::shared_ptr _default_parameters; - schema_ptr _schema; - uint32_t _bound_terms; - ::shared_ptr _parameters; - ::shared_ptr _selection; - ::shared_ptr _restrictions; - bool _is_reversed; - ::shared_ptr _limit; - - template - using compare_fn = raw::select_statement::compare_fn; - - using result_row_type = raw::select_statement::result_row_type; - using ordering_comparator_type = raw::select_statement::ordering_comparator_type; - - /** - * The comparator used to orders results when multiple keys are selected (using IN). - */ - ordering_comparator_type _ordering_comparator; - - query::partition_slice::option_set _opts; - cql_stats& _stats; -private: - future<::shared_ptr> do_execute(distributed& proxy, - service::query_state& state, const query_options& options); - friend class select_statement_executor; -public: - select_statement(schema_ptr schema, - uint32_t bound_terms, - ::shared_ptr parameters, - ::shared_ptr selection, - ::shared_ptr restrictions, - bool is_reversed, - ordering_comparator_type ordering_comparator, - ::shared_ptr limit, - cql_stats& stats); - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override; - - virtual ::shared_ptr get_result_metadata() const override; - virtual uint32_t get_bound_terms() override; - virtual future<> check_access(const service::client_state& state) override; - virtual void validate(distributed&, const service::client_state& state) override; - virtual bool depends_on_keyspace(const sstring& ks_name) const; - virtual bool depends_on_column_family(const sstring& cf_name) const; - - virtual future<::shared_ptr> execute(distributed& proxy, - service::query_state& state, const query_options& options) override; - - virtual future<::shared_ptr> execute_internal(distributed& proxy, - service::query_state& state, const query_options& options) override; - - future<::shared_ptr> execute(distributed& proxy, - lw_shared_ptr cmd, dht::partition_range_vector&& partition_ranges, service::query_state& state, - const query_options& options, gc_clock::time_point now); - - shared_ptr process_results(foreign_ptr> results, - lw_shared_ptr cmd, const query_options& options, gc_clock::time_point now); -#if 0 - private ResultMessage.Rows pageAggregateQuery(QueryPager pager, QueryOptions options, int pageSize, long now) - throws RequestValidationException, RequestExecutionException - { - Selection.ResultSetBuilder result = _selection->resultSetBuilder(now); - while (!pager.isExhausted()) - { - for (org.apache.cassandra.db.Row row : pager.fetchPage(pageSize)) - { - // Not columns match the query, skip - if (row.cf == null) - 
continue; - - processColumnFamily(row.key.getKey(), row.cf, options, now, result); - } - } - return new ResultMessage.Rows(result.build(options.getProtocolVersion())); - } - - static List readLocally(String keyspaceName, List cmds) - { - Keyspace keyspace = Keyspace.open(keyspaceName); - List rows = new ArrayList(cmds.size()); - for (ReadCommand cmd : cmds) - rows.add(cmd.getRow(keyspace)); - return rows; - } - - public ResultMessage.Rows executeInternal(QueryState state, QueryOptions options) throws RequestExecutionException, RequestValidationException - { - int limit = getLimit(options); - long now = System.currentTimeMillis(); - Pageable command = getPageableCommand(options, limit, now); - List rows = command == null - ? Collections.emptyList() - : (command instanceof Pageable.ReadCommands - ? readLocally(keyspace(), ((Pageable.ReadCommands)command).commands) - : ((RangeSliceCommand)command).executeLocally()); - - return processResults(rows, options, limit, now); - } - - public ResultSet process(List rows) throws InvalidRequestException - { - QueryOptions options = QueryOptions.DEFAULT; - return process(rows, options, getLimit(options), System.currentTimeMillis()); - } -#endif - - const sstring& keyspace() const; - - const sstring& column_family() const; - - query::partition_slice make_partition_slice(const query_options& options); - - ::shared_ptr get_restrictions() const; - -#if 0 - private SliceQueryFilter sliceFilter(ColumnSlice slice, int limit, int toGroup) - { - return sliceFilter(new ColumnSlice[]{ slice }, limit, toGroup); - } - - private SliceQueryFilter sliceFilter(ColumnSlice[] slices, int limit, int toGroup) - { - assert ColumnSlice.validateSlices(slices, _schema.comparator, _is_reversed) : String.format("Invalid slices: " + Arrays.toString(slices) + (_is_reversed ? " (reversed)" : "")); - return new SliceQueryFilter(slices, _is_reversed, limit, toGroup); - } -#endif - -private: - int32_t get_limit(const query_options& options) const; - bool needs_post_query_ordering() const; - -#if 0 - private int updateLimitForQuery(int limit) - { - // Internally, we don't support exclusive bounds for slices. Instead, we query one more element if necessary - // and exclude it later (in processColumnFamily) - return restrictions.isNonCompositeSliceWithExclusiveBounds() && limit != Integer.MAX_VALUE - ? 
limit + 1 - : limit; - } - - private SortedSet getRequestedColumns(QueryOptions options) throws InvalidRequestException - { - // Note: getRequestedColumns don't handle static columns, but due to CASSANDRA-5762 - // we always do a slice for CQL3 tables, so it's ok to ignore them here - assert !restrictions.isColumnRange(); - SortedSet columns = new TreeSet(cfm.comparator); - for (Composite composite : restrictions.getClusteringColumnsAsComposites(options)) - columns.addAll(addSelectedColumns(composite)); - return columns; - } - - private SortedSet addSelectedColumns(Composite prefix) - { - if (cfm.comparator.isDense()) - { - return FBUtilities.singleton(cfm.comparator.create(prefix, null), cfm.comparator); - } - else - { - SortedSet columns = new TreeSet(cfm.comparator); - - // We need to query the selected column as well as the marker - // column (for the case where the row exists but has no columns outside the PK) - // Two exceptions are "static CF" (non-composite non-compact CF) and "super CF" - // that don't have marker and for which we must query all columns instead - if (cfm.comparator.isCompound() && !cfm.isSuper()) - { - // marker - columns.add(cfm.comparator.rowMarker(prefix)); - - // selected columns - for (ColumnDefinition def : selection.getColumns()) - if (def.isRegular() || def.isStatic()) - columns.add(cfm.comparator.create(prefix, def)); - } - else - { - // We now that we're not composite so we can ignore static columns - for (ColumnDefinition def : cfm.regularColumns()) - columns.add(cfm.comparator.create(prefix, def)); - } - return columns; - } - } - - public List getValidatedIndexExpressions(QueryOptions options) throws InvalidRequestException - { - if (!restrictions.usesSecondaryIndexing()) - return Collections.emptyList(); - - List expressions = restrictions.getIndexExpressions(options); - - ColumnFamilyStore cfs = Keyspace.open(keyspace()).getColumnFamilyStore(columnFamily()); - SecondaryIndexManager secondaryIndexManager = cfs.indexManager; - secondaryIndexManager.validateIndexSearchersForQuery(expressions); - - return expressions; - } - - private CellName makeExclusiveSliceBound(Bound bound, CellNameType type, QueryOptions options) throws InvalidRequestException - { - if (restrictions.areRequestedBoundsInclusive(bound)) - return null; - - return type.makeCellName(restrictions.getClusteringColumnsBounds(bound, options).get(0)); - } - - private Iterator applySliceRestriction(final Iterator cells, final QueryOptions options) throws InvalidRequestException - { - final CellNameType type = cfm.comparator; - - final CellName excludedStart = makeExclusiveSliceBound(Bound.START, type, options); - final CellName excludedEnd = makeExclusiveSliceBound(Bound.END, type, options); - - return Iterators.filter(cells, new Predicate() - { - public boolean apply(Cell c) - { - // For dynamic CF, the column could be out of the requested bounds (because we don't support strict bounds internally (unless - // the comparator is composite that is)), filter here - return !((excludedStart != null && type.compare(c.name(), excludedStart) == 0) - || (excludedEnd != null && type.compare(c.name(), excludedEnd) == 0)); - } - }); - } - - private ResultSet process(List rows, QueryOptions options, int limit, long now) throws InvalidRequestException - { - Selection.ResultSetBuilder result = selection.resultSetBuilder(now); - for (org.apache.cassandra.db.Row row : rows) - { - // Not columns match the query, skip - if (row.cf == null) - continue; - - processColumnFamily(row.key.getKey(), row.cf, options, 
now, result); - } - - ResultSet cqlRows = result.build(options.getProtocolVersion()); - - orderResults(cqlRows); - - // Internal calls always return columns in the comparator order, even when reverse was set - if (isReversed) - cqlRows.reverse(); - - // Trim result if needed to respect the user limit - cqlRows.trim(limit); - return cqlRows; - } - - // Used by ModificationStatement for CAS operations - void processColumnFamily(ByteBuffer key, ColumnFamily cf, QueryOptions options, long now, Selection.ResultSetBuilder result) - throws InvalidRequestException - { - CFMetaData cfm = cf.metadata(); - ByteBuffer[] keyComponents = null; - if (cfm.getKeyValidator() instanceof CompositeType) - { - keyComponents = ((CompositeType)cfm.getKeyValidator()).split(key); - } - else - { - keyComponents = new ByteBuffer[]{ key }; - } - - Iterator cells = cf.getSortedColumns().iterator(); - if (restrictions.isNonCompositeSliceWithExclusiveBounds()) - cells = applySliceRestriction(cells, options); - - CQL3Row.RowIterator iter = cfm.comparator.CQL3RowBuilder(cfm, now).group(cells); - - // If there is static columns but there is no non-static row, then provided the select was a full - // partition selection (i.e. not a 2ndary index search and there was no condition on clustering columns) - // then we want to include the static columns in the result set (and we're done). - CQL3Row staticRow = iter.getStaticRow(); - if (staticRow != null && !iter.hasNext() && !restrictions.usesSecondaryIndexing() && restrictions.hasNoClusteringColumnsRestriction()) - { - result.newRow(options.getProtocolVersion()); - for (ColumnDefinition def : selection.getColumns()) - { - switch (def.kind) - { - case PARTITION_KEY: - result.add(keyComponents[def.position()]); - break; - case STATIC: - addValue(result, def, staticRow, options); - break; - default: - result.add((ByteBuffer)null); - } - } - return; - } - - while (iter.hasNext()) - { - CQL3Row cql3Row = iter.next(); - - // Respect requested order - result.newRow(options.getProtocolVersion()); - // Respect selection order - for (ColumnDefinition def : selection.getColumns()) - { - switch (def.kind) - { - case PARTITION_KEY: - result.add(keyComponents[def.position()]); - break; - case CLUSTERING_COLUMN: - result.add(cql3Row.getClusteringColumn(def.position())); - break; - case COMPACT_VALUE: - result.add(cql3Row.getColumn(null)); - break; - case REGULAR: - addValue(result, def, cql3Row, options); - break; - case STATIC: - addValue(result, def, staticRow, options); - break; - } - } - } - } - - private static void addValue(Selection.ResultSetBuilder result, ColumnDefinition def, CQL3Row row, QueryOptions options) - { - if (row == null) - { - result.add((ByteBuffer)null); - return; - } - - if (def.type.isMultiCell()) - { - List cells = row.getMultiCellColumn(def.name); - ByteBuffer buffer = cells == null - ? null - : ((CollectionType)def.type).serializeForNativeProtocol(cells, options.getProtocolVersion()); - result.add(buffer); - return; - } - - result.add(row.getColumn(def.name)); - } -#endif -}; - -} - -} diff --git a/scylla/cql3/statements/statement_type.hh b/scylla/cql3/statements/statement_type.hh deleted file mode 100644 index ae46be7..0000000 --- a/scylla/cql3/statements/statement_type.hh +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2017 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include "to_string.hh" -#include "schema.hh" -#include "cql3/restrictions/restrictions.hh" -#include "cql3/restrictions/primary_key_restrictions.hh" -#include "cql3/restrictions/single_column_restrictions.hh" -#include "cql3/relation.hh" -#include "cql3/variable_specifications.hh" - -namespace cql3 { - -namespace statements { - -class statement_type final { - enum class type { - insert, - update, - del, - select - }; - const type _type; - - statement_type(type t) : _type(t) { - } -public: - statement_type() = delete; - - bool is_insert() const { - return _type == type::insert; - } - bool is_update() const { - return _type == type::update; - } - bool is_delete() const { - return _type == type::del; - } - bool is_select() const { - return _type == type::select; - } - - static const statement_type INSERT; - static const statement_type UPDATE; - static const statement_type DELETE; - static const statement_type SELECT; - - bool operator==(const statement_type& other) const { - return _type == other._type; - } - - bool operator!=(const statement_type& other) const { - return !(_type == other._type); - } - - friend std::ostream &operator<<(std::ostream &os, const statement_type& t) { - switch (t._type) { - case type::insert: return os << "INSERT"; - case type::update: return os << "UPDATE"; - case type::del: return os << "DELETE"; - case type::select : return os << "SELECT"; - } - return os; - } -}; - -} -} diff --git a/scylla/cql3/statements/truncate_statement.cc b/scylla/cql3/statements/truncate_statement.cc deleted file mode 100644 index f36b288..0000000 --- a/scylla/cql3/statements/truncate_statement.cc +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
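The statement_type class deleted just above is a type-safe enum wrapper: a private scoped enum, a deleted default constructor, named static instances, and value equality, so statement kinds can be compared and printed without exposing the raw enum. A minimal self-contained sketch of the same pattern, using only the standard library (the main function is illustrative, not part of the original):

    #include <iostream>

    // Sketch of the enum-wrapper pattern in the deleted statement_type.
    class statement_type final {
        enum class type { insert, update, del, select };
        const type _type;
        explicit statement_type(type t) : _type(t) {}
    public:
        statement_type() = delete;

        bool is_insert() const { return _type == type::insert; }
        bool is_select() const { return _type == type::select; }

        // Named instances instead of loose enumerators.
        static const statement_type INSERT;
        static const statement_type SELECT;

        bool operator==(const statement_type& other) const { return _type == other._type; }
        bool operator!=(const statement_type& other) const { return !(*this == other); }
    };

    // Static member definitions may use the private enum and constructor.
    const statement_type statement_type::INSERT{statement_type::type::insert};
    const statement_type statement_type::SELECT{statement_type::type::select};

    int main() {
        const statement_type& t = statement_type::INSERT;
        std::cout << (t.is_insert() ? "insert" : "other") << '\n';
    }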
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2014 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/statements/truncate_statement.hh" -#include "cql3/statements/prepared_statement.hh" -#include "cql3/cql_statement.hh" - -#include - -namespace cql3 { - -namespace statements { - -truncate_statement::truncate_statement(::shared_ptr name) - : cf_statement{std::move(name)} -{ -} - -uint32_t truncate_statement::get_bound_terms() -{ - return 0; -} - -std::unique_ptr truncate_statement::prepare(database& db,cql_stats& stats) -{ - return std::make_unique(this->shared_from_this()); -} - -bool truncate_statement::uses_function(const sstring& ks_name, const sstring& function_name) const -{ - return parsed_statement::uses_function(ks_name, function_name); -} - -bool truncate_statement::depends_on_keyspace(const sstring& ks_name) const -{ - return false; -} - -bool truncate_statement::depends_on_column_family(const sstring& cf_name) const -{ - return false; -} - -future<> truncate_statement::check_access(const service::client_state& state) -{ - return state.has_column_family_access(keyspace(), column_family(), auth::permission::MODIFY); -} - -void truncate_statement::validate(distributed&, const service::client_state& state) -{ - warn(unimplemented::cause::VALIDATION); -#if 0 - ThriftValidation.validateColumnFamily(keyspace(), columnFamily()); -#endif -} - -future<::shared_ptr> -truncate_statement::execute(distributed& proxy, service::query_state& state, const query_options& options) -{ - return service::get_local_storage_proxy().truncate_blocking(keyspace(), column_family()).handle_exception([](auto ep) { - throw exceptions::truncate_exception(ep); - }).then([] { - return ::shared_ptr{}; - }); -} - -future<::shared_ptr> -truncate_statement::execute_internal(distributed& proxy, service::query_state& state, const query_options& options) -{ - throw std::runtime_error("unsupported operation"); -} - -} - -} diff --git a/scylla/cql3/statements/truncate_statement.hh b/scylla/cql3/statements/truncate_statement.hh deleted file mode 100644 index 6a05b62..0000000 --- a/scylla/cql3/statements/truncate_statement.hh +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
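truncate_statement::execute above funnels any failure of storage_proxy::truncate_blocking into a CQL-level truncate_exception via handle_exception on the returned future. A rough synchronous sketch of that rethrow-as-domain-error shape, substituting plain exceptions and std::throw_with_nested for the seastar future chain (run_truncate and do_truncate are hypothetical names):

    #include <exception>
    #include <stdexcept>
    #include <string>

    // Stand-in for exceptions::truncate_exception.
    struct truncate_exception : std::runtime_error {
        using std::runtime_error::runtime_error;
    };

    // Surface any failure of the underlying truncate as a CQL-level
    // truncate error, preserving the cause as a nested exception.
    template <typename TruncateFn>
    void run_truncate(TruncateFn do_truncate) {
        try {
            do_truncate();
        } catch (...) {
            std::throw_with_nested(truncate_exception("TRUNCATE failed"));
        }
    }

    int main() {
        try {
            run_truncate([] { throw std::runtime_error("disk error"); });
        } catch (const truncate_exception&) {
            return 0;  // cause was wrapped as expected
        }
        return 1;
    }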
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2014 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/statements/raw/cf_statement.hh" -#include "cql3/cql_statement.hh" - -#include - -namespace cql3 { - -namespace statements { - -class truncate_statement : public raw::cf_statement, public cql_statement_no_metadata, public ::enable_shared_from_this { -public: - truncate_statement(::shared_ptr name); - - virtual uint32_t get_bound_terms() override; - - virtual std::unique_ptr prepare(database& db,cql_stats& stats) override; - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override; - - virtual bool depends_on_keyspace(const sstring& ks_name) const override; - - virtual bool depends_on_column_family(const sstring& cf_name) const override; - - virtual future<> check_access(const service::client_state& state) override; - - virtual void validate(distributed&, const service::client_state& state) override; - - virtual future<::shared_ptr> - execute(distributed& proxy, service::query_state& state, const query_options& options) override; - - virtual future<::shared_ptr> - execute_internal(distributed& proxy, service::query_state& state, const query_options& options) override; -}; - -} - -} diff --git a/scylla/cql3/statements/update_statement.cc b/scylla/cql3/statements/update_statement.cc deleted file mode 100644 index 04ab5f2..0000000 --- a/scylla/cql3/statements/update_statement.cc +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "update_statement.hh" -#include "raw/update_statement.hh" -#include "raw/insert_statement.hh" -#include "unimplemented.hh" - -#include "cql3/operation_impl.hh" - -namespace cql3 { - -namespace statements { - -update_statement::update_statement(statement_type type, uint32_t bound_terms, schema_ptr s, std::unique_ptr attrs, uint64_t* cql_stats_counter_ptr) - : modification_statement{type, bound_terms, std::move(s), std::move(attrs), cql_stats_counter_ptr} -{ } - -bool update_statement::require_full_clustering_key() const { - return true; -} - -bool update_statement::allow_clustering_key_slices() const { - return false; -} - -void update_statement::add_update_for_key(mutation& m, const query::clustering_range& range, const update_parameters& params) { - auto prefix = range.start() ? std::move(range.start()->value()) : clustering_key_prefix::make_empty(); - if (s->is_dense()) { - if (prefix.is_empty(*s) || prefix.components().front().empty()) { - throw exceptions::invalid_request_exception(sprint("Missing PRIMARY KEY part %s", s->clustering_key_columns().begin()->name_as_text())); - } - // An empty name for the value is what we use to recognize the case where there is not column - // outside the PK, see CreateStatement. - // Since v3 schema we use empty_type instead, see schema.cc. - auto rb = s->regular_begin(); - if (rb->name().empty() || rb->type == empty_type) { - // There is no column outside the PK. So no operation could have passed through validation - assert(_column_operations.empty()); - constants::setter(*s->regular_begin(), make_shared(constants::value(cql3::raw_value::make_value(bytes())))).execute(m, prefix, params); - } else { - // dense means we don't have a row marker, so don't accept to set only the PK. See CASSANDRA-5648. - if (_column_operations.empty()) { - throw exceptions::invalid_request_exception(sprint("Column %s is mandatory for this COMPACT STORAGE table", s->regular_begin()->name_as_text())); - } - } - } else { - // If there are static columns, there also must be clustering columns, in which - // case empty prefix can only refer to the static row. - bool is_static_prefix = s->has_static_columns() && prefix.is_empty(*s); - if (type.is_insert() && !is_static_prefix && s->is_cql3_table()) { - auto& row = m.partition().clustered_row(*s, prefix); - row.apply(row_marker(params.timestamp(), params.ttl(), params.expiry())); - } - } - - for (auto&& update : _column_operations) { - update->execute(m, prefix, params); - } - - warn(unimplemented::cause::INDEXES); -#if 0 - SecondaryIndexManager indexManager = Keyspace.open(cfm.ksName).getColumnFamilyStore(cfm.cfId).indexManager; - if (indexManager.hasIndexes()) - { - for (Cell cell : cf) - { - // Indexed values must be validated by any applicable index. 
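The dense/CQL3 branching in add_update_for_key above hides one load-bearing rule: only an INSERT on a regular CQL3 table writes a row marker (so the row survives deletion of all its non-key columns), a prefix that can only denote the static row never gets one, and an UPDATE never writes one (see CASSANDRA-5648). A sketch of just that predicate, with the schema queries reduced to booleans:

    #include <cassert>

    // Condensed from the deleted update_statement::add_update_for_key:
    // the row marker is written only for INSERT, only for a real
    // clustered row (not the static row), and only on CQL3 tables.
    bool needs_row_marker(bool is_insert, bool prefix_names_static_row, bool is_cql3_table) {
        return is_insert && !prefix_names_static_row && is_cql3_table;
    }

    int main() {
        assert(needs_row_marker(true,  false, true));   // INSERT -> marker
        assert(!needs_row_marker(false, false, true));  // UPDATE -> no marker
        assert(!needs_row_marker(true,  true,  true));  // static row -> no marker
    }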
See CASSANDRA-3057/4240/8081 for more details - if (!indexManager.validate(cell)) - throw new InvalidRequestException(String.format("Can't index column value of size %d for index %s on %s.%s", - cell.value().remaining(), - cfm.getColumnDefinition(cell.name()).getIndexName(), - cfm.ksName, - cfm.cfName)); - } - } - } -#endif -} - -namespace raw { - -insert_statement::insert_statement( ::shared_ptr name, - ::shared_ptr attrs, - std::vector<::shared_ptr> column_names, - std::vector<::shared_ptr> column_values, - bool if_not_exists) - : raw::modification_statement{std::move(name), std::move(attrs), conditions_vector{}, if_not_exists, false} - , _column_names{std::move(column_names)} - , _column_values{std::move(column_values)} -{ } - -::shared_ptr -insert_statement::prepare_internal(database& db, schema_ptr schema, - ::shared_ptr bound_names, std::unique_ptr attrs, cql_stats& stats) -{ - auto stmt = ::make_shared(statement_type::INSERT, bound_names->size(), schema, std::move(attrs), &stats.inserts); - - // Created from an INSERT - if (stmt->is_counter()) { - throw exceptions::invalid_request_exception("INSERT statement are not allowed on counter tables, use UPDATE instead"); - } - - if (_column_names.size() != _column_values.size()) { - throw exceptions::invalid_request_exception("Unmatched column names/values"); - } - - if (_column_names.empty()) { - throw exceptions::invalid_request_exception("No columns provided to INSERT"); - } - - std::vector<::shared_ptr> relations; - std::unordered_set column_ids; - for (size_t i = 0; i < _column_names.size(); i++) { - auto&& col = _column_names[i]; - auto id = col->prepare_column_identifier(schema); - auto def = get_column_definition(schema, *id); - if (!def) { - throw exceptions::invalid_request_exception(sprint("Unknown identifier %s", *id)); - } - if (column_ids.count(id->name())) { - throw exceptions::invalid_request_exception(sprint("Multiple definitions found for column %s", *id)); - } - column_ids.emplace(id->name()); - - auto&& value = _column_values[i]; - - if (def->is_primary_key()) { - relations.push_back(::make_shared(col, operator_type::EQ, value)); - } else { - auto operation = operation::set_value(value).prepare(db, keyspace(), *def); - operation->collect_marker_specification(bound_names); - stmt->add_operation(std::move(operation)); - }; - } - stmt->process_where_clause(db, relations, std::move(bound_names)); - return stmt; -} - -update_statement::update_statement( ::shared_ptr name, - ::shared_ptr attrs, - std::vector, ::shared_ptr>> updates, - std::vector where_clause, - conditions_vector conditions) - : raw::modification_statement(std::move(name), std::move(attrs), std::move(conditions), false, false) - , _updates(std::move(updates)) - , _where_clause(std::move(where_clause)) -{ } - -::shared_ptr -update_statement::prepare_internal(database& db, schema_ptr schema, - ::shared_ptr bound_names, std::unique_ptr attrs, cql_stats& stats) -{ - auto stmt = ::make_shared(statement_type::UPDATE, bound_names->size(), schema, std::move(attrs), &stats.updates); - - for (auto&& entry : _updates) { - auto id = entry.first->prepare_column_identifier(schema); - auto def = get_column_definition(schema, *id); - if (!def) { - throw exceptions::invalid_request_exception(sprint("Unknown identifier %s", *entry.first)); - } - - auto operation = entry.second->prepare(db, keyspace(), *def); - operation->collect_marker_specification(bound_names); - - if (def->is_primary_key()) { - throw exceptions::invalid_request_exception(sprint("PRIMARY KEY part %s found in 
SET part", *entry.first)); - } - stmt->add_operation(std::move(operation)); - } - - stmt->process_where_clause(db, _where_clause, std::move(bound_names)); - return stmt; -} - -} - -} - -} diff --git a/scylla/cql3/statements/update_statement.hh b/scylla/cql3/statements/update_statement.hh deleted file mode 100644 index 31be589..0000000 --- a/scylla/cql3/statements/update_statement.hh +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/statements/modification_statement.hh" -#include "cql3/statements/raw/modification_statement.hh" -#include "cql3/column_identifier.hh" -#include "cql3/term.hh" - -#include "database_fwd.hh" - -#include -#include "unimplemented.hh" - -namespace cql3 { - -namespace statements { - -/** - * An UPDATE statement parsed from a CQL query statement. - * - */ -class update_statement : public modification_statement { -public: -#if 0 - private static final Constants.Value EMPTY = new Constants.Value(ByteBufferUtil.EMPTY_BYTE_BUFFER); -#endif - - update_statement(statement_type type, uint32_t bound_terms, schema_ptr s, std::unique_ptr attrs, uint64_t* cql_stats_counter_ptr); -private: - virtual bool require_full_clustering_key() const override; - - virtual bool allow_clustering_key_slices() const override; - - virtual void add_update_for_key(mutation& m, const query::clustering_range& range, const update_parameters& params) override; -}; - -} - -} diff --git a/scylla/cql3/statements/use_statement.cc b/scylla/cql3/statements/use_statement.cc deleted file mode 100644 index a634247..0000000 --- a/scylla/cql3/statements/use_statement.cc +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/statements/use_statement.hh" -#include "cql3/statements/raw/use_statement.hh" - -#include "transport/messages/result_message.hh" - -namespace cql3 { - -namespace statements { - -use_statement::use_statement(sstring keyspace) - : _keyspace(keyspace) -{ -} - -uint32_t use_statement::get_bound_terms() -{ - return 0; -} - -namespace raw { - -use_statement::use_statement(sstring keyspace) - : _keyspace(keyspace) -{ -} - -std::unique_ptr use_statement::prepare(database& db, cql_stats& stats) -{ - return std::make_unique(make_shared(_keyspace)); -} - -} - -bool use_statement::uses_function(const sstring& ks_name, const sstring& function_name) const -{ - return false; -} - -bool use_statement::depends_on_keyspace(const sstring& ks_name) const -{ - return false; -} - -bool use_statement::depends_on_column_family(const sstring& cf_name) const -{ - return false; -} - -future<> use_statement::check_access(const service::client_state& state) -{ - state.validate_login(); - return make_ready_future<>(); -} - -void use_statement::validate(distributed&, const service::client_state& state) -{ -} - -future<::shared_ptr> -use_statement::execute(distributed& proxy, service::query_state& state, const query_options& options) { - state.get_client_state().set_keyspace(proxy.local().get_db(), _keyspace); - auto result =::make_shared(_keyspace); - return make_ready_future<::shared_ptr>(result); -} - -future<::shared_ptr> -use_statement::execute_internal(distributed& proxy, service::query_state& state, const query_options& options) { - // Internal queries are exclusively on the system keyspace and 'use' is thus useless - throw std::runtime_error("unsupported operation"); -} - -} - -} diff --git a/scylla/cql3/statements/use_statement.hh b/scylla/cql3/statements/use_statement.hh deleted file mode 100644 index 035be31..0000000 --- a/scylla/cql3/statements/use_statement.hh +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2014 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "transport/messages_fwd.hh" -#include "cql3/cql_statement.hh" -#include "cql3/statements/raw/parsed_statement.hh" -#include "prepared_statement.hh" - -namespace cql3 { - -namespace statements { - -class use_statement : public cql_statement_no_metadata { -private: - const sstring _keyspace; - -public: - use_statement(sstring keyspace); - - virtual uint32_t get_bound_terms() override; - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override; - - virtual bool depends_on_keyspace(const sstring& ks_name) const override; - - virtual bool depends_on_column_family(const sstring& cf_name) const override; - - virtual future<> check_access(const service::client_state& state) override; - - virtual void validate(distributed&, const service::client_state& state) override; - - virtual future<::shared_ptr> - execute(distributed& proxy, service::query_state& state, const query_options& options) override; - - virtual future<::shared_ptr> - execute_internal(distributed& proxy, service::query_state& state, const query_options& options) override; -}; - -} - -} diff --git a/scylla/cql3/stats.hh b/scylla/cql3/stats.hh deleted file mode 100644 index 4d429a9..0000000 --- a/scylla/cql3/stats.hh +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -namespace cql3 { - -struct cql_stats { - uint64_t reads = 0; - uint64_t inserts = 0; - uint64_t updates = 0; - uint64_t deletes = 0; - uint64_t batches = 0; - uint64_t statements_in_batches = 0; - uint64_t batches_pure_logged = 0; - uint64_t batches_pure_unlogged = 0; - uint64_t batches_unlogged_from_logged = 0; -}; - -} diff --git a/scylla/cql3/term.hh b/scylla/cql3/term.hh deleted file mode 100644 index ef7f875..0000000 --- a/scylla/cql3/term.hh +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include "variable_specifications.hh" -#include "cql3/assignment_testable.hh" -#include "cql3/query_options.hh" -#include "cql3/values.hh" -#include "types.hh" - -namespace cql3 { - -class terminal; - -class term; - -/** - * A CQL3 term, i.e. a column value with or without bind variables. - * - * A Term can be either terminal or non terminal. A term object is one that is typed and is obtained - * from a raw term (Term.Raw) by poviding the actual receiver to which the term is supposed to be a - * value of. - */ -class term : public ::enable_shared_from_this { -public: - virtual ~term() {} - - /** - * Collects the column specification for the bind variables in this Term. - * This is obviously a no-op if the term is Terminal. - * - * @param boundNames the variables specification where to collect the - * bind variables of this term in. - */ - virtual void collect_marker_specification(::shared_ptr bound_names) = 0; - - /** - * Bind the values in this term to the values contained in {@code values}. - * This is obviously a no-op if the term is Terminal. - * - * @param options the values to bind markers to. - * @return the result of binding all the variables of this NonTerminal (or - * 'this' if the term is terminal). - */ - virtual ::shared_ptr bind(const query_options& options) = 0; - - /** - * A shorter for bind(values).get(). 
- * We expose it mainly because for constants it can avoids allocating a temporary - * object between the bind and the get (note that we still want to be able - * to separate bind and get for collections). - */ - virtual cql3::raw_value_view bind_and_get(const query_options& options) = 0; - - /** - * Whether or not that term contains at least one bind marker. - * - * Note that this is slightly different from being or not a NonTerminal, - * because calls to non pure functions will be NonTerminal (see #5616) - * even if they don't have bind markers. - */ - virtual bool contains_bind_marker() const = 0; - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const = 0; - - virtual sstring to_string() const { - return sprint("term@%p", static_cast(this)); - } - - friend std::ostream& operator<<(std::ostream& out, const term& t) { - return out << t.to_string(); - } - - /** - * A parsed, non prepared (thus untyped) term. - * - * This can be one of: - * - a constant - * - a collection literal - * - a function call - * - a marker - */ - class raw : public virtual assignment_testable { - public: - /** - * This method validates this RawTerm is valid for provided column - * specification and "prepare" this RawTerm, returning the resulting - * prepared Term. - * - * @param receiver the "column" this RawTerm is supposed to be a value of. Note - * that the ColumnSpecification may not correspond to a real column in the - * case this RawTerm describe a list index or a map key, etc... - * @return the prepared term. - */ - virtual ::shared_ptr prepare(database& db, const sstring& keyspace, ::shared_ptr receiver) = 0; - - virtual sstring to_string() const = 0; - - virtual sstring assignment_testable_source_context() const override { - return to_string(); - } - - friend std::ostream& operator<<(std::ostream& os, const raw& r) { - // FIXME: kill const_cast - return os << const_cast(r).to_string(); - } - }; - - class multi_column_raw : public virtual raw { - public: - virtual ::shared_ptr prepare(database& db, const sstring& keyspace, const std::vector>& receiver) = 0; - }; -}; - -/** - * A terminal term, one that can be reduced to a byte buffer directly. - * - * This includes most terms that don't have a bind marker (an exception - * being delayed call for non pure function that are NonTerminal even - * if they don't have bind markers). - * - * This can be only one of: - * - a constant value - * - a collection value - * - * Note that a terminal term will always have been type checked, and thus - * consumer can (and should) assume so. - */ -class terminal : public term { -public: - virtual void collect_marker_specification(::shared_ptr bound_names) { - } - - virtual ::shared_ptr bind(const query_options& options) override { - return static_pointer_cast(this->shared_from_this()); - } - - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override { - return false; - } - - // While some NonTerminal may not have bind markers, no Term can be Terminal - // with a bind marker - virtual bool contains_bind_marker() const override { - return false; - } - - /** - * @return the serialized value of this terminal. 
- */ - virtual cql3::raw_value get(const query_options& options) = 0; - - virtual cql3::raw_value_view bind_and_get(const query_options& options) override { - return options.make_temporary(get(options)); - } - - virtual sstring to_string() const = 0; -}; - -class multi_item_terminal : public terminal { -public: - virtual std::vector get_elements() = 0; -}; - -class collection_terminal { -public: - virtual ~collection_terminal() {} - /** Gets the value of the collection when serialized with the given protocol version format */ - virtual bytes get_with_protocol_version(cql_serialization_format sf) = 0; -}; - -/** - * A non terminal term, i.e. a term that can only be reduce to a byte buffer - * at execution time. - * - * We have the following type of NonTerminal: - * - marker for a constant value - * - marker for a collection value (list, set, map) - * - a function having bind marker - * - a non pure function (even if it doesn't have bind marker - see #5616) - */ -class non_terminal : public term { -public: - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override { - return false; - } - - virtual cql3::raw_value_view bind_and_get(const query_options& options) override { - auto t = bind(options); - if (t) { - return options.make_temporary(t->get(options)); - } - return cql3::raw_value_view::make_null(); - }; -}; - -} diff --git a/scylla/cql3/token_relation.cc b/scylla/cql3/token_relation.cc deleted file mode 100644 index adf5f6d..0000000 --- a/scylla/cql3/token_relation.cc +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
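The terminal/non_terminal split above is mainly about allocation: a terminal binds to itself and can answer bind_and_get() without the intermediate bind step, while a non-terminal's default bind_and_get() is bind-then-get. A stripped-down sketch of that contract, with std::optional<std::string> standing in for cql3::raw_value_view:

    #include <memory>
    #include <optional>
    #include <string>

    struct terminal;

    // Sketch of the deleted term hierarchy's bind()/bind_and_get() contract.
    struct term {
        virtual ~term() = default;
        virtual std::shared_ptr<terminal> bind() = 0;
        virtual std::optional<std::string> bind_and_get() = 0;
    };

    // A terminal is already a value: bind() returns itself, and
    // bind_and_get() can skip the bind step entirely.
    struct terminal : term, std::enable_shared_from_this<terminal> {
        std::string value;
        explicit terminal(std::string v) : value(std::move(v)) {}
        std::shared_ptr<terminal> bind() override { return shared_from_this(); }
        std::optional<std::string> bind_and_get() override { return value; }
    };

    // A non-terminal only knows how to bind; its default bind_and_get()
    // materializes a terminal and then extracts the value (bind-then-get).
    struct non_terminal : term {
        std::optional<std::string> bind_and_get() override {
            auto t = bind();
            return t ? t->bind_and_get() : std::nullopt;
        }
    };

    int main() {
        auto c = std::make_shared<terminal>("42");
        return c->bind_and_get() == "42" ? 0 : 1;
    }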
- */ - -#include "restrictions/token_restriction.hh" -#include "token_relation.hh" -#include "column_identifier.hh" -#include "term.hh" -#include "to_string.hh" - -std::vector cql3::token_relation::get_column_definitions(schema_ptr s) { - std::vector res; - std::transform(_entities.begin(), _entities.end(), std::back_inserter(res), - [this, s](auto& cr) { - return &this->to_column_definition(s, cr); - }); - return res; -} - -std::vector<::shared_ptr> cql3::token_relation::to_receivers( - schema_ptr schema, - const std::vector& column_defs) { - auto pk = schema->partition_key_columns(); - if (!std::equal(column_defs.begin(), column_defs.end(), pk.begin(), - pk.end(), [](auto* c1, auto& c2) { - return c1 == &c2; // same, not "equal". - })) { -#if 0 - checkTrue(columnDefs.containsAll(cfm.partitionKeyColumns()), - "The token() function must be applied to all partition key components or none of them"); - - checkContainsNoDuplicates(columnDefs, "The token() function contains duplicate partition key components"); - - checkContainsOnly(columnDefs, cfm.partitionKeyColumns(), "The token() function must contains only partition key components"); -#endif - throw exceptions::invalid_request_exception( - sprint( - "The token function arguments must be in the partition key order: %s", - std::to_string(column_defs))); - } - //auto* c = column_defs.front(); - return {::make_shared(schema->ks_name(), schema->cf_name(), - ::make_shared("partition key token", true), - dht::global_partitioner().get_token_validator())}; -} - -::shared_ptr cql3::token_relation::new_EQ_restriction( - database& db, schema_ptr schema, - ::shared_ptr bound_names) { - auto column_defs = get_column_definitions(schema); - auto term = to_term(to_receivers(schema, column_defs), _value, db, - schema->ks_name(), bound_names); - return ::make_shared(column_defs, term); -} - -::shared_ptr cql3::token_relation::new_IN_restriction( - database& db, schema_ptr schema, - ::shared_ptr bound_names) { - throw exceptions::invalid_request_exception( - sprint("%s cannot be used with the token function", - get_operator())); -} - -::shared_ptr cql3::token_relation::new_slice_restriction( - database& db, schema_ptr schema, - ::shared_ptr bound_names, - statements::bound bound, - bool inclusive) { - auto column_defs = get_column_definitions(schema); - auto term = to_term(to_receivers(schema, column_defs), _value, db, - schema->ks_name(), bound_names); - return ::make_shared(column_defs, - bound, inclusive, term); -} - -::shared_ptr cql3::token_relation::new_contains_restriction( - database& db, schema_ptr schema, - ::shared_ptr bound_names, bool isKey) { - throw exceptions::invalid_request_exception( - sprint("%s cannot be used with the token function", - get_operator())); -} - -sstring cql3::token_relation::to_string() const { - return sprint("token(%s) %s %s", join(", ", _entities), get_operator(), _value); -} - -::shared_ptr cql3::token_relation::to_term( - const std::vector<::shared_ptr>& receivers, - ::shared_ptr raw, database& db, const sstring& keyspace, - ::shared_ptr bound_names) { - auto term = raw->prepare(db, keyspace, receivers.front()); - term->collect_marker_specification(bound_names); - return term; -} - -::shared_ptr cql3::token_relation::maybe_rename_identifier(const cql3::column_identifier::raw& from, cql3::column_identifier::raw to) { - auto new_entities = boost::copy_range(_entities | boost::adaptors::transformed([&] (auto&& entity) { - return *entity == from ? 
::make_shared(to) : entity; - })); - return ::make_shared(std::move(new_entities), _relation_type, _value); -} diff --git a/scylla/cql3/token_relation.hh b/scylla/cql3/token_relation.hh deleted file mode 100644 index 28e9848..0000000 --- a/scylla/cql3/token_relation.hh +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -#include "core/shared_ptr.hh" -#include "to_string.hh" - -#include "relation.hh" -#include "column_identifier.hh" -#include "term.hh" -#include "restrictions/restriction.hh" - -namespace cql3 { - -/** - * A relation using the token function. - * Examples: - *
<ul>
- * <li>SELECT ... WHERE token(a) > token(1)</li>
- * <li>SELECT ... WHERE token(a, b) > token(1, 3)</li>
- * </ul>
- */ -class token_relation : public relation { -private: - std::vector<::shared_ptr> _entities; - ::shared_ptr _value; - - /** - * Returns the definition of the columns to which apply the token restriction. - * - * @param cfm the column family metadata - * @return the definition of the columns to which apply the token restriction. - * @throws InvalidRequestException if the entity cannot be resolved - */ - std::vector get_column_definitions(schema_ptr s); - - /** - * Returns the receivers for this relation. - * - * @param cfm the Column Family meta data - * @param columnDefs the column definitions - * @return the receivers for the specified relation. - * @throws InvalidRequestException if the relation is invalid - */ - std::vector<::shared_ptr> to_receivers(schema_ptr schema, const std::vector& column_defs); - -public: - token_relation(std::vector<::shared_ptr> entities, - const operator_type& type, ::shared_ptr value) - : relation(type), _entities(std::move(entities)), _value( - std::move(value)) { - } - - bool on_token() const override { - return true; - } - - ::shared_ptr new_EQ_restriction(database& db, - schema_ptr schema, - ::shared_ptr bound_names) override; - - ::shared_ptr new_IN_restriction(database& db, - schema_ptr schema, - ::shared_ptr bound_names) override; - - ::shared_ptr new_slice_restriction(database& db, - schema_ptr schema, - ::shared_ptr bound_names, - statements::bound bound, - bool inclusive) override; - - ::shared_ptr new_contains_restriction( - database& db, schema_ptr schema, - ::shared_ptr bound_names, bool isKey) - override; - - ::shared_ptr maybe_rename_identifier(const column_identifier::raw& from, column_identifier::raw to) override; - - sstring to_string() const override; - -protected: - ::shared_ptr to_term(const std::vector<::shared_ptr>& receivers, - ::shared_ptr raw, - database& db, - const sstring& keyspace, - ::shared_ptr bound_names) override; -}; - -} diff --git a/scylla/cql3/tuples.hh b/scylla/cql3/tuples.hh deleted file mode 100644 index c395558..0000000 --- a/scylla/cql3/tuples.hh +++ /dev/null @@ -1,430 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
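to_receivers in the deleted token_relation.cc enforces that token() is applied to exactly the partition key columns, in declaration order, compared by object identity ("same, not equal"). The two-range std::equal idiom it uses is easy to lift out; a sketch with std::string in place of column_definition:

    #include <algorithm>
    #include <string>
    #include <vector>

    // Sketch of the identity-and-order check in token_relation::to_receivers:
    // every token() argument must be the matching partition key column itself.
    bool matches_partition_key(const std::vector<const std::string*>& args,
                               const std::vector<std::string>& pk) {
        return std::equal(args.begin(), args.end(), pk.begin(), pk.end(),
                          [](const std::string* a, const std::string& b) {
                              return a == &b;  // same object, not merely equal text
                          });
    }

    int main() {
        std::vector<std::string> pk = {"a", "b"};
        std::vector<const std::string*> ok = {&pk[0], &pk[1]};
        std::vector<const std::string*> wrong_order = {&pk[1], &pk[0]};
        return (matches_partition_key(ok, pk)
                && !matches_partition_key(wrong_order, pk)) ? 0 : 1;
    }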
- * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "term.hh" -#include "abstract_marker.hh" - -namespace cql3 { - -/** - * Static helper methods and classes for tuples. - */ -class tuples { -public: - static shared_ptr component_spec_of(shared_ptr column, size_t component) { - return ::make_shared( - column->ks_name, - column->cf_name, - ::make_shared(sprint("%s[%d]", column->name, component), true), - static_pointer_cast(column->type)->type(component)); - } - - /** - * A raw, literal tuple. When prepared, this will become a Tuples.Value or Tuples.DelayedValue, depending - * on whether the tuple holds NonTerminals. - */ - class literal : public term::multi_column_raw { - std::vector> _elements; - public: - literal(std::vector> elements) - : _elements(std::move(elements)) { - } - virtual shared_ptr prepare(database& db, const sstring& keyspace, shared_ptr receiver) override { - validate_assignable_to(db, keyspace, receiver); - std::vector> values; - bool all_terminal = true; - for (size_t i = 0; i < _elements.size(); ++i) { - auto&& value = _elements[i]->prepare(db, keyspace, component_spec_of(receiver, i)); - if (dynamic_pointer_cast(value)) { - all_terminal = false; - } - values.push_back(std::move(value)); - } - delayed_value value(static_pointer_cast(receiver->type), values); - if (all_terminal) { - return value.bind(query_options::DEFAULT); - } else { - return make_shared(std::move(value)); - } - } - - virtual shared_ptr prepare(database& db, const sstring& keyspace, const std::vector>& receivers) override { - if (_elements.size() != receivers.size()) { - throw exceptions::invalid_request_exception(sprint("Expected %d elements in value tuple, but got %d: %s", receivers.size(), _elements.size(), *this)); - } - - std::vector> values; - std::vector types; - bool all_terminal = true; - for (size_t i = 0; i < _elements.size(); ++i) { - auto&& t = _elements[i]->prepare(db, keyspace, receivers[i]); - if (dynamic_pointer_cast(t)) { - all_terminal = false; - } - values.push_back(t); - types.push_back(receivers[i]->type); - } - delayed_value value(tuple_type_impl::get_instance(std::move(types)), std::move(values)); - if (all_terminal) { - return value.bind(query_options::DEFAULT); - } else { - return make_shared(std::move(value)); - } - } - - private: - void validate_assignable_to(database& db, const sstring& keyspace, shared_ptr receiver) { - auto tt = dynamic_pointer_cast(receiver->type); - if (!tt) { - throw exceptions::invalid_request_exception(sprint("Invalid tuple type literal for %s of type %s", receiver->name, receiver->type->as_cql3_type())); - } - for (size_t i = 0; i < _elements.size(); ++i) { - if (i >= tt->size()) { - throw exceptions::invalid_request_exception(sprint("Invalid tuple literal for %s: too many elements. 
Type %s expects %d but got %d", - receiver->name, tt->as_cql3_type(), tt->size(), _elements.size())); - } - - auto&& value = _elements[i]; - auto&& spec = component_spec_of(receiver, i); - if (!assignment_testable::is_assignable(value->test_assignment(db, keyspace, spec))) { - throw exceptions::invalid_request_exception(sprint("Invalid tuple literal for %s: component %d is not of type %s", receiver->name, i, spec->type->as_cql3_type())); - } - } - } - public: - virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, shared_ptr receiver) override { - try { - validate_assignable_to(db, keyspace, receiver); - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } catch (exceptions::invalid_request_exception e) { - return assignment_testable::test_result::NOT_ASSIGNABLE; - } - } - - virtual sstring to_string() const override { - return tuple_to_string(_elements); - } - }; - - /** - * A tuple of terminal values (e.g (123, 'abc')). - */ - class value : public multi_item_terminal { - public: - std::vector _elements; - public: - value(std::vector elements) - : _elements(std::move(elements)) { - } - value(std::vector elements) { - for (auto&& e : elements) { - _elements.push_back(e ? bytes_opt(bytes(e->begin(), e->size())) : bytes_opt()); - } - } - static value from_serialized(bytes_view buffer, tuple_type type) { - return value(type->split(buffer)); - } - virtual cql3::raw_value get(const query_options& options) override { - return cql3::raw_value::make_value(tuple_type_impl::build_value(_elements)); - } - - virtual std::vector get_elements() override { - return _elements; - } - virtual sstring to_string() const override { - return sprint("(%s)", join(", ", _elements)); - } - }; - - /** - * Similar to Value, but contains at least one NonTerminal, such as a non-pure functions or bind marker. - */ - class delayed_value : public non_terminal { - tuple_type _type; - std::vector> _elements; - public: - delayed_value(tuple_type type, std::vector> elements) - : _type(std::move(type)), _elements(std::move(elements)) { - } - - virtual bool contains_bind_marker() const override { - return std::all_of(_elements.begin(), _elements.end(), std::mem_fn(&term::contains_bind_marker)); - } - - virtual void collect_marker_specification(shared_ptr bound_names) override { - for (auto&& term : _elements) { - term->collect_marker_specification(bound_names); - } - } - private: - std::vector bind_internal(const query_options& options) { - std::vector buffers; - buffers.resize(_elements.size()); - for (size_t i = 0; i < _elements.size(); ++i) { - const auto& value = _elements[i]->bind_and_get(options); - if (value.is_unset_value()) { - throw exceptions::invalid_request_exception(sprint("Invalid unset value for tuple field number %d", i)); - } - buffers[i] = to_bytes_opt(value); - // Inside tuples, we must force the serialization of collections to v3 whatever protocol - // version is in use since we're going to store directly that serialized value. 
- if (options.get_cql_serialization_format() != cql_serialization_format::internal() - && _type->type(i)->is_collection()) { - if (buffers[i]) { - buffers[i] = static_pointer_cast(_type->type(i))->reserialize( - options.get_cql_serialization_format(), - cql_serialization_format::internal(), - bytes_view(*buffers[i])); - } - } - } - return buffers; - } - - public: - virtual shared_ptr bind(const query_options& options) override { - return ::make_shared(bind_internal(options)); - } - - virtual cql3::raw_value_view bind_and_get(const query_options& options) override { - // We don't "need" that override but it saves us the allocation of a Value object if used - return options.make_temporary(cql3::raw_value::make_value(_type->build_value(bind_internal(options)))); - } - }; - - /** - * A terminal value for a list of IN values that are tuples. For example: "SELECT ... WHERE (a, b, c) IN ?" - * This is similar to Lists.Value, but allows us to keep components of the tuples in the list separate. - */ - class in_value : public terminal { - private: - std::vector> _elements; - public: - in_value(std::vector> items) : _elements(std::move(items)) { } - in_value(std::vector> items) { - _elements.reserve(items.size()); - for (auto&& tuple : items) { - std::vector elems; - elems.reserve(tuple.size()); - for (auto&& e : tuple) { - elems.emplace_back(e ? bytes_opt(bytes(e->begin(), e->end())) : bytes_opt()); - } - _elements.emplace_back(std::move(elems)); - } - } - - static in_value from_serialized(bytes_view value, list_type type, const query_options& options) { - try { - // Collections have this small hack that validate cannot be called on a serialized object, - // but the deserialization does the validation (so we're fine). - auto l = value_cast(type->deserialize(value, options.get_cql_serialization_format())); - auto ttype = dynamic_pointer_cast(type->get_elements_type()); - assert(ttype); - - std::vector> elements; - elements.reserve(l.size()); - for (auto&& element : l) { - elements.emplace_back(ttype->split(ttype->decompose(element))); - } - return in_value(elements); - } catch (marshal_exception& e) { - throw exceptions::invalid_request_exception(e.what()); - } - } - - virtual cql3::raw_value get(const query_options& options) override { - throw exceptions::unsupported_operation_exception(); - } - - std::vector> get_split_values() const { - return _elements; - } - - virtual sstring to_string() const override { - std::vector tuples(_elements.size()); - std::transform(_elements.begin(), _elements.end(), tuples.begin(), &tuples::tuple_to_string); - return tuple_to_string(tuples); - } - }; - - /** - * A raw placeholder for a tuple of values for different multiple columns, each of which may have a different type. - * For example, "SELECT ... WHERE (col1, col2) > ?". 
- */ - class raw : public abstract_marker::raw, public term::multi_column_raw { - public: - using abstract_marker::raw::raw; - - virtual ::shared_ptr prepare(database& db, const sstring& keyspace, const std::vector>& receivers) override { - return make_shared(_bind_index, make_receiver(receivers)); - } - - virtual ::shared_ptr prepare(database& db, const sstring& keyspace, ::shared_ptr receiver) override { - throw std::runtime_error("Tuples.Raw.prepare() requires a list of receivers"); - } - - virtual sstring assignment_testable_source_context() const override { - return abstract_marker::raw::to_string(); - } - - virtual sstring to_string() const override { - return abstract_marker::raw::to_string(); - } - private: - static ::shared_ptr make_receiver(const std::vector>& receivers) { - std::vector types; - types.reserve(receivers.size()); - sstring in_name = "("; - for (auto&& receiver : receivers) { - in_name += receiver->name->text(); - if (receiver != receivers.back()) { - in_name += ","; - } - types.push_back(receiver->type); - } - in_name += ")"; - - auto identifier = make_shared(in_name, true); - auto type = tuple_type_impl::get_instance(types); - return make_shared(receivers.front()->ks_name, receivers.front()->cf_name, identifier, type); - } - }; - - /** - * A raw marker for an IN list of tuples, like "SELECT ... WHERE (a, b, c) IN ?" - */ - class in_raw : public abstract_marker::raw, public term::multi_column_raw { - public: - using abstract_marker::raw::raw; - - virtual ::shared_ptr prepare(database& db, const sstring& keyspace, const std::vector>& receivers) override { - return make_shared(_bind_index, make_in_receiver(receivers)); - } - - virtual ::shared_ptr prepare(database& db, const sstring& keyspace, ::shared_ptr receiver) override { - throw std::runtime_error("Tuples.INRaw.prepare() requires a list of receivers"); - } - - virtual sstring assignment_testable_source_context() const override { - return to_string(); - } - - virtual sstring to_string() const override { - return abstract_marker::raw::to_string(); - } - private: - static ::shared_ptr make_in_receiver(const std::vector>& receivers) { - std::vector types; - types.reserve(receivers.size()); - sstring in_name = "in("; - for (auto&& receiver : receivers) { - in_name += receiver->name->text(); - if (receiver != receivers.back()) { - in_name += ","; - } - - if (receiver->type->is_collection() && receiver->type->is_multi_cell()) { - throw exceptions::invalid_request_exception("Non-frozen collection columns do not support IN relations"); - } - - types.emplace_back(receiver->type); - } - in_name += ")"; - - auto identifier = make_shared(in_name, true); - auto type = tuple_type_impl::get_instance(types); - return make_shared(receivers.front()->ks_name, receivers.front()->cf_name, identifier, list_type_impl::get_instance(type, false)); - } - }; - - /** - * Represents a marker for a single tuple, like "SELECT ... WHERE (a, b, c) > ?" 
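make_receiver and make_in_receiver above fabricate one synthetic column_specification whose name is the parenthesized list of the columns it stands for, e.g. "(a,b,c)" or "in(a,b,c)". A plain-string sketch of that name construction (using an index in place of the original's comparison against receivers.back()):

    #include <iostream>
    #include <string>
    #include <vector>

    // Sketch of how tuples::raw::make_receiver names the synthetic
    // receiver for a multi-column marker such as "(a,b,c)".
    std::string tuple_receiver_name(const std::vector<std::string>& column_names) {
        std::string name = "(";
        for (size_t i = 0; i < column_names.size(); ++i) {
            name += column_names[i];
            if (i + 1 != column_names.size()) {
                name += ",";
            }
        }
        name += ")";
        return name;
    }

    int main() {
        std::cout << tuple_receiver_name({"a", "b", "c"}) << '\n';  // prints (a,b,c)
    }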
- */ - class marker : public abstract_marker { - public: - marker(int32_t bind_index, ::shared_ptr receiver) - : abstract_marker(bind_index, std::move(receiver)) - { } - - virtual shared_ptr bind(const query_options& options) override { - const auto& value = options.get_value_at(_bind_index); - if (value.is_null()) { - return nullptr; - } else if (value.is_unset_value()) { - throw exceptions::invalid_request_exception(sprint("Invalid unset value for tuple %s", _receiver->name->text())); - } else { - auto as_tuple_type = static_pointer_cast(_receiver->type); - return make_shared(value::from_serialized(*value, as_tuple_type)); - } - } - }; - - /** - * Represents a marker for a set of IN values that are tuples, like "SELECT ... WHERE (a, b, c) IN ?" - */ - class in_marker : public abstract_marker { - public: - in_marker(int32_t bind_index, ::shared_ptr receiver) - : abstract_marker(bind_index, std::move(receiver)) - { - assert(dynamic_pointer_cast(receiver->type)); - } - - virtual shared_ptr bind(const query_options& options) override { - const auto& value = options.get_value_at(_bind_index); - if (value.is_null()) { - return nullptr; - } else if (value.is_unset_value()) { - throw exceptions::invalid_request_exception(sprint("Invalid unset value for tuple %s", _receiver->name->text())); - } else { - auto as_list_type = static_pointer_cast(_receiver->type); - return make_shared(in_value::from_serialized(*value, as_list_type, options)); - } - } - }; - - template - static sstring tuple_to_string(const std::vector& items) { - return sprint("(%s)", join(", ", items)); - } -}; - -} diff --git a/scylla/cql3/type_cast.hh b/scylla/cql3/type_cast.hh deleted file mode 100644 index 88a85fe..0000000 --- a/scylla/cql3/type_cast.hh +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
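A note on the tuples helpers deleted above: they render a single tuple as "(a, b, c)", and an IN list of tuples by applying the same join at two levels. A minimal standalone sketch of that rendering, with illustrative names only (not the original sstring/sprint API):

#include <cstdio>
#include <string>
#include <vector>

// Join items as "(a, b, c)", mirroring tuple_to_string above.
static std::string tuple_to_string(const std::vector<std::string>& items) {
    std::string out = "(";
    for (size_t i = 0; i < items.size(); ++i) {
        if (i) { out += ", "; }
        out += items[i];
    }
    return out + ")";
}

int main() {
    // An IN list of two 2-element tuples, kept with components separate
    // the way in_value stores them.
    std::vector<std::vector<std::string>> elements = {{"1", "a"}, {"2", "b"}};
    std::vector<std::string> tuples;
    for (auto& e : elements) {
        tuples.push_back(tuple_to_string(e));   // inner tuples
    }
    // prints ((1, a), (2, b)) -- the outer join applied once more
    printf("%s\n", tuple_to_string(tuples).c_str());
}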
- */ - -#pragma once - -#include "term.hh" -#include "cql3_type.hh" - -namespace cql3 { - -class type_cast : public term::raw { - shared_ptr _type; - shared_ptr _term; -public: - type_cast(shared_ptr type, shared_ptr term) - : _type(std::move(type)), _term(std::move(term)) { - } - - virtual shared_ptr prepare(database& db, const sstring& keyspace, shared_ptr receiver) override { - if (!is_assignable(_term->test_assignment(db, keyspace, casted_spec_of(db, keyspace, receiver)))) { - throw exceptions::invalid_request_exception(sprint("Cannot cast value %s to type %s", _term, _type)); - } - if (!is_assignable(test_assignment(db, keyspace, receiver))) { - throw exceptions::invalid_request_exception(sprint("Cannot assign value %s to %s of type %s", *this, receiver->name, receiver->type->as_cql3_type())); - } - return _term->prepare(db, keyspace, receiver); - } -private: - shared_ptr casted_spec_of(database& db, const sstring& keyspace, shared_ptr receiver) { - return make_shared(receiver->ks_name, receiver->cf_name, - make_shared(to_string(), true), _type->prepare(db, keyspace)->get_type()); - } -public: - virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, shared_ptr receiver) override { - try { - auto&& casted_type = _type->prepare(db, keyspace)->get_type(); - if (receiver->type->equals(casted_type)) { - return assignment_testable::test_result::EXACT_MATCH; - } else if (receiver->type->is_value_compatible_with(*casted_type)) { - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } else { - return assignment_testable::test_result::NOT_ASSIGNABLE; - } - } catch (exceptions::invalid_request_exception& e) { - abort(); - } - } - - virtual sstring to_string() const override { - return sprint("(%s)%s", _type, _term); - } -}; - -} diff --git a/scylla/cql3/untyped_result_set.cc b/scylla/cql3/untyped_result_set.cc deleted file mode 100644 index c417bce..0000000 --- a/scylla/cql3/untyped_result_set.cc +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. 
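type_cast::test_assignment above resolves to a three-way result: exact match when the receiver and casted types are equal, weakly assignable when they are only value-compatible, otherwise not assignable. A standalone sketch of that decision, with fake_type as a hypothetical stand-in for abstract_type:

enum class test_result { EXACT_MATCH, WEAKLY_ASSIGNABLE, NOT_ASSIGNABLE };

struct fake_type {  // stand-in for abstract_type; comparison rules are illustrative
    int id;
    bool equals(const fake_type& o) const { return id == o.id; }
    bool is_value_compatible_with(const fake_type& o) const { return id / 10 == o.id / 10; }
};

test_result test_assignment(const fake_type& receiver, const fake_type& casted) {
    if (receiver.equals(casted)) {
        return test_result::EXACT_MATCH;        // identical types
    } else if (receiver.is_value_compatible_with(casted)) {
        return test_result::WEAKLY_ASSIGNABLE;  // compatible serialized form
    }
    return test_result::NOT_ASSIGNABLE;
}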
If not, see . - */ -#include -#include -#include -#include -#include "untyped_result_set.hh" -#include "result_set.hh" -#include "transport/messages/result_message.hh" - -cql3::untyped_result_set::row::row(const std::unordered_map& data) - : _data(data) -{} - -cql3::untyped_result_set::row::row(const std::vector<::shared_ptr>& columns, std::vector data) -: _columns(columns) -, _data([&columns, data = std::move(data)] () mutable { - std::unordered_map tmp; - std::transform(columns.begin(), columns.end(), data.begin(), std::inserter(tmp, tmp.end()), [](::shared_ptr c, bytes_opt& d) { - return std::make_pair(c->name->to_string(), std::move(d)); - }); - return tmp; -}()) -{} - -bool cql3::untyped_result_set::row::has(const sstring& name) const { - auto i = _data.find(name); - return i != _data.end() && i->second; -} - -using cql_transport::messages::result_message; - -cql3::untyped_result_set::untyped_result_set(::shared_ptr msg) - : _rows([msg]{ - class visitor : public result_message::visitor_base { - public: - rows_type rows; - void visit(const result_message::rows& rmrs) override { - auto& rs = rmrs.rs(); - auto& cn = rs.get_metadata().get_names(); - for (auto& r : rs.rows()) { - rows.emplace_back(cn, r); - } - } - }; - visitor v; - if (msg != nullptr) { - msg->accept(v); - } - return std::move(v.rows); -}()) -{} - -const cql3::untyped_result_set::row& cql3::untyped_result_set::one() const { - if (_rows.size() != 1) { - throw std::runtime_error("One row required, " + std::to_string(_rows.size()) + " found"); - } - return at(0); -} diff --git a/scylla/cql3/untyped_result_set.hh b/scylla/cql3/untyped_result_set.hh deleted file mode 100644 index 7678947..0000000 --- a/scylla/cql3/untyped_result_set.hh +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
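untyped_result_set::one() above enforces an exactly-one-row contract. A minimal generic sketch of the same guard, detached from the result_message visitor machinery that fills the rows:

#include <stdexcept>
#include <string>
#include <vector>

// Throw unless the result set holds exactly one row, as one() does above.
template <typename Row>
const Row& one(const std::vector<Row>& rows) {
    if (rows.size() != 1) {
        throw std::runtime_error("One row required, " + std::to_string(rows.size()) + " found");
    }
    return rows.front();
}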
- */ -#include -#include -#include "bytes.hh" -#include "types.hh" -#include "transport/messages/result_message_base.hh" - -#pragma once - -namespace cql3 { - -class untyped_result_set { -public: - class row { - private: - const std::vector<::shared_ptr> _columns; - const std::unordered_map _data; - public: - row(const std::unordered_map&); - row(const std::vector<::shared_ptr>&, std::vector); - row(row&&) = default; - row(const row&) = delete; - - bool has(const sstring&) const; - bytes get_blob(const sstring& name) const { - return *_data.at(name); - } - template - T get_as(const sstring& name) const { - return value_cast(data_type_for()->deserialize(get_blob(name))); - } - template - std::experimental::optional get_opt(const sstring& name) const { - return has(name) ? get_as(name) : std::experimental::optional{}; - } - template - T get_or(const sstring& name, T t) const { - return has(name) ? get_as(name) : t; - } - // this could maybe be done as an overload of get_as (or something), but that just - // muddles things for no real gain. Let user (us) attempt to know what he is doing instead. - template - void get_map_data(const sstring& name, Iter out, data_type keytype = - data_type_for(), data_type valtype = - data_type_for()) const { - auto vec = - value_cast( - map_type_impl::get_instance(keytype, valtype, false)->deserialize( - get_blob(name))); - std::transform(vec.begin(), vec.end(), out, - [](auto& p) { - return std::pair(value_cast(p.first), value_cast(p.second)); - }); - } - template - std::unordered_map get_map(const sstring& name, - data_type keytype = data_type_for(), data_type valtype = - data_type_for()) const { - std::unordered_map res; - get_map_data(name, std::inserter(res, res.end()), keytype, valtype); - return res; - } - template - void get_list_data(const sstring& name, Iter out, data_type valtype = data_type_for()) const { - auto vec = - value_cast( - list_type_impl::get_instance(valtype, false)->deserialize( - get_blob(name))); - std::transform(vec.begin(), vec.end(), out, [](auto& v) { return value_cast(v); }); - } - template - std::vector get_list(const sstring& name, data_type valtype = data_type_for()) const { - std::vector res; - get_list_data(name, std::back_inserter(res), valtype); - return res; - } - template - void get_set_data(const sstring& name, Iter out, data_type valtype = - data_type_for()) const { - auto vec = - value_cast( - set_type_impl::get_instance(valtype, - false)->deserialize( - get_blob(name))); - std::transform(vec.begin(), vec.end(), out, [](auto& p) { - return value_cast(p); - }); - } - template - std::unordered_set get_set(const sstring& name, - data_type valtype = - data_type_for()) const { - std::unordered_set res; - get_set_data(name, std::inserter(res, res.end()), valtype); - return res; - } - const std::vector<::shared_ptr>& get_columns() const { - return _columns; - } - }; - - typedef std::vector rows_type; - using const_iterator = rows_type::const_iterator; - - untyped_result_set(::shared_ptr); - untyped_result_set(untyped_result_set&&) = default; - - const_iterator begin() const { - return _rows.begin(); - } - const_iterator end() const { - return _rows.end(); - } - size_t size() const { - return _rows.size(); - } - bool empty() const { - return _rows.empty(); - } - const row& one() const; - const row& at(size_t i) const { - return _rows.at(i); - } - const row& front() const { - return _rows.front(); - } - const row& back() const { - return _rows.back(); - } -private: - rows_type _rows; - untyped_result_set() = default; -public: 
- static untyped_result_set make_empty() { - return untyped_result_set(); - } -}; - -} diff --git a/scylla/cql3/update_parameters.cc b/scylla/cql3/update_parameters.cc deleted file mode 100644 index 1a0cf16..0000000 --- a/scylla/cql3/update_parameters.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/update_parameters.hh" - -namespace cql3 { - -const update_parameters::prefetch_data::cell_list* -update_parameters::get_prefetched_list( - partition_key_view pkey, - clustering_key_view ckey, - const column_definition& column) const -{ - if (!_prefetched) { - return {}; - } - - auto i = _prefetched->rows.find(std::make_pair(std::move(pkey), std::move(ckey))); - if (i == _prefetched->rows.end()) { - return {}; - } - - auto&& row = i->second; - auto j = row.find(column.id); - if (j == row.end()) { - return {}; - } - return &j->second; -} - -update_parameters::prefetch_data::prefetch_data(schema_ptr schema) - : rows(8, key_hashing(*schema), key_equality(*schema)) - , schema(schema) -{ } - -} diff --git a/scylla/cql3/update_parameters.hh b/scylla/cql3/update_parameters.hh deleted file mode 100644 index b52fcf5..0000000 --- a/scylla/cql3/update_parameters.hh +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
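get_prefetched_list() above is a two-level lookup: (partition key, clustering key) selects a prefetched row, then the column id selects its cell list, returning null at either miss. A simplified sketch using plain std::map and int keys in place of the schema-aware hash map:

#include <map>
#include <utility>
#include <vector>

using cell_list = std::vector<int>;                       // stand-in for vector<cell>
using row_map = std::map<int, cell_list>;                 // column id -> cells
using rows_map = std::map<std::pair<int, int>, row_map>;  // (pkey, ckey) -> row

const cell_list* get_prefetched_list(const rows_map& rows, int pkey, int ckey, int column_id) {
    auto i = rows.find(std::make_pair(pkey, ckey));
    if (i == rows.end()) {
        return nullptr;   // row was not prefetched
    }
    auto j = i->second.find(column_id);
    if (j == i->second.end()) {
        return nullptr;   // column absent in the prefetched row
    }
    return &j->second;
}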
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "gc_clock.hh" -#include "timestamp.hh" -#include "schema.hh" -#include "atomic_cell.hh" -#include "tombstone.hh" -#include "exceptions/exceptions.hh" -#include "cql3/query_options.hh" - -#include - -namespace cql3 { - -/** - * A simple container that simplify passing parameters for collections methods. - */ -class update_parameters final { -public: - // Holder for data needed by CQL list updates which depend on current state of the list. - struct prefetch_data { - using key = std::pair; - using key_view = std::pair; - struct key_hashing { - partition_key::hashing pk_hash; - clustering_key::hashing ck_hash; - - key_hashing(const schema& s) - : pk_hash(s) - , ck_hash(s) - { } - - size_t operator()(const key& k) const { - return pk_hash(k.first) ^ ck_hash(k.second); - } - - size_t operator()(const key_view& k) const { - return pk_hash(k.first) ^ ck_hash(k.second); - } - }; - struct key_equality { - partition_key::equality pk_eq; - clustering_key::equality ck_eq; - - key_equality(const schema& s) - : pk_eq(s) - , ck_eq(s) - { } - - bool operator()(const key& k1, const key& k2) const { - return pk_eq(k1.first, k2.first) && ck_eq(k1.second, k2.second); - } - bool operator()(const key_view& k1, const key& k2) const { - return pk_eq(k1.first, k2.first) && ck_eq(k1.second, k2.second); - } - bool operator()(const key& k1, const key_view& k2) const { - return pk_eq(k1.first, k2.first) && ck_eq(k1.second, k2.second); - } - }; - struct cell { - bytes key; - bytes value; - }; - using cell_list = std::vector; - using row = std::unordered_map; - public: - std::unordered_map rows; - schema_ptr schema; - public: - prefetch_data(schema_ptr schema); - }; - // Note: value (mutation) only required to contain the rows we are interested in - using prefetched_rows_type = std::experimental::optional; -private: - const gc_clock::duration _ttl; - const prefetched_rows_type _prefetched; // For operation that require a read-before-write -public: - const api::timestamp_type _timestamp; - const gc_clock::time_point _local_deletion_time; - const schema_ptr _schema; - const query_options& _options; - - update_parameters(const schema_ptr schema_, const query_options& options, - api::timestamp_type timestamp, gc_clock::duration ttl, prefetched_rows_type prefetched) - : _ttl(ttl) - , _prefetched(std::move(prefetched)) - , _timestamp(timestamp) - , _local_deletion_time(gc_clock::now()) - , _schema(std::move(schema_)) - , _options(options) - { - // We use MIN_VALUE internally to mean the absence of of timestamp (in Selection, in sstable stats, ...), so exclude - // it to avoid potential confusion. 
- if (timestamp < api::min_timestamp || timestamp > api::max_timestamp) { - throw exceptions::invalid_request_exception(sprint("Out of bound timestamp, must be in [%d, %d]", - api::min_timestamp, api::max_timestamp)); - } - } - - atomic_cell make_dead_cell() const { - return atomic_cell::make_dead(_timestamp, _local_deletion_time); - } - - atomic_cell make_cell(bytes_view value) const { - auto ttl = _ttl; - - if (ttl.count() <= 0) { - ttl = _schema->default_time_to_live(); - } - - if (ttl.count() > 0) { - return atomic_cell::make_live(_timestamp, value, _local_deletion_time + ttl, ttl); - } else { - return atomic_cell::make_live(_timestamp, value); - } - }; - - atomic_cell make_counter_update_cell(int64_t delta) const { - return atomic_cell::make_live_counter_update(_timestamp, delta); - } - - tombstone make_tombstone() const { - return {_timestamp, _local_deletion_time}; - } - - tombstone make_tombstone_just_before() const { - return {_timestamp - 1, _local_deletion_time}; - } - -#if 0 - public RangeTombstone makeRangeTombstone(ColumnSlice slice) throws InvalidRequestException - { - QueryProcessor.validateComposite(slice.start, metadata.comparator); - QueryProcessor.validateComposite(slice.finish, metadata.comparator); - return new RangeTombstone(slice.start, slice.finish, timestamp, localDeletionTime); - } - - public RangeTombstone makeTombstoneForOverwrite(ColumnSlice slice) throws InvalidRequestException - { - QueryProcessor.validateComposite(slice.start, metadata.comparator); - QueryProcessor.validateComposite(slice.finish, metadata.comparator); - return new RangeTombstone(slice.start, slice.finish, timestamp - 1, localDeletionTime); - } -#endif - - gc_clock::duration ttl() const { - return _ttl.count() > 0 ? _ttl : _schema->default_time_to_live(); - } - - gc_clock::time_point expiry() const { - return ttl() + _local_deletion_time; - } - - api::timestamp_type timestamp() const { - return _timestamp; - } - - const prefetch_data::cell_list* - get_prefetched_list( - partition_key_view pkey, - clustering_key_view ckey, - const column_definition& column) const; -}; - -} diff --git a/scylla/cql3/user_options.cc b/scylla/cql3/user_options.cc deleted file mode 100644 index 57b8735..0000000 --- a/scylla/cql3/user_options.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Modified by ScyllaDB - * - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
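make_cell()/ttl() above apply a TTL fallback: an explicit per-statement TTL wins, otherwise the schema's default_time_to_live applies, and a non-positive result means the cell never expires. A sketch of that rule with std::chrono stand-ins:

#include <chrono>

using duration = std::chrono::seconds;

// Statement TTL if positive, else the schema default (which may itself be zero,
// meaning "no expiry"), matching the fallback in make_cell()/ttl() above.
duration effective_ttl(duration statement_ttl, duration schema_default_ttl) {
    return statement_ttl.count() > 0 ? statement_ttl : schema_default_ttl;
}

// expiry() above is simply the local deletion time plus the effective TTL;
// only meaningful when the TTL is positive.
std::chrono::system_clock::time_point
expiry(duration ttl, std::chrono::system_clock::time_point local_deletion_time) {
    return local_deletion_time + ttl;
}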
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include - -#include - -#include "auth/authenticator.hh" -#include "user_options.hh" - -void cql3::user_options::put(const sstring& name, const sstring& value) { - _options[auth::authenticator::string_to_option(name)] = value; -} - -void cql3::user_options::validate() const { - auto& a = auth::authenticator::get(); - for (auto o : _options | boost::adaptors::map_keys) { - if (!a.supported_options().contains(o)) { - throw exceptions::invalid_request_exception( - sprint("%s doesn't support %s option", - a.class_name(), - a.option_to_string(o))); - } - } -} - diff --git a/scylla/cql3/user_options.hh b/scylla/cql3/user_options.hh deleted file mode 100644 index eecf102..0000000 --- a/scylla/cql3/user_options.hh +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Modified by ScyllaDB - * - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "auth/authenticator.hh" - -namespace cql3 { - -class user_options { -private: - auth::authenticator::option_map _options; -public: - void put(const sstring&, const sstring&); - - bool empty() const { - return _options.empty(); - } - const auth::authenticator::option_map& options() const { - return _options; - } - void validate() const; -}; - -} diff --git a/scylla/cql3/user_types.cc b/scylla/cql3/user_types.cc deleted file mode 100644 index f66507b..0000000 --- a/scylla/cql3/user_types.cc +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
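user_options::validate() above rejects any supplied option the active authenticator does not support. A standalone sketch of that loop, with string keys standing in for the authenticator's option enum:

#include <map>
#include <set>
#include <stdexcept>
#include <string>

void validate(const std::map<std::string, std::string>& options,
              const std::set<std::string>& supported,
              const std::string& authenticator_name) {
    for (const auto& kv : options) {
        if (supported.count(kv.first) == 0) {
            // Mirrors the invalid_request_exception raised above.
            throw std::invalid_argument(authenticator_name + " doesn't support " + kv.first + " option");
        }
    }
}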
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Modified by ScyllaDB - * - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/user_types.hh" - -#include "cql3/cql3_type.hh" - -#include -#include -#include - -namespace cql3 { - -shared_ptr user_types::field_spec_of(shared_ptr column, size_t field) { - auto&& ut = static_pointer_cast(column->type); - auto&& name = ut->field_name(field); - auto&& sname = sstring(reinterpret_cast(name.data()), name.size()); - return make_shared( - column->ks_name, - column->cf_name, - make_shared(column->name->to_string() + "." 
+ sname, true), - ut->field_type(field)); -} - -user_types::literal::literal(elements_map_type entries) - : _entries(std::move(entries)) { -} - -shared_ptr user_types::literal::prepare(database& db, const sstring& keyspace, shared_ptr receiver) { - validate_assignable_to(db, keyspace, receiver); - auto&& ut = static_pointer_cast(receiver->type); - bool all_terminal = true; - std::vector> values; - values.reserve(_entries.size()); - size_t found_values = 0; - for (size_t i = 0; i < ut->size(); ++i) { - auto&& field = column_identifier(to_bytes(ut->field_name(i)), utf8_type); - auto iraw = _entries.find(field); - shared_ptr raw; - if (iraw == _entries.end()) { - raw = cql3::constants::NULL_LITERAL; - } else { - raw = iraw->second; - ++found_values; - } - auto&& value = raw->prepare(db, keyspace, field_spec_of(receiver, i)); - - if (dynamic_cast(value.get())) { - all_terminal = false; - } - - values.push_back(std::move(value)); - } - if (found_values != _entries.size()) { - // We had some field that are not part of the type - for (auto&& id_val : _entries) { - auto&& id = id_val.first; - if (!boost::range::count(ut->field_names(), id.bytes_)) { - throw exceptions::invalid_request_exception(sprint("Unknown field '%s' in value of user defined type %s", id, ut->get_name_as_string())); - } - } - } - - delayed_value value(ut, values); - if (all_terminal) { - return value.bind(query_options::DEFAULT); - } else { - return make_shared(std::move(value)); - } -} - -void user_types::literal::validate_assignable_to(database& db, const sstring& keyspace, shared_ptr receiver) { - auto&& ut = dynamic_pointer_cast(receiver->type); - if (!ut) { - throw exceptions::invalid_request_exception(sprint("Invalid user type literal for %s of type %s", receiver->name, receiver->type->as_cql3_type())); - } - - for (size_t i = 0; i < ut->size(); i++) { - column_identifier field(to_bytes(ut->field_name(i)), utf8_type); - if (_entries.count(field) == 0) { - continue; - } - shared_ptr value = _entries[field]; - auto&& field_spec = field_spec_of(receiver, i); - if (!assignment_testable::is_assignable(value->test_assignment(db, keyspace, field_spec))) { - throw exceptions::invalid_request_exception(sprint("Invalid user type literal for %s: field %s is not of type %s", receiver->name, field, field_spec->type->as_cql3_type())); - } - } -} - -assignment_testable::test_result user_types::literal::test_assignment(database& db, const sstring& keyspace, shared_ptr receiver) { - try { - validate_assignable_to(db, keyspace, receiver); - return assignment_testable::test_result::WEAKLY_ASSIGNABLE; - } catch (exceptions::invalid_request_exception& e) { - return assignment_testable::test_result::NOT_ASSIGNABLE; - } -} - -sstring user_types::literal::assignment_testable_source_context() const { - return to_string(); -} - -sstring user_types::literal::to_string() const { - auto kv_to_str = [] (auto&& kv) { return sprint("%s:%s", kv.first, kv.second); }; - return sprint("{%s}", ::join(", ", _entries | boost::adaptors::transformed(kv_to_str))); -} - -user_types::delayed_value::delayed_value(user_type type, std::vector> values) - : _type(std::move(type)), _values(std::move(values)) { -} -bool user_types::delayed_value::uses_function(const sstring& ks_name, const sstring& function_name) const { - return boost::algorithm::any_of(_values, - std::bind(&term::uses_function, std::placeholders::_1, std::cref(ks_name), std::cref(function_name))); -} -bool user_types::delayed_value::contains_bind_marker() const { - return 
boost::algorithm::any_of(_values, std::mem_fn(&term::contains_bind_marker)); -} - -void user_types::delayed_value::collect_marker_specification(shared_ptr bound_names) { - for (auto&& v : _values) { - v->collect_marker_specification(bound_names); - } -} - -std::vector user_types::delayed_value::bind_internal(const query_options& options) { - auto sf = options.get_cql_serialization_format(); - std::vector buffers; - for (size_t i = 0; i < _type->size(); ++i) { - const auto& value = _values[i]->bind_and_get(options); - if (!_type->is_multi_cell() && value.is_unset_value()) { - throw exceptions::invalid_request_exception(sprint("Invalid unset value for field '%s' of user defined type %s", _type->field_name_as_string(i), _type->get_name_as_string())); - } - buffers.push_back(cql3::raw_value::make_value(value)); - // Inside UDT values, we must force the serialization of collections to v3 whatever protocol - // version is in use since we're going to store directly that serialized value. - if (!sf.collection_format_unchanged() && _type->field_type(i)->is_collection() && buffers.back()) { - auto&& ctype = static_pointer_cast(_type->field_type(i)); - buffers.back() = cql3::raw_value::make_value( - ctype->reserialize(sf, cql_serialization_format::latest(), bytes_view(*buffers.back()))); - } - } - return buffers; -} - -shared_ptr user_types::delayed_value::bind(const query_options& options) { - return ::make_shared(cql3::raw_value::make_value((bind_and_get(options)))); -} - -cql3::raw_value_view user_types::delayed_value::bind_and_get(const query_options& options) { - return options.make_temporary(cql3::raw_value::make_value(user_type_impl::build_value(bind_internal(options)))); -} - -} diff --git a/scylla/cql3/user_types.hh b/scylla/cql3/user_types.hh deleted file mode 100644 index 0c50ba8..0000000 --- a/scylla/cql3/user_types.hh +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Modified by ScyllaDB - * - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
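user_types::literal::prepare() above walks the type's declared fields in order, substitutes a NULL literal for any absent entry, and only afterwards reports entries that match no declared field. A simplified sketch of that matching pass, with std::optional standing in for the NULL_LITERAL term:

#include <map>
#include <optional>
#include <stdexcept>
#include <string>
#include <vector>

std::vector<std::optional<std::string>>
match_fields(const std::vector<std::string>& declared_fields,
             const std::map<std::string, std::string>& entries) {
    std::vector<std::optional<std::string>> values;
    size_t found = 0;
    for (const auto& field : declared_fields) {
        auto it = entries.find(field);
        if (it == entries.end()) {
            values.push_back(std::nullopt);   // missing field becomes NULL
        } else {
            values.push_back(it->second);
            ++found;
        }
    }
    if (found != entries.size()) {
        // Some entry named a field the type does not declare.
        throw std::invalid_argument("unknown field in user defined type literal");
    }
    return values;
}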
- */ - -#pragma once - -#include "column_specification.hh" -#include "term.hh" -#include "column_identifier.hh" -#include "constants.hh" -#include "to_string.hh" - -namespace cql3 { - -/** - * Static helper methods and classes for user types. - */ -class user_types { - user_types() = delete; -public: - static shared_ptr field_spec_of(shared_ptr column, size_t field); - - class literal : public term::raw { - public: - using elements_map_type = std::unordered_map>; - elements_map_type _entries; - - literal(elements_map_type entries); - virtual shared_ptr prepare(database& db, const sstring& keyspace, shared_ptr receiver) override; - private: - void validate_assignable_to(database& db, const sstring& keyspace, shared_ptr receiver); - public: - virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, shared_ptr receiver) override; - virtual sstring assignment_testable_source_context() const override; - virtual sstring to_string() const override; - }; - - // Same purpose than Lists.DelayedValue, except we do handle bind marker in that case - class delayed_value : public non_terminal { - user_type _type; - std::vector> _values; - public: - delayed_value(user_type type, std::vector> values); - virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override; - virtual bool contains_bind_marker() const override; - virtual void collect_marker_specification(shared_ptr bound_names); - private: - std::vector bind_internal(const query_options& options); - public: - virtual shared_ptr bind(const query_options& options) override; - virtual cql3::raw_value_view bind_and_get(const query_options& options) override; - }; -}; - -} diff --git a/scylla/cql3/ut_name.cc b/scylla/cql3/ut_name.cc deleted file mode 100644 index 9c8801d..0000000 --- a/scylla/cql3/ut_name.cc +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "cql3/ut_name.hh" - -namespace cql3 { - -ut_name::ut_name(shared_ptr ks_name, ::shared_ptr ut_name) - : _ks_name{!ks_name ? std::experimental::nullopt : std::experimental::optional{ks_name->to_string()}} - , _ut_name{ut_name} -{ } - -bool ut_name::has_keyspace() const { - return bool(_ks_name); -} - -void ut_name::set_keyspace(sstring keyspace) { - _ks_name = std::experimental::optional{keyspace}; -} - -const sstring& ut_name::get_keyspace() const { - return _ks_name.value(); -} - -bytes ut_name::get_user_type_name() const { - return _ut_name->bytes_; -} - -sstring ut_name::get_string_type_name() const -{ - return _ut_name->to_string(); -} - -sstring ut_name::to_string() const { - return (has_keyspace() ? (_ks_name.value() + ".") : "") + _ut_name->to_string(); -} - -} diff --git a/scylla/cql3/ut_name.hh b/scylla/cql3/ut_name.hh deleted file mode 100644 index 104589c..0000000 --- a/scylla/cql3/ut_name.hh +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "core/shared_ptr.hh" -#include "column_identifier.hh" - -#include - -namespace cql3 { - -class ut_name final { - std::experimental::optional _ks_name; - ::shared_ptr _ut_name; -public: - ut_name(shared_ptr ks_name, ::shared_ptr ut_name); - - bool has_keyspace() const; - - void set_keyspace(sstring keyspace); - - const sstring& get_keyspace() const; - - bytes get_user_type_name() const; - - sstring get_string_type_name() const; - - sstring to_string() const; - - friend std::ostream& operator<<(std::ostream& os, const ut_name& n) { - return os << n.to_string(); - } -}; - -} diff --git a/scylla/cql3/util.hh b/scylla/cql3/util.hh deleted file mode 100644 index a8bbacb..0000000 --- a/scylla/cql3/util.hh +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. 
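ut_name::to_string() above qualifies the type name with the keyspace only when one is set. The rule in isolation:

#include <optional>
#include <string>

// "ks.type" when a keyspace is present, bare "type" otherwise.
std::string ut_to_string(const std::optional<std::string>& ks_name, const std::string& ut_name) {
    return (ks_name ? *ks_name + "." : "") + ut_name;
}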
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -#include -#include - -#include - -#include "cql3/column_identifier.hh" -#include "cql3/CqlParser.hpp" -#include "cql3/error_collector.hh" -#include "cql3/relation.hh" -#include "cql3/statements/raw/select_statement.hh" - -namespace cql3 { - -namespace util { - -template > -Result do_with_parser(const sstring_view& cql, Func&& f) { - cql3_parser::CqlLexer::collector_type lexer_error_collector(cql); - cql3_parser::CqlParser::collector_type parser_error_collector(cql); - cql3_parser::CqlLexer::InputStreamType input{reinterpret_cast(cql.begin()), ANTLR_ENC_UTF8, static_cast(cql.size()), nullptr}; - cql3_parser::CqlLexer lexer{&input}; - lexer.set_error_listener(lexer_error_collector); - cql3_parser::CqlParser::TokenStreamType tstream(ANTLR_SIZE_HINT, lexer.get_tokSource()); - cql3_parser::CqlParser parser{&tstream}; - parser.set_error_listener(parser_error_collector); - auto result = f(parser); - lexer_error_collector.throw_first_syntax_error(); - parser_error_collector.throw_first_syntax_error(); - return result; -} - -template // Range -sstring relations_to_where_clause(Range&& relations) { - auto expressions = relations | boost::adaptors::transformed(std::mem_fn(&relation::to_string)); - return boost::algorithm::join(expressions, " AND "); -} - -static std::vector where_clause_to_relations(const sstring_view& where_clause) { - return do_with_parser(where_clause, std::mem_fn(&cql3_parser::CqlParser::whereClause)); -} - -inline sstring rename_column_in_where_clause(const sstring_view& where_clause, column_identifier::raw from, column_identifier::raw to) { - auto relations = where_clause_to_relations(where_clause); - auto new_relations = relations | boost::adaptors::transformed([&] (auto&& rel) { - return rel->maybe_rename_identifier(from, to); - }); - return relations_to_where_clause(std::move(new_relations)); -} - -shared_ptr build_select_statement( - const sstring_view& cf_name, - const sstring_view& where_clause, - std::vector included_columns); - -sstring maybe_quote(const sstring& s); - -} // namespace util - -} // namespace cql3 diff --git a/scylla/cql3/values.hh b/scylla/cql3/values.hh deleted file mode 100644 index 3d7b85b..0000000 --- a/scylla/cql3/values.hh +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
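relations_to_where_clause() above stringifies each relation and joins the results with " AND ". The same join as a sketch without the boost range pipeline:

#include <string>
#include <vector>

std::string relations_to_where_clause(const std::vector<std::string>& expressions) {
    std::string out;
    for (size_t i = 0; i < expressions.size(); ++i) {
        if (i) { out += " AND "; }
        out += expressions[i];
    }
    return out;   // e.g. "a = ? AND b > ?"
}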
- * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "types.hh" -#include "bytes.hh" - -#include - -#include - -namespace cql3 { - -struct null_value { -}; - -struct unset_value { -}; - -/// \brief View to a raw CQL protocol value. -/// -/// \see raw_value -struct raw_value_view { - boost::variant _data; - - raw_value_view(null_value&& data) - : _data{std::move(data)} - {} - raw_value_view(unset_value&& data) - : _data{std::move(data)} - {} - raw_value_view(bytes_view&& data) - : _data{std::move(data)} - {} - raw_value_view(const bytes_view& data) - : _data{data} - {} -public: - static raw_value_view make_null() { - return raw_value_view{std::move(null_value{})}; - } - static raw_value_view make_unset_value() { - return raw_value_view{std::move(unset_value{})}; - } - static raw_value_view make_value(bytes_view &&view) { - return raw_value_view{std::move(view)}; - } - static raw_value_view make_value(const bytes_view& view) { - return raw_value_view{view}; - } - bool is_null() const { - return _data.which() == 1; - } - bool is_unset_value() const { - return _data.which() == 2; - } - bool is_value() const { - return _data.which() == 0; - } - bytes_view_opt data() const { - if (_data.which() == 0) { - return boost::get(_data); - } - return {}; - } - explicit operator bool() const { - return _data.which() == 0; - } - const bytes_view* operator->() const { - return &boost::get(_data); - } - const bytes_view& operator*() const { - return boost::get(_data); - } -}; - -/// \brief Raw CQL protocol value. -/// -/// The `raw_value` type represents an uninterpreted value from the CQL wire -/// protocol. A raw value can hold either a null value, an unset value, or a byte -/// blob that represents the value. 
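A sketch of that three-state representation using std::variant; the class below uses boost::variant and which(), but the states and their order are the same (index 0 is a value, 1 is null, 2 is unset):

#include <string>
#include <variant>

struct null_value {};
struct unset_value {};
// std::string stands in for the bytes blob of the real raw_value.
using raw_value = std::variant<std::string, null_value, unset_value>;

bool is_value(const raw_value& v) { return v.index() == 0; }
bool is_null(const raw_value& v)  { return v.index() == 1; }
bool is_unset(const raw_value& v) { return v.index() == 2; }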
-class raw_value { - boost::variant _data; - - raw_value(null_value&& data) - : _data{std::move(data)} - {} - raw_value(unset_value&& data) - : _data{std::move(data)} - {} - raw_value(bytes&& data) - : _data{std::move(data)} - {} - raw_value(const bytes& data) - : _data{data} - {} -public: - static raw_value make_null() { - return raw_value{std::move(null_value{})}; - } - static raw_value make_unset_value() { - return raw_value{std::move(unset_value{})}; - } - static raw_value make_value(const raw_value_view& view) { - if (view.is_null()) { - return make_null(); - } - if (view.is_unset_value()) { - return make_unset_value(); - } - return make_value(to_bytes(*view)); - } - static raw_value make_value(bytes&& bytes) { - return raw_value{std::move(bytes)}; - } - static raw_value make_value(const bytes& bytes) { - return raw_value{bytes}; - } - static raw_value make_value(const bytes_opt& bytes) { - if (bytes) { - return make_value(*bytes); - } - return make_null(); - } - bool is_null() const { - return _data.which() == 1; - } - bool is_unset_value() const { - return _data.which() == 2; - } - bool is_value() const { - return _data.which() == 0; - } - bytes_opt data() const { - if (_data.which() == 0) { - return boost::get(_data); - } - return {}; - } - explicit operator bool() const { - return _data.which() == 0; - } - const bytes* operator->() const { - return &boost::get(_data); - } - const bytes& operator*() const { - return boost::get(_data); - } - raw_value_view to_view() const { - switch (_data.which()) { - case 0: return raw_value_view::make_value(bytes_view{boost::get(_data)}); - case 1: return raw_value_view::make_null(); - default: return raw_value_view::make_unset_value(); - } - } -}; - -} - -inline bytes_opt to_bytes_opt(const cql3::raw_value_view& view) { - return to_bytes_opt(view.data()); -} - -inline bytes_opt to_bytes_opt(const cql3::raw_value& value) { - return value.data(); -} diff --git a/scylla/cql3/variable_specifications.cc b/scylla/cql3/variable_specifications.cc deleted file mode 100644 index 9a0862b..0000000 --- a/scylla/cql3/variable_specifications.cc +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "cql3/variable_specifications.hh" - -namespace cql3 { - -variable_specifications::variable_specifications(const std::vector<::shared_ptr>& variable_names) - : _variable_names{variable_names} - , _specs{variable_names.size()} - , _target_columns{variable_names.size()} -{ } - -::shared_ptr variable_specifications::empty() { - return ::make_shared(std::vector<::shared_ptr>{}); -} - -size_t variable_specifications::size() const { - return _variable_names.size(); -} - -std::vector<::shared_ptr> variable_specifications::get_specifications() const & { - return std::vector<::shared_ptr>(_specs.begin(), _specs.end()); -} - -std::vector<::shared_ptr> variable_specifications::get_specifications() && { - return std::move(_specs); -} - -std::vector variable_specifications::get_partition_key_bind_indexes(schema_ptr schema) const { - auto count = schema->partition_key_columns().size(); - std::vector partition_key_positions(count, uint16_t(0)); - std::vector set(count, false); - for (size_t i = 0; i < _target_columns.size(); i++) { - auto& target_column = _target_columns[i]; - const auto* cdef = schema->get_column_definition(target_column->name->name()); - if (cdef && cdef->is_partition_key()) { - partition_key_positions[cdef->position()] = i; - set[cdef->position()] = true; - } - } - for (bool b : set) { - if (!b) { - return {}; - } - } - return partition_key_positions; -} - -void variable_specifications::add(int32_t bind_index, ::shared_ptr spec) { - _target_columns[bind_index] = spec; - auto name = _variable_names[bind_index]; - // Use the user name, if there is one - if (name) { - spec = ::make_shared(spec->ks_name, spec->cf_name, name, spec->type); - } - _specs[bind_index] = spec; -} - -} diff --git a/scylla/cql3/variable_specifications.hh b/scylla/cql3/variable_specifications.hh deleted file mode 100644 index 312b150..0000000 --- a/scylla/cql3/variable_specifications.hh +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
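get_partition_key_bind_indexes() above maps each bind-marker position onto the partition-key component it targets and returns an empty vector unless every component is covered by some marker. A sketch with the marker targets precomputed as key positions (-1 marking markers that target non-key columns):

#include <cstdint>
#include <vector>

std::vector<uint16_t>
partition_key_bind_indexes(const std::vector<int>& target_key_positions,
                           size_t partition_key_size) {
    std::vector<uint16_t> positions(partition_key_size, 0);
    std::vector<bool> set(partition_key_size, false);
    for (size_t i = 0; i < target_key_positions.size(); ++i) {
        int pos = target_key_positions[i];
        if (pos >= 0) {
            positions[pos] = static_cast<uint16_t>(i);  // bind index for this key component
            set[pos] = true;
        }
    }
    for (bool b : set) {
        if (!b) {
            return {};  // not every key component has a marker
        }
    }
    return positions;
}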
- * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "cql3/column_specification.hh" -#include "cql3/column_identifier.hh" - -#include -#include - -namespace cql3 { - -class variable_specifications final { -private: - std::vector> _variable_names; - std::vector<::shared_ptr> _specs; - std::vector<::shared_ptr> _target_columns; - -public: - variable_specifications(const std::vector<::shared_ptr>& variable_names); - - /** - * Returns an empty instance of VariableSpecifications. - * @return an empty instance of VariableSpecifications - */ - static ::shared_ptr empty(); - - size_t size() const; - - std::vector<::shared_ptr> get_specifications() const &; - - std::vector<::shared_ptr> get_specifications() &&; - - std::vector get_partition_key_bind_indexes(schema_ptr schema) const; - - void add(int32_t bind_index, ::shared_ptr spec); -}; - -} diff --git a/scylla/cql_serialization_format.hh b/scylla/cql_serialization_format.hh deleted file mode 100644 index c2d69bd..0000000 --- a/scylla/cql_serialization_format.hh +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -using cql_protocol_version_type = uint8_t; - -// Abstraction of transport protocol-dependent serialization format -// Protocols v1, v2 used 16 bits for collection sizes, while v3 and -// above use 32 bits. But letting every bit of the code know what -// transport protocol we're using (and in some cases, we aren't using -// any transport -- it's for internal storage) is bad, so abstract it -// away here. 
-
-class cql_serialization_format {
-    cql_protocol_version_type _version;
-public:
-    static constexpr cql_protocol_version_type latest_version = 4;
-    explicit cql_serialization_format(cql_protocol_version_type version) : _version(version) {}
-    static cql_serialization_format latest() { return cql_serialization_format{latest_version}; }
-    static cql_serialization_format internal() { return latest(); }
-    bool using_32_bits_for_collections() const { return _version >= 3; }
-    bool operator==(cql_serialization_format x) const { return _version == x._version; }
-    bool operator!=(cql_serialization_format x) const { return !operator==(x); }
-    cql_protocol_version_type protocol_version() const { return _version; }
-    friend std::ostream& operator<<(std::ostream& out, const cql_serialization_format& sf) {
-        return out << static_cast<int>(sf._version);
-    }
-    bool collection_format_unchanged(cql_serialization_format other = cql_serialization_format::latest()) const {
-        return using_32_bits_for_collections() == other.using_32_bits_for_collections();
-    }
-};
diff --git a/scylla/database.cc b/scylla/database.cc
deleted file mode 100644
index dd14473..0000000
--- a/scylla/database.cc
+++ /dev/null
@@ -1,4129 +0,0 @@
-/*
- * Copyright (C) 2014 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "log.hh"
-#include "lister.hh"
-#include "database.hh"
-#include "unimplemented.hh"
-#include "core/future-util.hh"
-#include "db/commitlog/commitlog_entry.hh"
-#include "db/system_keyspace.hh"
-#include "db/consistency_level.hh"
-#include "db/commitlog/commitlog.hh"
-#include "db/config.hh"
-#include "to_string.hh"
-#include "query-result-writer.hh"
-#include "nway_merger.hh"
-#include "cql3/column_identifier.hh"
-#include "core/seastar.hh"
-#include
-#include
-#include
-#include
-#include
-#include "sstables/sstables.hh"
-#include "sstables/compaction.hh"
-#include "sstables/remove.hh"
-#include
-#include
-#include "locator/simple_snitch.hh"
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include "frozen_mutation.hh"
-#include "mutation_partition_applier.hh"
-#include "core/do_with.hh"
-#include "service/migration_manager.hh"
-#include "service/storage_service.hh"
-#include "message/messaging_service.hh"
-#include "mutation_query.hh"
-#include "sstable_mutation_readers.hh"
-#include
-#include
-#include "utils/latency.hh"
-#include "schema_registry.hh"
-#include "service/priority_manager.hh"
-#include "cell_locking.hh"
-#include
-#include "view_info.hh"
-#include "memtable-sstable.hh"
-#include "db/schema_tables.hh"
-
-#include "checked-file-impl.hh"
-#include "disk-error-handler.hh"
-
-using namespace std::chrono_literals;
-
-logging::logger dblog("database");
-
-static const std::unordered_set<sstring> system_keyspaces = {
-                db::system_keyspace::NAME, db::schema_tables::NAME
-};
-
-bool is_system_keyspace(const sstring& name) {
-    return system_keyspaces.find(name) != system_keyspaces.end();
-}
-
-// Used for tests where the CF exists without a database object. We need to pass a valid
-// dirty_memory_manager in that case.
-thread_local dirty_memory_manager default_dirty_memory_manager;
-
-lw_shared_ptr<memtable_list>
-column_family::make_memory_only_memtable_list() {
-    auto get_schema = [this] { return schema(); };
-    return make_lw_shared<memtable_list>(std::move(get_schema), _config.dirty_memory_manager);
-}
-
-lw_shared_ptr<memtable_list>
-column_family::make_memtable_list() {
-    auto seal = [this] (memtable_list::flush_behavior behavior) { return seal_active_memtable(behavior); };
-    auto get_schema = [this] { return schema(); };
-    return make_lw_shared<memtable_list>(std::move(seal), std::move(get_schema), _config.dirty_memory_manager);
-}
-
-lw_shared_ptr<memtable_list>
-column_family::make_streaming_memtable_list() {
-    auto seal = [this] (memtable_list::flush_behavior behavior) { return seal_active_streaming_memtable(behavior); };
-    auto get_schema = [this] { return schema(); };
-    return make_lw_shared<memtable_list>(std::move(seal), std::move(get_schema), _config.streaming_dirty_memory_manager);
-}
-
-lw_shared_ptr<memtable_list>
-column_family::make_streaming_memtable_big_list(streaming_memtable_big& smb) {
-    auto seal = [this, &smb] (memtable_list::flush_behavior) { return seal_active_streaming_memtable_big(smb); };
-    auto get_schema = [this] { return schema(); };
-    return make_lw_shared<memtable_list>(std::move(seal), std::move(get_schema), _config.streaming_dirty_memory_manager);
-}
-
-column_family::column_family(schema_ptr schema, config config, db::commitlog* cl, compaction_manager& compaction_manager, cell_locker_stats& cl_stats)
-    : _schema(std::move(schema))
-    , _config(std::move(config))
-    , _memtables(_config.enable_disk_writes ? make_memtable_list() : make_memory_only_memtable_list())
-    , _streaming_memtables(_config.enable_disk_writes ?
-                           make_streaming_memtable_list() : make_memory_only_memtable_list())
-    , _compaction_strategy(make_compaction_strategy(_schema->compaction_strategy(), _schema->compaction_strategy_options()))
-    , _sstables(make_lw_shared(_compaction_strategy.make_sstable_set(_schema)))
-    , _cache(_schema, sstables_as_snapshot_source(), global_cache_tracker())
-    , _commitlog(cl)
-    , _compaction_manager(compaction_manager)
-    , _counter_cell_locks(std::make_unique<cell_locker>(_schema, cl_stats))
-{
-    if (!_config.enable_disk_writes) {
-        dblog.warn("Writes disabled, column family not durable.");
-    }
-    set_metrics();
-}
-
-partition_presence_checker
-column_family::make_partition_presence_checker(lw_shared_ptr<sstables::sstable_set> sstables) {
-    auto sel = make_lw_shared(sstables->make_incremental_selector());
-    return [this, sstables = std::move(sstables), sel = std::move(sel)] (const dht::decorated_key& key) {
-        auto& sst = sel->select(key.token());
-        if (sst.empty()) {
-            return partition_presence_checker_result::definitely_doesnt_exist;
-        }
-        auto hk = sstables::sstable::make_hashed_key(*_schema, key.key());
-        for (auto&& s : sst) {
-            if (s->filter_has_key(hk)) {
-                return partition_presence_checker_result::maybe_exists;
-            }
-        }
-        return partition_presence_checker_result::definitely_doesnt_exist;
-    };
-}
-
-mutation_source
-column_family::sstables_as_mutation_source() {
-    return mutation_source([this] (schema_ptr s,
-                                   const dht::partition_range& r,
-                                   const query::partition_slice& slice,
-                                   const io_priority_class& pc,
-                                   tracing::trace_state_ptr trace_state,
-                                   streamed_mutation::forwarding fwd,
-                                   mutation_reader::forwarding fwd_mr) {
-        return make_sstable_reader(std::move(s), _sstables, r, slice, pc, std::move(trace_state), fwd, fwd_mr);
-    });
-}
-
-snapshot_source
-column_family::sstables_as_snapshot_source() {
-    return snapshot_source([this] () {
-        auto sst_set = _sstables;
-        return mutation_source([this, sst_set = std::move(sst_set)] (schema_ptr s,
-                const dht::partition_range& r,
-                const query::partition_slice& slice,
-                const io_priority_class& pc,
-                tracing::trace_state_ptr trace_state,
-                streamed_mutation::forwarding fwd,
-                mutation_reader::forwarding fwd_mr) {
-            return make_sstable_reader(std::move(s), sst_set, r, slice, pc, std::move(trace_state), fwd, fwd_mr);
-        });
-    });
-}
-
-// Defined in the .cc, since sstable is forward-declared in the .hh.
-column_family::~column_family() {
-}
-
-
-logalloc::occupancy_stats column_family::occupancy() const {
-    logalloc::occupancy_stats res;
-    for (auto m : *_memtables) {
-        res += m->region().occupancy();
-    }
-    for (auto m : *_streaming_memtables) {
-        res += m->region().occupancy();
-    }
-    for (auto smb : _streaming_memtables_big) {
-        for (auto m : *smb.second->memtables) {
-            res += m->region().occupancy();
-        }
-    }
-    return res;
-}
-
-static
-bool belongs_to_current_shard(const streamed_mutation& m) {
-    return dht::shard_of(m.decorated_key().token()) == engine().cpu_id();
-}
-
-// Stores ranges for all components of the same clustering key, index 0 referring to component
-// range 0, and so on.
-using ck_filter_clustering_key_components = std::vector<nonwrapping_range<bytes_view>>;
-// Stores an entry for each clustering key range specified by the filter.
-using ck_filter_clustering_key_ranges = std::vector<ck_filter_clustering_key_components>;
-
-// Used to split a clustering key range into a range for each component.
-// If a range in ck_filtering_all_ranges is composite, a range will be created
-// for each component. If it's not composite, a single range is created.
-// This split is needed to check for overlap in each component individually.
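// [editor's note] A worked example of the split performed below (types and
// values are illustrative, not from the original source): for a clustering
// key (c1 int, c2 int) and the composite range [(1, 2), (1, 5)], c1 is pinned
// to the singular range [1, 1] while c2 spans [2, 5], so the result is
// { [1, 1], [2, 5] }, one range per component. With a non-singular leading
// component, e.g. [(1, 2), (3, 5)], the walk stops after { [1, 3] }: once c1
// varies, the values of c2 are no longer ordered, so later components must
// be ignored.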
-static ck_filter_clustering_key_ranges
-ranges_for_clustering_key_filter(const schema_ptr& schema, const query::clustering_row_ranges& ck_filtering_all_ranges) {
-    ck_filter_clustering_key_ranges ranges;
-
-    for (auto& r : ck_filtering_all_ranges) {
-        // this vector stores a range for each component of a key, only one if not composite.
-        ck_filter_clustering_key_components composite_ranges;
-
-        if (r.is_full()) {
-            ranges.push_back({ nonwrapping_range<bytes_view>::make_open_ended_both_sides() });
-            continue;
-        }
-        auto start = r.start() ? r.start()->value().components() : clustering_key_prefix::make_empty().components();
-        auto end = r.end() ? r.end()->value().components() : clustering_key_prefix::make_empty().components();
-        auto start_it = start.begin();
-        auto end_it = end.begin();
-
-        // This test is enough because equal bounds in nonwrapping_range are inclusive.
-        auto is_singular = [&schema] (const auto& type_it, const bytes_view& b1, const bytes_view& b2) {
-            if (type_it == schema->clustering_key_type()->types().end()) {
-                throw std::runtime_error(sprint("clustering key filter passed more components than defined in schema of %s.%s",
-                    schema->ks_name(), schema->cf_name()));
-            }
-            return (*type_it)->compare(b1, b2) == 0;
-        };
-        auto type_it = schema->clustering_key_type()->types().begin();
-        composite_ranges.reserve(schema->clustering_key_size());
-
-        // The rule is to ignore any component cn if another component ck (k < n) is not of the form [v, v].
-        // If we have [v1, v1], [v2, v2], ... {vl3, vr3}, ....
-        // then we generate [v1, v1], [v2, v2], ... {vl3, vr3}. Where { = '(' or '[', etc.
-        while (start_it != start.end() && end_it != end.end() && is_singular(type_it++, *start_it, *end_it)) {
-            composite_ranges.push_back(nonwrapping_range<bytes_view>({{ std::move(*start_it++), true }},
-                {{ std::move(*end_it++), true }}));
-        }
-        // handle a single non-singular tail element, if present
-        if (start_it != start.end() && end_it != end.end()) {
-            composite_ranges.push_back(nonwrapping_range<bytes_view>({{ std::move(*start_it), r.start()->is_inclusive() }},
-                {{ std::move(*end_it), r.end()->is_inclusive() }}));
-        } else if (start_it != start.end()) {
-            composite_ranges.push_back(nonwrapping_range<bytes_view>({{ std::move(*start_it), r.start()->is_inclusive() }}, {}));
-        } else if (end_it != end.end()) {
-            composite_ranges.push_back(nonwrapping_range<bytes_view>({}, {{ std::move(*end_it), r.end()->is_inclusive() }}));
-        }
-
-        ranges.push_back(std::move(composite_ranges));
-    }
-    return ranges;
-}
-
-// Return true if this sstable possibly stores clustering row(s) specified by ranges.
-static inline bool
-contains_rows(const sstables::sstable& sst, const schema_ptr& schema, const ck_filter_clustering_key_ranges& ranges) {
-    auto& clustering_key_types = schema->clustering_key_type()->types();
-    auto& clustering_components_ranges = sst.clustering_components_ranges();
-
-    if (!schema->clustering_key_size() || clustering_components_ranges.empty()) {
-        return true;
-    }
-    return boost::algorithm::any_of(ranges, [&] (const ck_filter_clustering_key_components& range) {
-        auto s = std::min(range.size(), clustering_components_ranges.size());
-        return boost::algorithm::all_of(boost::irange<unsigned>(0, s), [&] (unsigned i) {
-            auto& type = clustering_key_types[i];
-            return range[i].is_full() || range[i].overlaps(clustering_components_ranges[i], type->as_tri_comparator());
-        });
-    });
-}
-
-// Filter out sstables for reader using bloom filter and sstable metadata that keeps track
-// of a range for each clustering component.
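// [editor's note] An illustrative trace of the filter below (values made up):
// given candidate sstables A, B and C where the bloom filter rejects C and the
// clustering-range check keeps only A (whose minimum timestamp is 10), B is
// re-admitted only if B's maximum timestamp exceeds 10 and B records
// tombstones, since such a tombstone may still shadow rows that will be read
// from A.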
-static std::vector -filter_sstable_for_reader(std::vector&& sstables, column_family& cf, const schema_ptr& schema, - const sstables::key& key, const query::partition_slice& slice) { - auto sstable_has_not_key = [&] (const sstables::shared_sstable& sst) { - return !sst->filter_has_key(key); - }; - sstables.erase(boost::remove_if(sstables, sstable_has_not_key), sstables.end()); - - // no clustering filtering is applied if schema defines no clustering key or - // compaction strategy thinks it will not benefit from such an optimization. - if (!schema->clustering_key_size() || !cf.get_compaction_strategy().use_clustering_key_filter()) { - return sstables; - } - ::cf_stats* stats = cf.cf_stats(); - stats->clustering_filter_count++; - stats->sstables_checked_by_clustering_filter += sstables.size(); - - auto ck_filtering_all_ranges = slice.get_all_ranges(); - // fast path to include all sstables if only one full range was specified. - // For example, this happens if query only specifies a partition key. - if (ck_filtering_all_ranges.size() == 1 && ck_filtering_all_ranges[0].is_full()) { - stats->clustering_filter_fast_path_count++; - stats->surviving_sstables_after_clustering_filter += sstables.size(); - return sstables; - } - auto ranges = ranges_for_clustering_key_filter(schema, ck_filtering_all_ranges); - if (ranges.empty()) { - return {}; - } - - int64_t min_timestamp = std::numeric_limits::max(); - auto sstable_has_clustering_key = [&min_timestamp, &schema, &ranges] (const sstables::shared_sstable& sst) { - if (!contains_rows(*sst, schema, ranges)) { - return false; // ordered after sstables that contain clustering rows. - } else { - min_timestamp = std::min(min_timestamp, sst->get_stats_metadata().min_timestamp); - return true; - } - }; - auto sstable_has_relevant_tombstone = [&min_timestamp] (const sstables::shared_sstable& sst) { - const auto& stats = sst->get_stats_metadata(); - // re-add sstable as candidate if it contains a tombstone that may cover a row in an included sstable. - return (stats.max_timestamp > min_timestamp && stats.estimated_tombstone_drop_time.bin.map.size()); - }; - auto skipped = std::partition(sstables.begin(), sstables.end(), sstable_has_clustering_key); - auto actually_skipped = std::partition(skipped, sstables.end(), sstable_has_relevant_tombstone); - sstables.erase(actually_skipped, sstables.end()); - stats->surviving_sstables_after_clustering_filter += sstables.size(); - - return sstables; -} - -class range_sstable_reader final : public combined_mutation_reader { - schema_ptr _s; - const dht::partition_range* _pr; - lw_shared_ptr _sstables; - - struct sstable_and_reader { - sstables::shared_sstable _sstable; - // This indirection is sad, but we need stable pointers to mutation - // readers. If this ever becomes a performance issue we could store - // mutation readers in an object pool (we don't need to preserve order - // and can have holes left in the container when elements are removed). 
-        std::unique_ptr<mutation_reader> _reader;
-
-        bool operator<(const sstable_and_reader& other) const {
-            return _sstable < other._sstable;
-        }
-
-        struct less_compare {
-            bool operator()(const sstable_and_reader& a, const sstable_and_reader& b) {
-                return a < b;
-            }
-            bool operator()(const sstable_and_reader& a, const sstables::shared_sstable& b) {
-                return a._sstable < b;
-            }
-            bool operator()(const sstables::shared_sstable& a, const sstable_and_reader& b) {
-                return a < b._sstable;
-            }
-        };
-    };
-    std::vector<sstable_and_reader> _current_readers;
-
-    // Use a pointer instead of copying, so we don't need to regenerate the reader if
-    // the priority changes.
-    const io_priority_class& _pc;
-    tracing::trace_state_ptr _trace_state;
-    const query::partition_slice& _slice;
-    streamed_mutation::forwarding _fwd;
-    mutation_reader::forwarding _fwd_mr;
-private:
-    std::unique_ptr<mutation_reader> create_reader(sstables::shared_sstable sst) {
-        tracing::trace(_trace_state, "Reading partition range {} from sstable {}", *_pr, seastar::value_of([&sst] { return sst->get_filename(); }));
-        // FIXME: make sstable::read_range_rows() return ::mutation_reader so that we can drop this wrapper.
-        mutation_reader reader =
-            make_mutation_reader<sstable_range_wrapping_reader>(sst, _s, *_pr, _slice, _pc, _fwd, _fwd_mr);
-        if (sst->is_shared()) {
-            reader = make_filtering_reader(std::move(reader), belongs_to_current_shard);
-        }
-        return std::make_unique<mutation_reader>(std::move(reader));
-    }
-public:
-    range_sstable_reader(schema_ptr s,
-                         lw_shared_ptr<sstables::sstable_set> sstables,
-                         const dht::partition_range& pr,
-                         const query::partition_slice& slice,
-                         const io_priority_class& pc,
-                         tracing::trace_state_ptr trace_state,
-                         streamed_mutation::forwarding fwd,
-                         mutation_reader::forwarding fwd_mr)
-        : _s(s)
-        , _pr(&pr)
-        , _sstables(std::move(sstables))
-        , _pc(pc)
-        , _trace_state(std::move(trace_state))
-        , _slice(slice)
-        , _fwd(fwd)
-        , _fwd_mr(fwd_mr)
-    {
-        auto ssts = _sstables->select(pr);
-        std::vector<mutation_reader*> readers;
-        readers.reserve(ssts.size());
-        _current_readers.reserve(ssts.size());
-        for (auto& sst : ssts) {
-            auto reader = create_reader(sst);
-            readers.emplace_back(reader.get());
-            _current_readers.emplace_back(sstable_and_reader { sst, std::move(reader) });
-        }
-        init_mutation_reader_set(std::move(readers));
-    }
-
-    range_sstable_reader(range_sstable_reader&&) = delete; // reader takes reference to member fields
-
-    virtual future<> fast_forward_to(const dht::partition_range& pr) override {
-        _pr = &pr;
-
-        auto new_sstables = _sstables->select(pr);
-        boost::range::sort(new_sstables);
-        boost::range::sort(_current_readers);
-
-        std::vector<sstables::shared_sstable> to_add;
-        std::vector<sstable_and_reader> to_remove, unchanged;
-        sstable_and_reader::less_compare cmp;
-        boost::set_difference(new_sstables, _current_readers, std::back_inserter(to_add), cmp);
-        std::set_difference(_current_readers.begin(), _current_readers.end(), new_sstables.begin(), new_sstables.end(),
-                            boost::back_move_inserter(to_remove), cmp);
-        std::set_intersection(_current_readers.begin(), _current_readers.end(), new_sstables.begin(), new_sstables.end(),
-                              boost::back_move_inserter(unchanged), cmp);
-
-        std::vector<sstable_and_reader> to_add_sar;
-        boost::transform(to_add, std::back_inserter(to_add_sar), [&] (const sstables::shared_sstable& sst) {
-            return sstable_and_reader { sst, create_reader(sst) };
-        });
-
-        auto get_mutation_readers = [] (std::vector<sstable_and_reader>& ssts) {
-            std::vector<mutation_reader*> mrs;
-            mrs.reserve(ssts.size());
-            boost::range::transform(ssts, std::back_inserter(mrs), [] (const sstable_and_reader& s_a_r) {
-                return s_a_r._reader.get();
-            });
-            return mrs;
-        };
-
-        auto to_add_mrs = get_mutation_readers(to_add_sar);
- auto to_remove_mrs = get_mutation_readers(to_remove); - - unchanged.insert(unchanged.end(), std::make_move_iterator(to_add_sar.begin()), std::make_move_iterator(to_add_sar.end())); - return combined_mutation_reader::fast_forward_to(std::move(to_add_mrs), std::move(to_remove_mrs), pr).then([this, new_readers = std::move(unchanged)] () mutable { - _current_readers = std::move(new_readers); - }); - } -}; - -class single_key_sstable_reader final : public mutation_reader::impl { - column_family* _cf; - schema_ptr _schema; - const dht::partition_range& _pr; - sstables::key _key; - std::vector _mutations; - bool _done = false; - lw_shared_ptr _sstables; - utils::estimated_histogram& _sstable_histogram; - // Use a pointer instead of copying, so we don't need to regenerate the reader if - // the priority changes. - const io_priority_class& _pc; - const query::partition_slice& _slice; - tracing::trace_state_ptr _trace_state; - streamed_mutation::forwarding _fwd; -public: - single_key_sstable_reader(column_family* cf, - schema_ptr schema, - lw_shared_ptr sstables, - utils::estimated_histogram& sstable_histogram, - const dht::partition_range& pr, // must be singular - const query::partition_slice& slice, - const io_priority_class& pc, - tracing::trace_state_ptr trace_state, - streamed_mutation::forwarding fwd) - : _cf(cf) - , _schema(std::move(schema)) - , _pr(pr) - , _key(sstables::key::from_partition_key(*_schema, *pr.start()->value().key())) - , _sstables(std::move(sstables)) - , _sstable_histogram(sstable_histogram) - , _pc(pc) - , _slice(slice) - , _trace_state(std::move(trace_state)) - , _fwd(fwd) - { } - - virtual future operator()() override { - if (_done) { - return make_ready_future(); - } - auto candidates = filter_sstable_for_reader(_sstables->select(_pr), *_cf, _schema, _key, _slice); - return parallel_for_each(std::move(candidates), - [this](const lw_shared_ptr& sstable) { - tracing::trace(_trace_state, "Reading key {} from sstable {}", _pr, seastar::value_of([&sstable] { return sstable->get_filename(); })); - return sstable->read_row(_schema, _pr.start()->value(), _slice, _pc, _fwd).then([this](auto smo) { - if (smo) { - _mutations.emplace_back(std::move(*smo)); - } - }); - }).then([this] () -> streamed_mutation_opt { - _done = true; - if (_mutations.empty()) { - return { }; - } - _sstable_histogram.add(_mutations.size()); - return merge_mutations(std::move(_mutations)); - }); - } -}; - -mutation_reader -column_family::make_sstable_reader(schema_ptr s, - lw_shared_ptr sstables, - const dht::partition_range& pr, - const query::partition_slice& slice, - const io_priority_class& pc, - tracing::trace_state_ptr trace_state, - streamed_mutation::forwarding fwd, - mutation_reader::forwarding fwd_mr) const { - // restricts a reader's concurrency if the configuration specifies it - auto restrict_reader = [&] (mutation_reader&& in) { - auto&& config = [this, &pc] () -> const restricted_mutation_reader_config& { - if (service::get_local_streaming_read_priority().id() == pc.id()) { - return _config.streaming_read_concurrency_config; - } - return _config.read_concurrency_config; - }(); - if (config.sem) { - return make_restricted_reader(config, 1, std::move(in)); - } else { - return std::move(in); - } - }; - - // CAVEAT: if make_sstable_reader() is called on a single partition - // we want to optimize and read exactly this partition. As a - // consequence, fast_forward_to() will *NOT* work on the result, - // regardless of what the fwd_mr parameter says. 
-    if (pr.is_singular() && pr.start()->value().has_key()) {
-        const dht::ring_position& pos = pr.start()->value();
-        if (dht::shard_of(pos.token()) != engine().cpu_id()) {
-            return make_empty_reader(); // range doesn't belong to this shard
-        }
-        return restrict_reader(make_mutation_reader<single_key_sstable_reader>(const_cast<column_family*>(this), std::move(s), std::move(sstables),
-            _stats.estimated_sstable_per_read, pr, slice, pc, std::move(trace_state), fwd));
-    } else {
-        // range_sstable_reader is not movable so we need to wrap it
-        return restrict_reader(make_mutation_reader<range_sstable_reader>(std::move(s), std::move(sstables), pr, slice, pc, std::move(trace_state), fwd, fwd_mr));
-    }
-}
-
-// Exposed for testing, not performance critical.
-future<column_family::const_mutation_partition_ptr>
-column_family::find_partition(schema_ptr s, const dht::decorated_key& key) const {
-    return do_with(dht::partition_range::make_singular(key), [s = std::move(s), this] (auto& range) {
-        return do_with(this->make_reader(s, range), [] (mutation_reader& reader) {
-            return reader().then([] (auto sm) {
-                return mutation_from_streamed_mutation(std::move(sm));
-            }).then([] (mutation_opt&& mo) -> std::unique_ptr<const mutation_partition> {
-                if (!mo) {
-                    return {};
-                }
-                return std::make_unique<const mutation_partition>(std::move(mo->partition()));
-            });
-        });
-    });
-}
-
-future<column_family::const_mutation_partition_ptr>
-column_family::find_partition_slow(schema_ptr s, const partition_key& key) const {
-    return find_partition(s, dht::global_partitioner().decorate_key(*s, key));
-}
-
-future<column_family::const_row_ptr>
-column_family::find_row(schema_ptr s, const dht::decorated_key& partition_key, clustering_key clustering_key) const {
-    return find_partition(s, partition_key).then([clustering_key = std::move(clustering_key), s] (const_mutation_partition_ptr p) {
-        if (!p) {
-            return make_ready_future<const_row_ptr>();
-        }
-        auto r = p->find_row(*s, clustering_key);
-        if (r) {
-            // FIXME: remove copy if only one data source
-            return make_ready_future<const_row_ptr>(std::make_unique<row>(*r));
-        } else {
-            return make_ready_future<const_row_ptr>();
-        }
-    });
-}
-
-mutation_reader
-column_family::make_reader(schema_ptr s,
-                           const dht::partition_range& range,
-                           const query::partition_slice& slice,
-                           const io_priority_class& pc,
-                           tracing::trace_state_ptr trace_state,
-                           streamed_mutation::forwarding fwd,
-                           mutation_reader::forwarding fwd_mr) const {
-    if (_virtual_reader) {
-        return (*_virtual_reader)(s, range, slice, pc, trace_state, fwd, fwd_mr);
-    }
-
-    std::vector<mutation_reader> readers;
-    readers.reserve(_memtables->size() + 1);
-
-    // We're assuming that cache and memtables are both read atomically
-    // for single-key queries, so we don't need to special case memtable
-    // undergoing a move to cache. At any given point in time between
-    // deferring points the sum of data in memtable and cache is coherent. If
-    // single-key queries for each data source were performed across deferring
-    // points, it would be possible that partitions which are ahead of the
-    // memtable cursor would be placed behind the cache cursor, resulting in
-    // those partitions being missing in the combined reader.
-    //
-    // We need to handle this in range queries though, as they are always
-    // deferring. scanning_reader from memtable.cc is falling back to reading
-    // the sstable when memtable is flushed. After memtable is moved to cache,
-    // new readers will no longer use the old memtable, but until then
-    // performance may suffer. We should fix this when we add support for
-    // range queries in cache, so that scans can always be satisfied from
-    // memtable and cache only, as long as data is not evicted.
- // - // https://github.com/scylladb/scylla/issues/309 - // https://github.com/scylladb/scylla/issues/185 - - for (auto&& mt : *_memtables) { - readers.emplace_back(mt->make_reader(s, range, slice, pc, trace_state, fwd, fwd_mr)); - } - - if (_config.enable_cache) { - readers.emplace_back(_cache.make_reader(s, range, slice, pc, std::move(trace_state), fwd, fwd_mr)); - } else { - readers.emplace_back(make_sstable_reader(s, _sstables, range, slice, pc, std::move(trace_state), fwd, fwd_mr)); - } - - return make_combined_reader(std::move(readers)); -} - -mutation_reader -column_family::make_streaming_reader(schema_ptr s, - const dht::partition_range& range) const { - auto& slice = query::full_slice; - auto& pc = service::get_local_streaming_read_priority(); - - std::vector readers; - readers.reserve(_memtables->size() + 1); - - for (auto&& mt : *_memtables) { - readers.emplace_back(mt->make_reader(s, range, slice, pc, nullptr, streamed_mutation::forwarding::no, mutation_reader::forwarding::no)); - } - - readers.emplace_back(make_sstable_reader(s, _sstables, range, slice, pc, nullptr, streamed_mutation::forwarding::no, mutation_reader::forwarding::no)); - - return make_combined_reader(std::move(readers)); -} - -mutation_reader -column_family::make_streaming_reader(schema_ptr s, - const dht::partition_range_vector& ranges) const { - auto& slice = query::full_slice; - auto& pc = service::get_local_streaming_read_priority(); - - auto source = mutation_source([this] (schema_ptr s, const dht::partition_range& range, const query::partition_slice& slice, - const io_priority_class& pc, tracing::trace_state_ptr trace_state, streamed_mutation::forwarding fwd, mutation_reader::forwarding fwd_mr) { - std::vector readers; - readers.reserve(_memtables->size() + 1); - for (auto&& mt : *_memtables) { - readers.emplace_back(mt->make_reader(s, range, slice, pc, trace_state, fwd, fwd_mr)); - } - readers.emplace_back(make_sstable_reader(s, _sstables, range, slice, pc, std::move(trace_state), fwd, fwd_mr)); - return make_combined_reader(std::move(readers)); - }); - - return make_multi_range_reader(s, std::move(source), ranges, slice, pc, nullptr, streamed_mutation::forwarding::no, mutation_reader::forwarding::no); -} - -future> column_family::lock_counter_cells(const mutation& m, timeout_clock::time_point timeout) { - assert(m.schema() == _counter_cell_locks->schema()); - return _counter_cell_locks->lock_cells(m.decorated_key(), partition_cells_range(m.partition()), timeout); -} - -// Not performance critical. Currently used for testing only. 
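// [editor's note] A hedged usage sketch of the walker defined below; `cf`, a
// schema_ptr `s`, and a seastar thread context (so .get0() may block) are
// assumed. The callback must match the asserted
// bool(const dht::decorated_key&, const mutation_partition&) signature:
//
//     size_t n = 0;
//     bool completed = cf.for_all_partitions_slow(s,
//             [&n] (const dht::decorated_key&, const mutation_partition&) {
//         ++n;          // e.g. count partitions
//         return true;  // returning false stops the walk early
//     }).get0();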
-template -future -column_family::for_all_partitions(schema_ptr s, Func&& func) const { - static_assert(std::is_same>::value, - "bad Func signature"); - - struct iteration_state { - mutation_reader reader; - Func func; - bool ok = true; - bool empty = false; - public: - bool done() const { return !ok || empty; } - iteration_state(schema_ptr s, const column_family& cf, Func&& func) - : reader(cf.make_reader(std::move(s))) - , func(std::move(func)) - { } - }; - - return do_with(iteration_state(std::move(s), *this, std::move(func)), [] (iteration_state& is) { - return do_until([&is] { return is.done(); }, [&is] { - return is.reader().then([] (auto sm) { - return mutation_from_streamed_mutation(std::move(sm)); - }).then([&is](mutation_opt&& mo) { - if (!mo) { - is.empty = true; - } else { - is.ok = is.func(mo->decorated_key(), mo->partition()); - } - }); - }).then([&is] { - return is.ok; - }); - }); -} - -future -column_family::for_all_partitions_slow(schema_ptr s, std::function func) const { - return for_all_partitions(std::move(s), std::move(func)); -} - -static bool belongs_to_current_shard(const std::vector& shards) { - return boost::find(shards, engine().cpu_id()) != shards.end(); -} - -static bool belongs_to_other_shard(const std::vector& shards) { - return shards.size() != size_t(belongs_to_current_shard(shards)); -} - -future -column_family::open_sstable(sstables::foreign_sstable_open_info info, sstring dir, int64_t generation, - sstables::sstable::version_types v, sstables::sstable::format_types f) { - auto sst = make_lw_shared(_schema, dir, generation, v, f); - if (!belongs_to_current_shard(info.owners)) { - dblog.debug("sstable {} not relevant for this shard, ignoring", sst->get_filename()); - sst->mark_for_deletion(); - return make_ready_future(); - } - return sst->load(std::move(info)).then([sst] () mutable { - return make_ready_future(std::move(sst)); - }); -} - -void column_family::load_sstable(sstables::shared_sstable& sst, bool reset_level) { - auto shards = sst->get_shards_for_this_sstable(); - if (belongs_to_other_shard(shards)) { - // If we're here, this sstable is shared by this and other - // shard(s). Shared sstables cannot be deleted until all - // shards compacted them, so to reduce disk space usage we - // want to start splitting them now. - // However, we need to delay this compaction until we read all - // the sstables belonging to this CF, because we need all of - // them to know which tombstones we can drop, and what - // generation number is free. - _sstables_need_rewrite.emplace(sst->generation(), sst); - } - if (reset_level) { - // When loading a migrated sstable, set level to 0 because - // it may overlap with existing tables in levels > 0. - // This step is optional, because even if we didn't do this - // scylla would detect the overlap, and bring back some of - // the sstables to level 0. 
- sst->set_sstable_level(0); - } - add_sstable(sst, std::move(shards)); -} - -void column_family::update_stats_for_new_sstable(uint64_t disk_space_used_by_sstable, std::vector&& shards_for_the_sstable) { - assert(!shards_for_the_sstable.empty()); - if (*boost::min_element(shards_for_the_sstable) == engine().cpu_id()) { - _stats.live_disk_space_used += disk_space_used_by_sstable; - _stats.total_disk_space_used += disk_space_used_by_sstable; - _stats.live_sstable_count++; - } -} - -void column_family::add_sstable(lw_shared_ptr sstable, std::vector&& shards_for_the_sstable) { - // allow in-progress reads to continue using old list - _sstables = make_lw_shared(*_sstables); - update_stats_for_new_sstable(sstable->bytes_on_disk(), std::move(shards_for_the_sstable)); - _sstables->insert(std::move(sstable)); -} - -future<> -column_family::update_cache(memtable& m, lw_shared_ptr old_sstables) { - if (_config.enable_cache) { - // be careful to use the old sstable list, since the new one will hit every - // mutation in m. - return _cache.update(m, make_partition_presence_checker(std::move(old_sstables))); - - } else { - return m.clear_gently(); - } -} - -// FIXME: because we are coalescing, it could be that mutations belonging to the same -// range end up in two different tables. Technically, we should wait for both. However, -// the only way we have to make this happen now is to wait on all previous writes. This -// certainly is an overkill, so we won't do it. We can fix this longer term by looking -// at the PREPARE messages, and then noting what is the minimum future we should be -// waiting for. -future<> -column_family::seal_active_streaming_memtable_delayed() { - auto old = _streaming_memtables->back(); - if (old->empty()) { - return make_ready_future<>(); - } - - if (!_delayed_streaming_flush.armed()) { - // We don't want to wait for too long, because the incoming mutations will not be available - // until we flush them to SSTables. On top of that, if the sender ran out of messages, it won't - // send more until we respond to some - which depends on these futures resolving. Sure enough, - // the real fix for that second one is to have better communication between sender and receiver, - // but that's not realistic ATM. If we did have better negotiation here, we would not need a timer - // at all. - _delayed_streaming_flush.arm(2s); - } - - return with_gate(_streaming_flush_gate, [this, old] { - return _waiting_streaming_flushes.get_shared_future(); - }); -} - -future<> -column_family::seal_active_streaming_memtable_immediate() { - auto old = _streaming_memtables->back(); - if (old->empty()) { - return make_ready_future<>(); - } - _streaming_memtables->add_memtable(); - _streaming_memtables->erase(old); - - auto guard = _streaming_flush_phaser.start(); - return with_gate(_streaming_flush_gate, [this, old] { - _delayed_streaming_flush.cancel(); - auto current_waiters = std::exchange(_waiting_streaming_flushes, shared_promise<>()); - auto f = current_waiters.get_shared_future(); // for this seal - - with_lock(_sstables_lock.for_read(), [this, old] { - auto newtab = make_lw_shared(_schema, - _config.datadir, calculate_generation_for_new_table(), - sstables::sstable::version_types::ka, - sstables::sstable::format_types::big); - - newtab->set_unshared(); - - auto&& priority = service::get_local_streaming_write_priority(); - // This is somewhat similar to the main memtable flush, but with important differences. 
-            //
-            // The first difference is that we don't keep aggregate collectd statistics about this one.
-            // If we ever need to, we'll keep separate statistics for them, but we don't want to pollute the
-            // main stats about memtables with streaming memtables.
-            //
-            // Lastly, we don't have any commitlog RP to update, and we don't need to manipulate the
-            // memtable list, since this memtable was not available for reading up until this point.
-            return write_memtable_to_sstable(*old, newtab, incremental_backups_enabled(), priority, false, _config.background_writer_scheduling_group).then([this, newtab, old] {
-                return newtab->open_data();
-            }).then([this, old, newtab] () {
-                return with_semaphore(_cache_update_sem, 1, [this, newtab, old] {
-                    add_sstable(newtab, {engine().cpu_id()});
-                    trigger_compaction();
-                    // Cache synchronization must be started atomically with add_sstable()
-                    if (_config.enable_cache) {
-                        return _cache.update_invalidating(*old);
-                    } else {
-                        return old->clear_gently();
-                    }
-                });
-            }).handle_exception([old] (auto ep) {
-                dblog.error("failed to write streamed sstable: {}", ep);
-                return make_exception_future<>(ep);
-            });
-            // We will also not have any retry logic. If we fail here, we'll fail the streaming and let
-            // the upper layers know. They can then apply any logic they want here.
-        }).then_wrapped([this, current_waiters = std::move(current_waiters)] (future<> f) mutable {
-            if (f.failed()) {
-                current_waiters.set_exception(f.get_exception());
-            } else {
-                current_waiters.set_value();
-            }
-        });
-
-        return f;
-    }).finally([guard = std::move(guard)] { });
-}
-
-future<> column_family::seal_active_streaming_memtable_big(streaming_memtable_big& smb) {
-    auto old = smb.memtables->back();
-    if (old->empty()) {
-        return make_ready_future<>();
-    }
-    smb.memtables->add_memtable();
-    smb.memtables->erase(old);
-    return with_gate(_streaming_flush_gate, [this, old, &smb] {
-        return with_gate(smb.flush_in_progress, [this, old, &smb] {
-            return with_lock(_sstables_lock.for_read(), [this, old, &smb] {
-                auto newtab = make_lw_shared<sstables::sstable>(_schema,
-                    _config.datadir, calculate_generation_for_new_table(),
-                    sstables::sstable::version_types::ka,
-                    sstables::sstable::format_types::big);
-
-                newtab->set_unshared();
-
-                auto&& priority = service::get_local_streaming_write_priority();
-                return write_memtable_to_sstable(*old, newtab, incremental_backups_enabled(), priority, true, _config.background_writer_scheduling_group).then([this, newtab, old, &smb] {
-                    smb.sstables.emplace_back(newtab);
-                }).handle_exception([] (auto ep) {
-                    dblog.error("failed to write streamed sstable: {}", ep);
-                    return make_exception_future<>(ep);
-                });
-            });
-        });
-    });
-}
-
-future<>
-column_family::seal_active_memtable(memtable_list::flush_behavior ignored) {
-    auto old = _memtables->back();
-    dblog.debug("Sealing active memtable of {}.{}, partitions: {}, occupancy: {}", _schema->cf_name(), _schema->ks_name(), old->partition_count(), old->occupancy());
-
-    if (old->empty()) {
-        dblog.debug("Memtable is empty");
-        return _flush_barrier.advance_and_await();
-    }
-    _memtables->add_memtable();
-    _stats.memtable_switch_count++;
-    auto previous_flush = _flush_barrier.advance_and_await();
-    auto op = _flush_barrier.start();
-
-    auto memtable_size = old->occupancy().total_space();
-
-    _stats.pending_flushes++;
-    _config.cf_stats->pending_memtables_flushes_count++;
-    _config.cf_stats->pending_memtables_flushes_bytes += memtable_size;
-
-    return repeat([this, old] {
-        return with_lock(_sstables_lock.for_read(), [this, old] {
-
return try_flush_memtable_to_sstable(old); - }); - }).then([this, memtable_size, old, op = std::move(op), previous_flush = std::move(previous_flush)] () mutable { - _stats.pending_flushes--; - _config.cf_stats->pending_memtables_flushes_count--; - _config.cf_stats->pending_memtables_flushes_bytes -= memtable_size; - - if (_commitlog) { - _commitlog->discard_completed_segments(_schema->id(), old->rp_set()); - } - return previous_flush.finally([op = std::move(op)] { }); - }); - // FIXME: release commit log - // FIXME: provide back-pressure to upper layers -} - -future -column_family::try_flush_memtable_to_sstable(lw_shared_ptr old) { - auto gen = calculate_generation_for_new_table(); - - auto newtab = make_lw_shared(_schema, - _config.datadir, gen, - sstables::sstable::version_types::ka, - sstables::sstable::format_types::big); - - newtab->set_unshared(); - dblog.debug("Flushing to {}", newtab->get_filename()); - // Note that due to our sharded architecture, it is possible that - // in the face of a value change some shards will backup sstables - // while others won't. - // - // This is, in theory, possible to mitigate through a rwlock. - // However, this doesn't differ from the situation where all tables - // are coming from a single shard and the toggle happens in the - // middle of them. - // - // The code as is guarantees that we'll never partially backup a - // single sstable, so that is enough of a guarantee. - auto&& priority = service::get_local_memtable_flush_priority(); - return write_memtable_to_sstable(*old, newtab, incremental_backups_enabled(), priority, false, _config.memtable_scheduling_group).then([this, newtab, old] { - return newtab->open_data(); - }).then_wrapped([this, old, newtab] (future<> ret) { - dblog.debug("Flushing to {} done", newtab->get_filename()); - try { - ret.get(); - - // Cache updates are serialized because partition_presence_checker - // is using data source snapshot created before the update starts, so that - // we can use incremental_selector. If updates were done concurrently we - // could mispopulate due to stale presence information. - return with_semaphore(_cache_update_sem, 1, [this, old, newtab] { - // We must add sstable before we call update_cache(), because - // memtable's data after moving to cache can be evicted at any time. - auto old_sstables = _sstables; - add_sstable(newtab, {engine().cpu_id()}); - old->mark_flushed(newtab->as_mutation_source()); - - trigger_compaction(); - return update_cache(*old, std::move(old_sstables)); - }).then_wrapped([this, newtab, old] (future<> f) { - try { - f.get(); - } catch(...) { - dblog.error("failed to move memtable for {} to cache: {}", newtab->get_filename(), std::current_exception()); - } - - _memtables->erase(old); - dblog.debug("Memtable for {} replaced", newtab->get_filename()); - - return make_ready_future(stop_iteration::yes); - }); - } catch (...) { - newtab->mark_for_deletion(); - dblog.error("failed to write sstable {}: {}", newtab->get_filename(), std::current_exception()); - // If we failed this write we will try the write again and that will create a new flush reader - // that will decrease dirty memory again. So we need to reset the accounting. - old->revert_flushed_memory(); - } - return sleep(10s).then([] { - return make_ready_future(stop_iteration::no); - }); - }); -} - -void -column_family::start() { - // FIXME: add option to disable automatic compaction. 
- start_compaction(); -} - -future<> -column_family::stop() { - return when_all(_memtables->request_flush(), _streaming_memtables->request_flush()).discard_result().finally([this] { - return _compaction_manager.remove(this).then([this] { - // Nest, instead of using when_all, so we don't lose any exceptions. - return _streaming_flush_gate.close(); - }).then([this] { - return _sstable_deletion_gate.close(); - }); - }); -} - -static io_error_handler error_handler_for_upload_dir() { - return [] (std::exception_ptr eptr) { - // do nothing about sstable exception and caller will just rethrow it. - }; -} - -// This function will iterate through upload directory in column family, -// and will do the following for each sstable found: -// 1) Mutate sstable level to 0. -// 2) Create hard links to its components in column family dir. -// 3) Remove all of its components in upload directory. -// At the end, it's expected that upload dir is empty and all of its -// previous content was moved to column family dir. -// -// Return a vector containing descriptor of sstables to be loaded. -future> -distributed_loader::flush_upload_dir(distributed& db, sstring ks_name, sstring cf_name) { - struct work { - std::unordered_map descriptors; - std::vector flushed; - }; - - return do_with(work(), [&db, ks_name = std::move(ks_name), cf_name = std::move(cf_name)] (work& work) { - auto& cf = db.local().find_column_family(ks_name, cf_name); - - return lister::scan_dir(lister::path(cf._config.datadir) / "upload", { directory_entry_type::regular }, - [&work] (lister::path parent_dir, directory_entry de) { - auto comps = sstables::entry_descriptor::make_descriptor(de.name); - if (comps.component != sstables::sstable::component_type::TOC) { - return make_ready_future<>(); - } - work.descriptors.emplace(comps.generation, std::move(comps)); - return make_ready_future<>(); - }, &column_family::manifest_json_filter).then([&db, ks_name = std::move(ks_name), cf_name = std::move(cf_name), &work] { - work.flushed.reserve(work.descriptors.size()); - - return do_for_each(work.descriptors, [&db, ks_name, cf_name, &work] (auto& pair) { - return db.invoke_on(column_family::calculate_shard_from_sstable_generation(pair.first), - [ks_name, cf_name, comps = pair.second] (database& db) { - auto& cf = db.find_column_family(ks_name, cf_name); - - auto sst = make_lw_shared(cf.schema(), cf._config.datadir + "/upload", comps.generation, - comps.version, comps.format, gc_clock::now(), - [] (disk_error_signal_type&) { return error_handler_for_upload_dir(); }); - auto gen = cf.calculate_generation_for_new_table(); - - // Read toc content as it will be needed for moving and deleting a sstable. - return sst->read_toc().then([sst] { - return sst->mutate_sstable_level(0); - }).then([&cf, sst, gen] { - return sst->create_links(cf._config.datadir, gen); - }).then([sst] { - return sstables::remove_by_toc_name(sst->toc_filename(), error_handler_for_upload_dir()); - }).then([sst, gen] { - return make_ready_future(gen); - }); - }).then([&work, comps = pair.second] (auto gen) mutable { - comps.generation = gen; - work.flushed.push_back(std::move(comps)); - return make_ready_future<>(); - }); - }); - }).then([&work] { - return make_ready_future>(std::move(work.flushed)); - }); - }); -} - -future> -column_family::reshuffle_sstables(std::set all_generations, int64_t start) { - struct work { - int64_t current_gen; - std::set all_generations; // Stores generation of all live sstables in the system. 
-        std::map<int64_t, sstables::shared_sstable> sstables;
-        std::unordered_map<int64_t, sstables::entry_descriptor> descriptors;
-        std::vector<sstables::entry_descriptor> reshuffled;
-        work(int64_t start, std::set<int64_t> gens)
-            : current_gen(start ? start : 1)
-            , all_generations(gens) {}
-    };
-
-    return do_with(work(start, std::move(all_generations)), [this] (work& work) {
-        return lister::scan_dir(_config.datadir, { directory_entry_type::regular }, [this, &work] (lister::path parent_dir, directory_entry de) {
-            auto comps = sstables::entry_descriptor::make_descriptor(de.name);
-            if (comps.component != sstables::sstable::component_type::TOC) {
-                return make_ready_future<>();
-            }
-            // Skip generations that were already loaded by Scylla at a previous stage.
-            if (work.all_generations.count(comps.generation) != 0) {
-                return make_ready_future<>();
-            }
-            auto sst = make_lw_shared<sstables::sstable>(_schema,
-                _config.datadir, comps.generation,
-                comps.version, comps.format);
-            work.sstables.emplace(comps.generation, std::move(sst));
-            work.descriptors.emplace(comps.generation, std::move(comps));
-            // FIXME: This is the only place in which we actually issue disk activity aside from
-            // directory metadata operations.
-            //
-            // But without the TOC information, we don't know which files we should link.
-            // The alternative to that would be to change create_links() to try creating a
-            // link for all possible files and handle the failures gracefully, but that's not
-            // exactly fast either.
-            //
-            // Those SSTables are not known by anyone in the system. So we don't have any kind of
-            // object describing them. There isn't too much of a choice.
-            return work.sstables[comps.generation]->read_toc();
-        }, &manifest_json_filter).then([&work] {
-            // Note: cannot be parallel because we will be shuffling things around at this stage. Can't race.
-            return do_for_each(work.sstables, [&work] (auto& pair) {
-                auto&& comps = std::move(work.descriptors.at(pair.first));
-                comps.generation = work.current_gen;
-                work.reshuffled.push_back(std::move(comps));
-
-                if (pair.first == work.current_gen) {
-                    ++work.current_gen;
-                    return make_ready_future<>();
-                }
-                return pair.second->set_generation(work.current_gen++);
-            });
-        }).then([&work] {
-            return make_ready_future<std::vector<sstables::entry_descriptor>>(std::move(work.reshuffled));
-        });
-    });
-}
-
-seastar::metrics::label column_family_label("cf");
-seastar::metrics::label keyspace_label("ks");
-void column_family::set_metrics() {
-    auto cf = column_family_label(_schema->cf_name());
-    auto ks = keyspace_label(_schema->ks_name());
-    namespace ms = seastar::metrics;
-    _metrics.add_group("column_family", {
-        ms::make_derive("memtable_switch", ms::description("Number of times flush has resulted in the memtable being switched out"), _stats.memtable_switch_count)(cf)(ks),
-        ms::make_gauge("pending_tasks", ms::description("Estimated number of tasks pending for this column family"), _stats.pending_flushes)(cf)(ks),
-        ms::make_gauge("live_disk_space", ms::description("Live disk space used"), _stats.live_disk_space_used)(cf)(ks),
-        ms::make_gauge("total_disk_space", ms::description("Total disk space used"), _stats.total_disk_space_used)(cf)(ks),
-        ms::make_gauge("live_sstable", ms::description("Live sstable count"), _stats.live_sstable_count)(cf)(ks),
-        ms::make_gauge("pending_compaction", ms::description("Estimated number of compactions pending for this column family"), _stats.pending_compactions)(cf)(ks)
-    });
-    if (_schema->ks_name() != db::system_keyspace::NAME && _schema->ks_name() != db::schema_tables::v3::NAME && _schema->ks_name() != "system_traces") {
-        _metrics.add_group("column_family", {
-            ms::make_histogram("read_latency",
-                ms::description("Read latency histogram"),
-                [this] { return _stats.estimated_read.get_histogram(std::chrono::microseconds(100)); })(cf)(ks),
-            ms::make_histogram("write_latency", ms::description("Write latency histogram"), [this] { return _stats.estimated_write.get_histogram(std::chrono::microseconds(100)); })(cf)(ks),
-            ms::make_gauge("cache_hit_rate", ms::description("Cache hit rate"), [this] { return float(_global_cache_hit_rate); })(cf)(ks)
-        });
-    }
-}
-
-void column_family::rebuild_statistics() {
-    // zeroing live_disk_space_used and live_sstable_count because the
-    // sstable list was re-created
-    _stats.live_disk_space_used = 0;
-    _stats.live_sstable_count = 0;
-
-    for (auto&& tab : boost::range::join(_sstables_compacted_but_not_deleted,
-            // this might seem dangerous, but "move" here just avoids constness,
-            // making the two ranges compatible when compiling with boost 1.55.
-            // No one is actually moving anything...
-            std::move(*_sstables->all()))) {
-        update_stats_for_new_sstable(tab->bytes_on_disk(), tab->get_shards_for_this_sstable());
-    }
-}
-
-void
-column_family::rebuild_sstable_list(const std::vector<sstables::shared_sstable>& new_sstables,
-                                    const std::vector<sstables::shared_sstable>& sstables_to_remove) {
-    // Build a new list of _sstables: We remove from the existing list the
-    // tables we compacted (by now, there might be more sstables flushed
-    // later), and we add the new tables generated by the compaction.
-    // We create a new list rather than modifying it in-place, so that
-    // on-going reads can continue to use the old list.
-    //
-    // We only remove old sstables after they are successfully deleted,
-    // to avoid a new compaction ignoring data in the old sstables
-    // if the deletion fails (note deletion of shared sstables can take
-    // unbounded time, because all shards must agree on the deletion).
-    auto current_sstables = _sstables;
-    auto new_sstable_list = _compaction_strategy.make_sstable_set(_schema);
-    auto new_compacted_but_not_deleted = _sstables_compacted_but_not_deleted;
-
-
-    std::unordered_set<sstables::shared_sstable> s(
-            sstables_to_remove.begin(), sstables_to_remove.end());
-
-    // First, add the new sstables.
-
-    // this might seem dangerous, but "move" here just avoids constness,
-    // making the two ranges compatible when compiling with boost 1.55.
-    // No one is actually moving anything...
-    for (auto&& tab : boost::range::join(new_sstables, std::move(*current_sstables->all()))) {
-        // Checks if oldtab is a sstable not being compacted.
-        if (!s.count(tab)) {
-            new_sstable_list.insert(tab);
-        } else {
-            new_compacted_but_not_deleted.push_back(tab);
-        }
-    }
-    _sstables = make_lw_shared(std::move(new_sstable_list));
-    _sstables_compacted_but_not_deleted = std::move(new_compacted_but_not_deleted);
-
-    rebuild_statistics();
-
-    // Second, delete the old sstables. This is done in the background, so we can
-    // consider this compaction completed.
-    seastar::with_gate(_sstable_deletion_gate, [this, sstables_to_remove] {
-        return sstables::delete_atomically(sstables_to_remove).then_wrapped([this, sstables_to_remove] (future<> f) {
-            std::exception_ptr eptr;
-            try {
-                f.get();
-            } catch(...) {
-                eptr = std::current_exception();
-            }
-
-            // unconditionally remove compacted sstables from _sstables_compacted_but_not_deleted,
-            // or they could stay forever in the set, resulting in deleted files remaining
-            // open and disk space not being released until shutdown.
- std::unordered_set s( - sstables_to_remove.begin(), sstables_to_remove.end()); - auto e = boost::range::remove_if(_sstables_compacted_but_not_deleted, [&] (sstables::shared_sstable sst) -> bool { - return s.count(sst); - }); - _sstables_compacted_but_not_deleted.erase(e, _sstables_compacted_but_not_deleted.end()); - rebuild_statistics(); - - if (eptr) { - return make_exception_future<>(eptr); - } - return make_ready_future<>(); - }).handle_exception([] (std::exception_ptr e) { - try { - std::rethrow_exception(e); - } catch (sstables::atomic_deletion_cancelled& adc) { - dblog.debug("Failed to delete sstables after compaction: {}", adc); - } - }).then([this] { - // refresh underlying data source in row cache to prevent it from holding reference - // to sstables files which were previously deleted. - _cache.refresh_snapshot(); - }); - }); -} - -void column_family::replace_ancestors_needed_rewrite(std::vector new_sstables) { - std::vector old_sstables; - std::unordered_set ancestors; - - for (auto& sst : new_sstables) { - auto sst_ancestors = sst->ancestors(); - ancestors.insert(sst_ancestors.begin(), sst_ancestors.end()); - } - - for (auto& ancestor : ancestors) { - auto it = _sstables_need_rewrite.find(ancestor); - if (it != _sstables_need_rewrite.end()) { - old_sstables.push_back(it->second); - _sstables_need_rewrite.erase(it); - } - } - rebuild_sstable_list(new_sstables, old_sstables); -} - -void column_family::remove_ancestors_needed_rewrite(std::unordered_set ancestors) { - std::vector old_sstables; - for (auto& ancestor : ancestors) { - auto it = _sstables_need_rewrite.find(ancestor); - if (it != _sstables_need_rewrite.end()) { - old_sstables.push_back(it->second); - _sstables_need_rewrite.erase(it); - } - } - rebuild_sstable_list({}, old_sstables); -} - -future<> -column_family::compact_sstables(sstables::compaction_descriptor descriptor, bool cleanup) { - if (!descriptor.sstables.size()) { - // if there is nothing to compact, just return. - return make_ready_future<>(); - } - - return with_lock(_sstables_lock.for_read(), [this, descriptor = std::move(descriptor), cleanup] { - auto sstables_to_compact = make_lw_shared>(std::move(descriptor.sstables)); - - auto create_sstable = [this] { - auto gen = this->calculate_generation_for_new_table(); - // FIXME: use "tmp" marker in names of incomplete sstable - auto sst = make_lw_shared(_schema, _config.datadir, gen, - sstables::sstable::version_types::ka, - sstables::sstable::format_types::big); - sst->set_unshared(); - return sst; - }; - return sstables::compact_sstables(*sstables_to_compact, *this, create_sstable, descriptor.max_sstable_bytes, descriptor.level, - cleanup, _config.background_writer_scheduling_group).then([this, sstables_to_compact] (auto new_sstables) { - _compaction_strategy.notify_completion(*sstables_to_compact, new_sstables); - return this->rebuild_sstable_list(new_sstables, *sstables_to_compact); - }); - }); -} - -static bool needs_cleanup(const lw_shared_ptr& sst, - const dht::token_range_vector& owned_ranges, - schema_ptr s) { - auto first = sst->get_first_partition_key(); - auto last = sst->get_last_partition_key(); - auto first_token = dht::global_partitioner().get_token(*s, first); - auto last_token = dht::global_partitioner().get_token(*s, last); - dht::token_range sst_token_range = dht::token_range::make(first_token, last_token); - - // return true iff sst partition range isn't fully contained in any of the owned ranges. 
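// [editor's note] Illustrative numbers, not from the original source: if this
// node owns token ranges (0, 100] and (200, 300], an sstable spanning tokens
// [50, 150] is contained in neither range, so needs_cleanup() returns true and
// a cleanup compaction rewrites it, dropping the no-longer-owned keys in
// (100, 150]. An sstable spanning [210, 290] lies inside (200, 300] and is
// left alone.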
- for (auto& r : owned_ranges) { - if (r.contains(sst_token_range, dht::token_comparator())) { - return false; - } - } - return true; -} - -future<> column_family::cleanup_sstables(sstables::compaction_descriptor descriptor) { - dht::token_range_vector r = service::get_local_storage_service().get_local_ranges(_schema->ks_name()); - - return do_with(std::move(descriptor.sstables), std::move(r), [this] (auto& sstables, auto& owned_ranges) { - return do_for_each(sstables, [this, &owned_ranges] (auto& sst) { - if (!owned_ranges.empty() && !needs_cleanup(sst, owned_ranges, _schema)) { - return make_ready_future<>(); - } - - // this semaphore ensures that only one cleanup will run per shard. - // That's to prevent node from running out of space when almost all sstables - // need cleanup, so if sstables are cleaned in parallel, we may need almost - // twice the disk space used by those sstables. - static thread_local semaphore sem(1); - - return with_semaphore(sem, 1, [this, &sst] { - return this->compact_sstables(sstables::compaction_descriptor({ sst }, sst->get_sstable_level()), true); - }); - }); - }); -} - -// FIXME: this is just an example, should be changed to something more general -// Note: We assume that the column_family does not get destroyed during compaction. -future<> -column_family::compact_all_sstables() { - return _compaction_manager.submit_major_compaction(this); -} - -void column_family::start_compaction() { - set_compaction_strategy(_schema->compaction_strategy()); -} - -void column_family::trigger_compaction() { - // Submitting compaction job to compaction manager. - do_trigger_compaction(); // see below -} - -void column_family::do_trigger_compaction() { - // But only submit if we're not locked out - if (!_compaction_disabled) { - _compaction_manager.submit(this); - } -} - -future<> column_family::run_compaction(sstables::compaction_descriptor descriptor) { - return compact_sstables(std::move(descriptor)); -} - -void column_family::set_compaction_strategy(sstables::compaction_strategy_type strategy) { - dblog.info0("Setting compaction strategy of {}.{} to {}", _schema->ks_name(), _schema->cf_name(), sstables::compaction_strategy::name(strategy)); - auto new_cs = make_compaction_strategy(strategy, _schema->compaction_strategy_options()); - auto new_sstables = new_cs.make_sstable_set(_schema); - for (auto&& s : *_sstables->all()) { - new_sstables.insert(s); - } - // now exception safe: - _compaction_strategy = std::move(new_cs); - _sstables = std::move(new_sstables); -} - -size_t column_family::sstables_count() const { - return _sstables->all()->size(); -} - -std::vector column_family::sstable_count_per_level() const { - std::vector count_per_level; - for (auto&& sst : *_sstables->all()) { - auto level = sst->get_sstable_level(); - - if (level + 1 > count_per_level.size()) { - count_per_level.resize(level + 1, 0UL); - } - count_per_level[level]++; - } - return count_per_level; -} - -int64_t column_family::get_unleveled_sstables() const { - // TODO: when we support leveled compaction, we should return the number of - // SSTables in L0. If leveled compaction is enabled in this column family, - // then we should return zero, as we currently do. 
- return 0; -} - -const sstables::sstable_set& column_family::get_sstable_set() const { - return *_sstables; -} - -lw_shared_ptr column_family::get_sstables() const { - return _sstables->all(); -} - -std::vector column_family::select_sstables(const dht::partition_range& range) const { - return _sstables->select(range); -} - -std::vector column_family::candidates_for_compaction() const { - return boost::copy_range>(*get_sstables() - | boost::adaptors::filtered([this] (auto& sst) { return !_sstables_need_rewrite.count(sst->generation()); })); -} - -std::vector column_family::sstables_need_rewrite() const { - return boost::copy_range>(_sstables_need_rewrite | boost::adaptors::map_values); -} - -// Gets the list of all sstables in the column family, including ones that are -// not used for active queries because they have already been compacted, but are -// waiting for delete_atomically() to return. -// -// As long as we haven't deleted them, compaction needs to ensure it doesn't -// garbage-collect a tombstone that covers data in an sstable that may not be -// successfully deleted. -lw_shared_ptr column_family::get_sstables_including_compacted_undeleted() const { - if (_sstables_compacted_but_not_deleted.empty()) { - return get_sstables(); - } - auto ret = make_lw_shared(*_sstables->all()); - for (auto&& s : _sstables_compacted_but_not_deleted) { - ret->insert(s); - } - return ret; -} - -const std::vector& column_family::compacted_undeleted_sstables() const { - return _sstables_compacted_but_not_deleted; -} - -inline bool column_family::manifest_json_filter(const lister::path&, const directory_entry& entry) { - // Filter out directories. If type of the entry is unknown - check its name. - if (entry.type.value_or(directory_entry_type::regular) != directory_entry_type::directory && entry.name == "manifest.json") { - return false; - } - - return true; -} - -// TODO: possibly move it to seastar -template -static future<> invoke_all_with_ptr(distributed& s, PtrType ptr, Func&& func) { - return parallel_for_each(smp::all_cpus(), [&s, &func, ptr] (unsigned id) { - return s.invoke_on(id, [func, foreign = make_foreign(ptr)] (Service& s) mutable { - return func(s, std::move(foreign)); - }); - }); -} - -future<> distributed_loader::open_sstable(distributed& db, sstables::entry_descriptor comps, - std::function (column_family&, sstables::foreign_sstable_open_info)> func, const io_priority_class& pc) { - // loads components of a sstable from shard S and share it with all other - // shards. Which shard a sstable will be opened at is decided using - // calculate_shard_from_sstable_generation(), which is the inverse of - // calculate_generation_for_new_table(). That ensures every sstable is - // shard-local if reshard wasn't performed. This approach is also expected - // to distribute evenly the resource usage among all shards. 
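// A small std-only sketch of the generation/shard round-trip described
// above (smp_count stands in for smp::count; the real functions live on
// column_family, so everything here is illustrative):

static const unsigned smp_count = 8;

// Each shard seeds its generation counter with its own id and always
// advances by the shard count, so gen % smp_count never changes...
static long next_generation(long& highest_gen_for_shard) {
    return highest_gen_for_shard += smp_count;
}

// ...which makes the shard computation the exact inverse of the allocator:
static unsigned shard_from_generation(long gen) {
    return static_cast<unsigned>(gen % smp_count);
}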
- - return db.invoke_on(column_family::calculate_shard_from_sstable_generation(comps.generation), - [&db, comps = std::move(comps), func = std::move(func), pc] (database& local) { - - return with_semaphore(local.sstable_load_concurrency_sem(), 1, [&db, &local, comps = std::move(comps), func = std::move(func), pc] { - auto& cf = local.find_column_family(comps.ks, comps.cf); - - auto f = sstables::sstable::load_shared_components(cf.schema(), cf._config.datadir, comps.generation, comps.version, comps.format, pc); - return f.then([&db, comps = std::move(comps), func = std::move(func)] (sstables::sstable_open_info info) { - // shared components loaded, now opening sstable in all shards with shared components - return do_with(std::move(info), [&db, comps = std::move(comps), func = std::move(func)] (auto& info) { - return invoke_all_with_ptr(db, std::move(info.components), - [owners = info.owners, data = info.data.dup(), index = info.index.dup(), comps, func] (database& db, auto components) { - auto& cf = db.find_column_family(comps.ks, comps.cf); - return func(cf, sstables::foreign_sstable_open_info{std::move(components), owners, data, index}); - }); - }); - }); - }); - }); -} - -// global_column_family_ptr provides a way to easily retrieve local instance of a given column family. -class global_column_family_ptr { - distributed& _db; - utils::UUID _id; -private: - column_family& get() const { return _db.local().find_column_family(_id); } -public: - global_column_family_ptr(distributed& db, sstring ks_name, sstring cf_name) - : _db(db) - , _id(_db.local().find_column_family(ks_name, cf_name).schema()->id()) { - } - - column_family* operator->() const { - return &get(); - } - column_family& operator*() const { - return get(); - } -}; - -template -static future> -load_sstables_with_open_info(std::vector ssts_info, schema_ptr s, sstring dir, Pred&& pred) { - return do_with(std::vector(), [ssts_info = std::move(ssts_info), s, dir, pred] (auto& ssts) mutable { - return parallel_for_each(std::move(ssts_info), [&ssts, s, dir, pred] (auto& info) mutable { - if (!pred(info)) { - return make_ready_future<>(); - } - auto sst = make_lw_shared(s, dir, info.generation, info.version, info.format); - return sst->load(std::move(info)).then([&ssts, sst] { - ssts.push_back(std::move(sst)); - return make_ready_future<>(); - }); - }).then([&ssts] () mutable { - return std::move(ssts); - }); - }); -} - -// Return all sstables that need resharding in the system. Only one instance of a shared sstable is returned. -static future> get_all_shared_sstables(distributed& db, global_column_family_ptr cf) { - class all_shared_sstables { - schema_ptr _schema; - sstring _dir; - std::unordered_map _result; - public: - all_shared_sstables(global_column_family_ptr cf) : _schema(cf->schema()), _dir(cf->dir()) {} - - future<> operator()(std::vector ssts_info) { - return load_sstables_with_open_info(std::move(ssts_info), _schema, _dir, [this] (auto& info) { - // skip loading of shared sstable that is already stored in _result. 
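// The dedup rule from the comment above, in isolation: the predicate skips
// a generation that was already collected, and emplace() keeps the first
// copy (sstable here is an opaque stand-in, not the real sstables::sstable):

#include <memory>
#include <unordered_map>

struct sstable {};

struct shared_sstable_collector {
    std::unordered_map<long, std::shared_ptr<sstable>> result;

    bool not_seen_yet(long generation) const {
        return result.count(generation) == 0;       // the loader's "pred"
    }
    void add(long generation, std::shared_ptr<sstable> sst) {
        result.emplace(generation, std::move(sst)); // no-op for duplicates
    }
};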
- return !_result.count(info.generation); - }).then([this] (std::vector sstables) { - for (auto& sst : sstables) { - auto gen = sst->generation(); - _result.emplace(gen, std::move(sst)); - } - return make_ready_future<>(); - }); - } - - std::vector get() && { - return boost::copy_range>(std::move(_result) | boost::adaptors::map_values); - } - }; - - return db.map_reduce(all_shared_sstables(cf), [cf] (database& db) mutable { - return seastar::async([cf] { - return boost::copy_range>(cf->sstables_need_rewrite() - | boost::adaptors::transformed([] (auto&& sst) { return sst->get_open_info().get0(); })); - }); - }); -} - -// checks whether or not a given column family is worth resharding by checking if any of its -// sstables has more than one owner shard. -static future worth_resharding(distributed& db, global_column_family_ptr cf) { - auto has_shared_sstables = [cf] (database& db) { - return cf->has_shared_sstables(); - }; - return db.map_reduce0(has_shared_sstables, bool(false), std::logical_or()); -} - -// make a set of sstables available at another shard. -template -static future<> forward_sstables_to(shard_id shard, std::vector sstables, global_column_family_ptr cf, Func&& func) { - return seastar::async([sstables = std::move(sstables), shard, cf, func] () mutable { - auto infos = boost::copy_range>(sstables - | boost::adaptors::transformed([] (auto&& sst) { return sst->get_open_info().get0(); })); - - smp::submit_to(shard, [cf, func, infos = std::move(infos)] () mutable { - return load_sstables_with_open_info(std::move(infos), cf->schema(), cf->dir(), [] (auto& p) { - return true; - }).then([func] (std::vector sstables) { - return func(std::move(sstables)); - }); - }).get(); - }); -} - -// invokes each descriptor at its target shard, which involves forwarding sstables too. -template -static future<> invoke_all_resharding_jobs(global_column_family_ptr cf, std::vector jobs, Func&& func) { - return parallel_for_each(std::move(jobs), [cf, func] (sstables::resharding_descriptor& job) mutable { - return forward_sstables_to(job.reshard_at, std::move(job.sstables), cf, - [func, level = job.level, max_sstable_bytes = job.max_sstable_bytes] (auto sstables) { - // used to ensure that only one reshard operation will run per shard. - static thread_local semaphore sem(1); - return with_semaphore(sem, 1, [func, sstables = std::move(sstables), level, max_sstable_bytes] () mutable { - return func(std::move(sstables), level, max_sstable_bytes); - }); - }); - }); -} - -static std::vector sstables_for_shard(const std::vector& sstables, shard_id shard) { - auto belongs_to_shard = [] (const sstables::shared_sstable& sst, unsigned shard) { - auto shards = sst->get_shards_for_this_sstable(); - return boost::range::find(shards, shard) != shards.end(); - }; - - return boost::copy_range>(sstables - | boost::adaptors::filtered([&] (auto& sst) { return belongs_to_shard(sst, shard); })); -} - -void distributed_loader::reshard(distributed& db, sstring ks_name, sstring cf_name) { - assert(engine().cpu_id() == 0); // NOTE: should always run on shard 0! - - // ensures that only one column family is resharded at a time (that's okay because - // actual resharding is parallelized), and that's needed to prevent the same column - // family from being resharded in parallel (that could happen, for example, if - // refresh (triggers resharding) is issued by user while resharding is going on). 
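// The same serialization idiom in isolation: a static (or thread_local)
// semaphore holding a single unit makes concurrent callers queue up behind
// one another. A sketch assuming Seastar's semaphore and a callable that
// returns future<>; header paths vary between Seastar versions:

#include <seastar/core/semaphore.hh>
#include <utility>

template <typename AsyncOp>
seastar::future<> serialized(AsyncOp&& op) {
    static thread_local seastar::semaphore sem(1);
    // with_semaphore() waits for a unit, runs op(), and releases the unit
    // when the returned future resolves, even on exception.
    return seastar::with_semaphore(sem, 1, std::forward<AsyncOp>(op));
}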
- static semaphore sem(1); - - with_semaphore(sem, 1, [&db, ks_name = std::move(ks_name), cf_name = std::move(cf_name)] () mutable { - return seastar::async([&db, ks_name = std::move(ks_name), cf_name = std::move(cf_name)] () mutable { - global_column_family_ptr cf(db, ks_name, cf_name); - - if (cf->get_compaction_manager().stopped()) { - return; - } - // fast path to detect that this column family doesn't need reshard. - if (!worth_resharding(db, cf).get0()) { - dblog.debug("Nothing to reshard for {}.{}", cf->schema()->ks_name(), cf->schema()->cf_name()); - return; - } - - auto candidates = get_all_shared_sstables(db, cf).get0(); - dblog.debug("{} candidates for resharding for {}.{}", candidates.size(), cf->schema()->ks_name(), cf->schema()->cf_name()); - auto jobs = cf->get_compaction_strategy().get_resharding_jobs(*cf, std::move(candidates)); - dblog.debug("{} resharding jobs for {}.{}", jobs.size(), cf->schema()->ks_name(), cf->schema()->cf_name()); - - invoke_all_resharding_jobs(cf, std::move(jobs), [&cf] (auto sstables, auto level, auto max_sstable_bytes) { - auto creator = [&cf] (shard_id shard) mutable { - // we need generation calculated by instance of cf at requested shard, - // or resource usage wouldn't be fairly distributed among shards. - auto gen = smp::submit_to(shard, [&cf] () { - return cf->calculate_generation_for_new_table(); - }).get0(); - - auto sst = make_lw_shared(cf->schema(), cf->dir(), gen, - sstables::sstable::version_types::ka, sstables::sstable::format_types::big, - gc_clock::now(), default_io_error_handler_gen()); - return sst; - }; - auto f = sstables::reshard_sstables(sstables, *cf, creator, max_sstable_bytes, level, cf->background_writer_scheduling_group()); - - return f.then([&cf, sstables = std::move(sstables)] (std::vector new_sstables) mutable { - // an input sstable may belong to shard 1 and 2 and only have data which - // token belongs to shard 1. That means resharding will only create a - // sstable for shard 1, but both shards opened the sstable. So our code - // below should ask both shards to remove the resharded table, or it - // wouldn't be deleted by our deletion manager, and resharding would be - // triggered again in the subsequent boot. - return parallel_for_each(boost::irange(0u, smp::count), [&cf, sstables, new_sstables] (auto shard) { - auto old_sstables_for_shard = sstables_for_shard(sstables, shard); - // nothing to do if no input sstable belongs to this shard. - if (old_sstables_for_shard.empty()) { - return make_ready_future<>(); - } - auto new_sstables_for_shard = sstables_for_shard(new_sstables, shard); - // sanity checks - for (auto& sst : new_sstables_for_shard) { - auto shards = sst->get_shards_for_this_sstable(); - if (shards.size() != 1) { - throw std::runtime_error(sprint("resharded sstable %s doesn't belong to only one shard", sst->get_filename())); - } - if (shards.front() != shard) { - throw std::runtime_error(sprint("resharded sstable %s should belong to shard %d", sst->get_filename(), shard)); - } - } - - if (new_sstables_for_shard.empty()) { - // handles case where sstable needing rewrite doesn't produce any sstable - // for a shard it belongs to when resharded (the reason is explained above). 
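// The per-shard ownership check behind the bookkeeping described above: an
// sstable lists every shard that owns part of it, and after resharding each
// owning shard must either receive replacement sstables or drop its old
// ancestor entry. A std-only sketch with illustrative names:

#include <algorithm>
#include <vector>

static bool owned_by(const std::vector<unsigned>& owning_shards, unsigned shard) {
    return std::find(owning_shards.begin(), owning_shards.end(), shard)
           != owning_shards.end();
}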
- std::unordered_set ancestors; - boost::range::transform(old_sstables_for_shard, std::inserter(ancestors, ancestors.end()), - std::mem_fn(&sstables::sstable::generation)); - - return smp::submit_to(shard, [cf, ancestors = std::move(ancestors)] () mutable { - cf->remove_ancestors_needed_rewrite(ancestors); - }); - } else { - return forward_sstables_to(shard, new_sstables_for_shard, cf, [cf] (auto sstables) { - cf->replace_ancestors_needed_rewrite(sstables); - }); - } - }); - }).then_wrapped([] (future<> f) { - try { - f.get(); - } catch (sstables::compaction_stop_exception& e) { - dblog.info("resharding was abruptly stopped, reason: {}", e.what()); - } catch (...) { - dblog.error("resharding failed: {}", std::current_exception()); - } - }); - }).get(); - }); - }); -} - -future<> distributed_loader::load_new_sstables(distributed& db, sstring ks, sstring cf, std::vector new_tables) { - return parallel_for_each(new_tables, [&db] (auto comps) { - auto cf_sstable_open = [comps] (column_family& cf, sstables::foreign_sstable_open_info info) { - auto f = cf.open_sstable(std::move(info), cf._config.datadir, comps.generation, comps.version, comps.format); - return f.then([&cf] (sstables::shared_sstable sst) mutable { - if (sst) { - cf._sstables_opened_but_not_loaded.push_back(sst); - } - return make_ready_future<>(); - }); - }; - return distributed_loader::open_sstable(db, comps, cf_sstable_open, service::get_local_compaction_priority()); - }).then([&db, ks, cf] { - return db.invoke_on_all([ks = std::move(ks), cfname = std::move(cf)] (database& db) { - auto& cf = db.find_column_family(ks, cfname); - return with_semaphore(cf._cache_update_sem, 1, [&cf] { - // atomically load all opened sstables into column family. - for (auto& sst : cf._sstables_opened_but_not_loaded) { - cf.load_sstable(sst, true); - } - cf._sstables_opened_but_not_loaded.clear(); - cf.trigger_compaction(); - // Drop entire cache for this column family because it may be populated - // with stale data. - return cf.get_row_cache().invalidate(); - }); - }); - }).then([&db, ks, cf] () mutable { - return smp::submit_to(0, [&db, ks = std::move(ks), cf = std::move(cf)] () mutable { - distributed_loader::reshard(db, std::move(ks), std::move(cf)); - }); - }); -} - -future distributed_loader::probe_file(distributed& db, sstring sstdir, sstring fname) { - using namespace sstables; - - entry_descriptor comps = entry_descriptor::make_descriptor(fname); - - // Every table will have a TOC. Using a specific file as a criteria, as - // opposed to, say verifying _sstables.count() to be zero is more robust - // against parallel loading of the directory contents. 
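// Sketch of the idea above: every sstable has exactly one TOC, so keying the
// load on the TOC component loads each generation exactly once, no matter
// how the directory listing interleaves the other components (std-only,
// illustrative names):

#include <set>

enum class component_type { TOC, Data, Index, Summary };

struct load_tracker {
    std::set<long> loaded;   // generations already triggered

    bool should_load(long generation, component_type c) {
        // insert().second is true only the first time we see this generation
        return c == component_type::TOC && loaded.insert(generation).second;
    }
};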
- if (comps.component != sstable::component_type::TOC) { - return make_ready_future(std::move(comps)); - } - auto cf_sstable_open = [sstdir, comps, fname] (column_family& cf, sstables::foreign_sstable_open_info info) { - cf.update_sstables_known_generation(comps.generation); - { - auto i = boost::range::find_if(*cf._sstables->all(), [gen = comps.generation] (sstables::shared_sstable sst) { return sst->generation() == gen; }); - if (i != cf._sstables->all()->end()) { - auto new_toc = sstdir + "/" + fname; - throw std::runtime_error(sprint("Attempted to add sstable generation %d twice: new=%s existing=%s", - comps.generation, new_toc, (*i)->toc_filename())); - } - } - return cf.open_sstable(std::move(info), sstdir, comps.generation, comps.version, comps.format).then([&cf] (sstables::shared_sstable sst) mutable { - if (sst) { - cf.load_sstable(sst); - return cf.get_row_cache().invalidate(); - } - return make_ready_future<>(); - }); - }; - - return distributed_loader::open_sstable(db, comps, cf_sstable_open).then_wrapped([fname] (future<> f) { - try { - f.get(); - } catch (malformed_sstable_exception& e) { - dblog.error("malformed sstable {}: {}. Refusing to boot", fname, e.what()); - throw; - } catch(...) { - dblog.error("Unrecognized error while processing {}: {}. Refusing to boot", - fname, std::current_exception()); - throw; - } - return make_ready_future<>(); - }).then([comps] () mutable { - return make_ready_future(std::move(comps)); - }); -} - -future<> distributed_loader::populate_column_family(distributed& db, sstring sstdir, sstring ks, sstring cf) { - // We can catch most errors when we try to load an sstable. But if the TOC - // file is the one missing, we won't try to load the sstable at all. This - // case is still an invalid case, but it is way easier for us to treat it - // by waiting for all files to be loaded, and then checking if we saw a - // file during scan_dir, without its corresponding TOC. - enum class status { - has_some_file, - has_toc_file, - has_temporary_toc_file, - }; - - struct sstable_descriptor { - std::experimental::optional version; - std::experimental::optional format; - }; - - auto verifier = make_lw_shared>(); - auto descriptor = make_lw_shared(); - - return do_with(std::vector>(), [&db, sstdir = std::move(sstdir), verifier, descriptor, ks, cf] (std::vector>& futures) { - return lister::scan_dir(sstdir, { directory_entry_type::regular }, [&db, verifier, descriptor, &futures] (lister::path sstdir, directory_entry de) { - // FIXME: The secondary indexes are in this level, but with a directory type, (starting with ".") - auto f = distributed_loader::probe_file(db, sstdir.native(), de.name).then([verifier, descriptor, sstdir, de] (auto entry) { - if (entry.component == sstables::sstable::component_type::TemporaryStatistics) { - return remove_file(sstables::sstable::filename(sstdir.native(), entry.ks, entry.cf, entry.version, entry.generation, - entry.format, sstables::sstable::component_type::TemporaryStatistics)); - } - - if (verifier->count(entry.generation)) { - if (verifier->at(entry.generation) == status::has_toc_file) { - lister::path file_path(sstdir / de.name.c_str()); - if (entry.component == sstables::sstable::component_type::TOC) { - throw sstables::malformed_sstable_exception("Invalid State encountered. TOC file already processed", file_path.native()); - } else if (entry.component == sstables::sstable::component_type::TemporaryTOC) { - throw sstables::malformed_sstable_exception("Invalid State encountered. 
Temporary TOC file found after TOC file was processed", file_path.native()); - } - } else if (entry.component == sstables::sstable::component_type::TOC) { - verifier->at(entry.generation) = status::has_toc_file; - } else if (entry.component == sstables::sstable::component_type::TemporaryTOC) { - verifier->at(entry.generation) = status::has_temporary_toc_file; - } - } else { - if (entry.component == sstables::sstable::component_type::TOC) { - verifier->emplace(entry.generation, status::has_toc_file); - } else if (entry.component == sstables::sstable::component_type::TemporaryTOC) { - verifier->emplace(entry.generation, status::has_temporary_toc_file); - } else { - verifier->emplace(entry.generation, status::has_some_file); - } - } - - // Retrieve both version and format used for this column family. - if (!descriptor->version) { - descriptor->version = entry.version; - } - if (!descriptor->format) { - descriptor->format = entry.format; - } - return make_ready_future<>(); - }); - - // push future returned by probe_file into an array of futures, - // so that the supplied callback will not block scan_dir() from - // reading the next entry in the directory. - futures.push_back(std::move(f)); - - return make_ready_future<>(); - }, &column_family::manifest_json_filter).then([&futures] { - return when_all(futures.begin(), futures.end()).then([] (std::vector> ret) { - std::exception_ptr eptr; - - for (auto& f : ret) { - try { - if (eptr) { - f.ignore_ready_future(); - } else { - f.get(); - } - } catch(...) { - eptr = std::current_exception(); - } - } - - if (eptr) { - return make_exception_future<>(eptr); - } - return make_ready_future<>(); - }); - }).then([verifier, sstdir, descriptor, ks = std::move(ks), cf = std::move(cf)] { - return do_for_each(*verifier, [sstdir = std::move(sstdir), ks = std::move(ks), cf = std::move(cf), descriptor, verifier] (auto v) { - if (v.second == status::has_temporary_toc_file) { - unsigned long gen = v.first; - assert(descriptor->version); - sstables::sstable::version_types version = descriptor->version.value(); - assert(descriptor->format); - sstables::sstable::format_types format = descriptor->format.value(); - - if (engine().cpu_id() != 0) { - dblog.debug("At directory: {}, partial SSTable with generation {} not relevant for this shard, ignoring", sstdir, v.first); - return make_ready_future<>(); - } - // shard 0 is the responsible for removing a partial sstable. - return sstables::sstable::remove_sstable_with_temp_toc(ks, cf, sstdir, gen, version, format); - } else if (v.second != status::has_toc_file) { - throw sstables::malformed_sstable_exception(sprint("At directory: %s: no TOC found for SSTable with generation %d!. 
Refusing to boot", sstdir, v.first)); - } - return make_ready_future<>(); - }); - }); - }).then([&db, ks, cf] { - return db.invoke_on_all([ks = std::move(ks), cfname = std::move(cf)] (database& db) { - auto& cf = db.find_column_family(ks, cfname); - // Make sure this is called even if CF is empty - cf.mark_ready_for_writes(); - }); - }); - -} - -inline -flush_cpu_controller -make_flush_cpu_controller(db::config& cfg, seastar::thread_scheduling_group* backup, std::function fn) { - if (cfg.auto_adjust_flush_quota()) { - return flush_cpu_controller(250ms, cfg.virtual_dirty_soft_limit(), std::move(fn)); - } - return flush_cpu_controller(flush_cpu_controller::disabled{backup}); -} - -utils::UUID database::empty_version = utils::UUID_gen::get_name_UUID(bytes{}); - -database::database() : database(db::config()) -{} - -database::database(const db::config& cfg) - : _stats(make_lw_shared()) - , _cl_stats(std::make_unique()) - , _cfg(std::make_unique(cfg)) - // Allow system tables a pool of 10 MB memory to write, but never block on other regions. - , _system_dirty_memory_manager(*this, 10 << 20, cfg.virtual_dirty_soft_limit()) - , _dirty_memory_manager(*this, memory::stats().total_memory() * 0.45, cfg.virtual_dirty_soft_limit()) - , _streaming_dirty_memory_manager(*this, memory::stats().total_memory() * 0.10, cfg.virtual_dirty_soft_limit()) - , _background_writer_scheduling_group(1ms, _cfg->background_writer_scheduling_quota()) - , _memtable_cpu_controller(make_flush_cpu_controller(*_cfg, &_background_writer_scheduling_group, [this, limit = 2.0f * _dirty_memory_manager.throttle_threshold()] { - return (_dirty_memory_manager.virtual_dirty_memory()) / limit; - })) - , _version(empty_version) - , _enable_incremental_backups(cfg.incremental_backups()) -{ - _compaction_manager.start(); - setup_metrics(); - - dblog.info("Row: max_vector_size: {}, internal_count: {}", size_t(row::max_vector_size), size_t(row::internal_count)); -} - -void flush_cpu_controller::adjust() { - auto mid = _goal + (hard_dirty_limit - _goal) / 2; - - auto dirty = _current_dirty(); - if (dirty < _goal) { - _current_quota = dirty * q1 / _goal; - } else if ((dirty >= _goal) && (dirty < mid)) { - _current_quota = q1 + (dirty - _goal) * (q2 - q1)/(mid - _goal); - } else { - _current_quota = q2 + (dirty - mid) * (qmax - q2) / (hard_dirty_limit - mid); - } - - dblog.trace("dirty {}, goal {}, mid {} quota {}", dirty, _goal, mid, _current_quota); - _scheduling_group.update_usage(_current_quota); -} - -flush_cpu_controller::flush_cpu_controller(std::chrono::milliseconds interval, float soft_limit, std::function current_dirty) - : _goal(soft_limit / 2) - , _current_dirty(std::move(current_dirty)) - , _interval(interval) - , _update_timer([this] { adjust(); }) - , _scheduling_group(1ms, 0.0f) - , _current_scheduling_group(&_scheduling_group) -{ - _update_timer.arm_periodic(_interval); -} - -void -dirty_memory_manager::setup_collectd(sstring namestr) { - namespace sm = seastar::metrics; - - _metrics.add_group("memory", { - sm::make_gauge(namestr + "_dirty_bytes", [this] { return real_dirty_memory(); }, - sm::description("Holds the current size of a all non-free memory in bytes: used memory + released memory that hasn't been returned to a free memory pool yet. " - "Total memory size minus this value represents the amount of available memory. 
" - "If this value minus virtual_dirty_bytes is too high then this means that the dirty memory eviction lags behind.")), - - sm::make_gauge(namestr +"_virtual_dirty_bytes", [this] { return virtual_dirty_memory(); }, - sm::description("Holds the size of used memory in bytes. Compare it to \"dirty_bytes\" to see how many memory is wasted (neither used nor available).")), - }); -} - -void -database::setup_metrics() { - _dirty_memory_manager.setup_collectd("regular"); - _system_dirty_memory_manager.setup_collectd("system"); - _streaming_dirty_memory_manager.setup_collectd("streaming"); - - namespace sm = seastar::metrics; - - _metrics.add_group("memory", { - sm::make_gauge("dirty_bytes", [this] { return _dirty_memory_manager.real_dirty_memory() + _system_dirty_memory_manager.real_dirty_memory() + _streaming_dirty_memory_manager.real_dirty_memory(); }, - sm::description("Holds the current size of all (\"regular\", \"system\" and \"streaming\") non-free memory in bytes: used memory + released memory that hasn't been returned to a free memory pool yet. " - "Total memory size minus this value represents the amount of available memory. " - "If this value minus virtual_dirty_bytes is too high then this means that the dirty memory eviction lags behind.")), - - sm::make_gauge("virtual_dirty_bytes", [this] { return _dirty_memory_manager.virtual_dirty_memory() + _system_dirty_memory_manager.virtual_dirty_memory() + _streaming_dirty_memory_manager.virtual_dirty_memory(); }, - sm::description("Holds the size of all (\"regular\", \"system\" and \"streaming\") used memory in bytes. Compare it to \"dirty_bytes\" to see how many memory is wasted (neither used nor available).")), - }); - - _metrics.add_group("memtables", { - sm::make_gauge("pending_flushes", _cf_stats.pending_memtables_flushes_count, - sm::description("Holds the current number of memtables that are currently being flushed to sstables. " - "High value in this mertic may be an indication of storage being a bottleneck.")), - - sm::make_gauge("pending_flushes_bytes", _cf_stats.pending_memtables_flushes_bytes, - sm::description("Holds the current number of bytes in memtables that are currently being flushed to sstables. " - "High value in this mertic may be an indication of storage being a bottleneck.")), - }); - - _metrics.add_group("database", { - sm::make_gauge("requests_blocked_memory_current", [this] { return _dirty_memory_manager.region_group().blocked_requests(); }, - sm::description( - seastar::format("Holds the current number of requests blocked due to reaching the memory quota ({}B). " - "Non-zero value indicates that our bottleneck is memory and more specifically - the memory quota allocated for the \"database\" component.", _dirty_memory_manager.throttle_threshold()))), - - sm::make_derive("requests_blocked_memory", [this] { return _dirty_memory_manager.region_group().blocked_requests_counter(); }, - sm::description(seastar::format("Holds the current number of requests blocked due to reaching the memory quota ({}B). " - "Non-zero value indicates that our bottleneck is memory and more specifically - the memory quota allocated for the \"database\" component.", _dirty_memory_manager.throttle_threshold()))), - - sm::make_derive("clustering_filter_count", _cf_stats.clustering_filter_count, - sm::description("Counts bloom filter invocations.")), - - sm::make_derive("clustering_filter_sstables_checked", _cf_stats.sstables_checked_by_clustering_filter, - sm::description("Counts sstables checked after applying the bloom filter. 
" - "High value indicates that bloom filter is not very efficient.")), - - sm::make_derive("clustering_filter_fast_path_count", _cf_stats.clustering_filter_fast_path_count, - sm::description("Counts number of times bloom filtering short cut to include all sstables when only one full range was specified.")), - - sm::make_derive("clustering_filter_surviving_sstables", _cf_stats.surviving_sstables_after_clustering_filter, - sm::description("Counts sstables that survived the clustering key filtering. " - "High value indicates that bloom filter is not very efficient and still have to access a lot of sstables to get data.")), - - sm::make_derive("total_writes", _stats->total_writes, - sm::description("Counts the total number of successful write operations performed by this shard.")), - - sm::make_derive("total_writes_failed", _stats->total_writes_failed, - sm::description("Counts the total number of failed write operations. " - "A sum of this value plus total_writes represents a total amount of writes attempted on this shard.")), - - sm::make_derive("total_writes_timedout", _stats->total_writes_timedout, - sm::description("Counts write operations failed due to a timeout. None zero value is a sign of storage being overloaded.")), - - sm::make_derive("total_reads", _stats->total_reads, - sm::description("Counts the total number of successful reads on this shard.")), - - sm::make_derive("total_reads_failed", _stats->total_reads_failed, - sm::description("Counts the total number of failed read operations. " - "Add the total_reads to this value to get the total amount of reads issued on this shard.")), - - sm::make_derive("sstable_read_queue_overloads", _stats->sstable_read_queue_overloaded, - sm::description("Counts the number of times the sstable read queue was overloaded. " - "A non-zero value indicates that we have to drop read requests because they arrive faster than we can serve them.")), - - sm::make_gauge("active_reads", [this] { return max_concurrent_reads() - _read_concurrency_sem.current(); }, - sm::description(seastar::format("Holds the number of currently active read operations. " - "If this vlaue gets close to {} we are likely to start dropping new read requests. " - "In that case sstable_read_queue_overloads is going to get a non-zero value.", max_concurrent_reads()))), - - sm::make_gauge("queued_reads", [this] { return _read_concurrency_sem.waiters(); }, - sm::description("Holds the number of currently queued read operations.")), - - sm::make_gauge("active_reads_streaming", [this] { return max_streaming_concurrent_reads() - _streaming_concurrency_sem.current(); }, - sm::description(seastar::format("Holds the number of currently active read operations issued on behalf of streaming " - "If this value gets close to {} we are likely to start dropping new read requests. " - "In that case sstable_read_queue_overloads is going to get a non-zero value.", max_streaming_concurrent_reads()))), - - sm::make_gauge("queued_reads_streaming", [this] { return _streaming_concurrency_sem.waiters(); }, - sm::description("Holds the number of currently queued read operations on behalf of streaming.")), - - sm::make_gauge("active_reads_system_keyspace", [this] { return max_system_concurrent_reads() - _system_read_concurrency_sem.current(); }, - sm::description(seastar::format("Holds the number of currently active read operations from \"system\" keyspace tables. " - "If this vlaue gets close to {} we are likely to start dropping new read requests. 
" - "In that case sstable_read_queue_overloads is going to get a non-zero value.", max_system_concurrent_reads()))), - - sm::make_gauge("queued_reads_system_keyspace", [this] { return _system_read_concurrency_sem.waiters(); }, - sm::description("Holds the number of currently queued read operations from \"system\" keyspace tables.")), - - sm::make_gauge("total_result_bytes", [this] { return get_result_memory_limiter().total_used_memory(); }, - sm::description("Holds the current amount of memory used for results.")), - - sm::make_gauge("cpu_flush_quota", [this] { return _memtable_cpu_controller.current_quota(); }, - sm::description("The current quota for memtable CPU scheduling group")), - - sm::make_derive("short_data_queries", _stats->short_data_queries, - sm::description("The rate of data queries (data or digest reads) that returned less rows than requested due to result size limiting.")), - - sm::make_derive("short_mutation_queries", _stats->short_mutation_queries, - sm::description("The rate of mutation queries that returned less rows than requested due to result size limiting.")), - - sm::make_total_operations("counter_cell_lock_acquisition", _cl_stats->lock_acquisitions, - sm::description("The number of acquired counter cell locks.")), - - sm::make_queue_length("counter_cell_lock_pending", _cl_stats->operations_waiting_for_lock, - sm::description("The number of counter updates waiting for a lock.")), - }); -} - -database::~database() { -} - -void database::update_version(const utils::UUID& version) { - _version = version; -} - -const utils::UUID& database::get_version() const { - return _version; -} - -future<> distributed_loader::populate_keyspace(distributed& db, sstring datadir, sstring ks_name) { - auto ksdir = datadir + "/" + ks_name; - auto& keyspaces = db.local().get_keyspaces(); - auto i = keyspaces.find(ks_name); - if (i == keyspaces.end()) { - dblog.warn("Skipping undefined keyspace: {}", ks_name); - return make_ready_future<>(); - } else { - dblog.info("Populating Keyspace {}", ks_name); - auto& ks = i->second; - auto& column_families = db.local().get_column_families(); - - return parallel_for_each(ks.metadata()->cf_meta_data() | boost::adaptors::map_values, - [ks_name, &ks, &column_families, &db] (schema_ptr s) { - utils::UUID uuid = s->id(); - lw_shared_ptr cf = column_families[uuid]; - sstring cfname = cf->schema()->cf_name(); - auto sstdir = ks.column_family_directory(cfname, uuid); - dblog.info("Keyspace {}: Reading CF {} ", ks_name, cfname); - return ks.make_directory_for_column_family(cfname, uuid).then([&db, sstdir, uuid, ks_name, cfname] { - return distributed_loader::populate_column_family(db, sstdir, ks_name, cfname); - }).handle_exception([ks_name, cfname, sstdir](std::exception_ptr eptr) { - std::string msg = - sprint("Exception while populating keyspace '%s' with column family '%s' from file '%s': %s", - ks_name, cfname, sstdir, eptr); - dblog.error("Exception while populating keyspace '{}' with column family '{}' from file '{}': {}", - ks_name, cfname, sstdir, eptr); - throw std::runtime_error(msg.c_str()); - }); - }); - } -} - -static future<> populate(distributed& db, sstring datadir) { - return lister::scan_dir(datadir, { directory_entry_type::directory }, [&db] (lister::path datadir, directory_entry de) { - auto& ks_name = de.name; - if (is_system_keyspace(ks_name)) { - return make_ready_future<>(); - } - return distributed_loader::populate_keyspace(db, datadir.native(), ks_name); - }); -} - -template -static future<> -do_parse_schema_tables(distributed& 
-    using namespace db::schema_tables;
-    static_assert(std::is_same<future<>, std::result_of_t<Func(schema_result_value_type&)>>::value,
-                  "bad Func signature");
-
-    auto cf_name = make_lw_shared<sstring>(_cf_name);
-    return db::system_keyspace::query(proxy, db::schema_tables::NAME, *cf_name).then([] (auto rs) {
-        auto names = std::set<sstring>();
-        for (auto& r : rs->rows()) {
-            auto keyspace_name = r.template get_nonnull<sstring>("keyspace_name");
-            names.emplace(keyspace_name);
-        }
-        return std::move(names);
-    }).then([&proxy, cf_name, func = std::forward<Func>(func)] (std::set<sstring>&& names) mutable {
-        return parallel_for_each(names.begin(), names.end(), [&proxy, cf_name, func = std::forward<Func>(func)] (sstring name) mutable {
-            if (is_system_keyspace(name)) {
-                return make_ready_future<>();
-            }
-
-            return read_schema_partition_for_keyspace(proxy, *cf_name, name).then([func, cf_name] (auto&& v) mutable {
-                return do_with(std::move(v), [func = std::forward<Func>(func), cf_name] (auto& v) {
-                    return func(v).then_wrapped([cf_name, &v] (future<> f) {
-                        try {
-                            f.get();
-                        } catch (std::exception& e) {
-                            dblog.error("Skipping: {}. Exception occurred when loading system table {}: {}", v.first, *cf_name, e.what());
-                        }
-                    });
-                });
-            });
-        });
-    });
-}
-
-future<> database::parse_system_tables(distributed<service::storage_proxy>& proxy) {
-    using namespace db::schema_tables;
-    return do_parse_schema_tables(proxy, db::schema_tables::KEYSPACES, [this] (schema_result_value_type& v) {
-        auto ksm = create_keyspace_from_schema_partition(v);
-        return create_keyspace(ksm);
-    }).then([&proxy, this] {
-        return do_parse_schema_tables(proxy, db::schema_tables::TYPES, [this, &proxy] (schema_result_value_type& v) {
-            auto&& user_types = create_types_from_schema_partition(v);
-            auto& ks = this->find_keyspace(v.first);
-            for (auto&& type : user_types) {
-                ks.add_user_type(type);
-            }
-            return make_ready_future<>();
-        });
-    }).then([&proxy, this] {
-        return do_parse_schema_tables(proxy, db::schema_tables::VIEWS, [this, &proxy] (schema_result_value_type& v) {
-            return create_views_from_schema_partition(proxy, v.second).then([this] (std::vector<view_ptr> views) {
-                return parallel_for_each(views.begin(), views.end(), [this] (auto&& v) {
-                    return this->add_column_family_and_make_directory(v);
-                });
-            });
-        });
-    }).then([&proxy, this] {
-        return do_parse_schema_tables(proxy, db::schema_tables::TABLES, [this, &proxy] (schema_result_value_type& v) {
-            return create_tables_from_tables_partition(proxy, v.second).then([this] (std::map<sstring, schema_ptr> tables) {
-                return parallel_for_each(tables.begin(), tables.end(), [this] (auto& t) {
-                    return this->add_column_family_and_make_directory(t.second);
-                });
-            });
-        });
-    });
-}
-
-future<> distributed_loader::init_system_keyspace(distributed<database>& db) {
-    return seastar::async([&db] {
-        // We need to init the commitlog on shard 0 before it is initialized on
-        // other shards, because it obtains the list of pre-existing segments for
-        // replay, which must not include reserve segments created by active commitlogs.
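// The ordering constraint above, reduced to its essence: shard 0 runs first,
// everyone else afterwards. A std-only stand-in for the invoke_on(0) +
// invoke_on_all pair below; assumes at least one shard:

#include <functional>
#include <vector>

static void init_all_shards(std::vector<std::function<void()>>& init_on_shard) {
    init_on_shard[0]();                                        // shard 0 first
    for (std::size_t shard = 1; shard < init_on_shard.size(); ++shard) {
        init_on_shard[shard]();
    }
}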
- db.invoke_on(0, [] (database& db) { - return db.init_commitlog(); - }).get(); - db.invoke_on_all([] (database& db) { - if (engine().cpu_id() == 0) { - return make_ready_future<>(); - } - return db.init_commitlog(); - }).get(); - - db.invoke_on_all([] (database& db) { - auto& cfg = db.get_config(); - bool durable = cfg.data_file_directories().size() > 0; - db::system_keyspace::make(db, durable, cfg.volatile_system_keyspace_for_testing()); - }).get(); - - // FIXME support multiple directories - const auto& cfg = db.local().get_config(); - auto data_dir = cfg.data_file_directories()[0]; - - for (auto ksname : system_keyspaces) { - io_check(touch_directory, data_dir + "/" + ksname).get(); - distributed_loader::populate_keyspace(db, data_dir, ksname).get(); - - db.invoke_on_all([ksname] (database& db) { - auto& ks = db.find_keyspace(ksname); - for (auto& pair : ks.metadata()->cf_meta_data()) { - auto cfm = pair.second; - auto& cf = db.find_column_family(cfm); - cf.mark_ready_for_writes(); - } - return make_ready_future<>(); - }).get(); - } - }); -} - -future<> distributed_loader::ensure_system_table_directories(distributed& db) { - return parallel_for_each(system_keyspaces, [&db](sstring ksname) { - auto& ks = db.local().find_keyspace(ksname); - return parallel_for_each(ks.metadata()->cf_meta_data(), [&ks] (auto& pair) { - auto cfm = pair.second; - return ks.make_directory_for_column_family(cfm->cf_name(), cfm->id()); - }); - }); -} - -future<> distributed_loader::init_non_system_keyspaces(distributed& db, distributed& proxy) { - return seastar::async([&db, &proxy] { - db.invoke_on_all([&proxy] (database& db) { - return db.parse_system_tables(proxy); - }).get(); - - const auto& cfg = db.local().get_config(); - populate(db, cfg.data_file_directories()[0]).get(); - }); -} - -future<> -database::init_commitlog() { - return db::commitlog::create_commitlog(*_cfg).then([this](db::commitlog&& log) { - _commitlog = std::make_unique(std::move(log)); - _commitlog->add_flush_handler([this](db::cf_id_type id, db::replay_position pos) { - if (_column_families.count(id) == 0) { - // the CF has been removed. - _commitlog->discard_completed_segments(id); - return; - } - _column_families[id]->flush(); - }).release(); // we have longer life time than CL. Ignore reg anchor - }); -} - -unsigned -database::shard_of(const dht::token& t) { - return dht::shard_of(t); -} - -unsigned -database::shard_of(const mutation& m) { - return shard_of(m.token()); -} - -unsigned -database::shard_of(const frozen_mutation& m) { - // FIXME: This lookup wouldn't be necessary if we - // sent the partition key in legacy form or together - // with token. 
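// What all three shard_of() overloads reduce to: hash the partition key to a
// token once, then map the token onto a shard. The frozen_mutation overload
// pays an extra schema lookup just to recover the key, hence the FIXME above.
// Uniform modulo is an assumption in this sketch; the real mapping is
// dht::shard_of():

static unsigned shard_of_token(unsigned long token, unsigned shard_count) {
    return static_cast<unsigned>(token % shard_count);
}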
- schema_ptr schema = find_schema(m.column_family_id()); - return shard_of(dht::global_partitioner().get_token(*schema, m.key(*schema))); -} - -void database::add_keyspace(sstring name, keyspace k) { - if (_keyspaces.count(name) != 0) { - throw std::invalid_argument("Keyspace " + name + " already exists"); - } - _keyspaces.emplace(std::move(name), std::move(k)); -} - -future<> database::update_keyspace(const sstring& name) { - auto& proxy = service::get_storage_proxy(); - return db::schema_tables::read_schema_partition_for_keyspace(proxy, db::schema_tables::KEYSPACES, name).then([this, name](db::schema_tables::schema_result_value_type&& v) { - auto& ks = find_keyspace(name); - - auto tmp_ksm = db::schema_tables::create_keyspace_from_schema_partition(v); - auto new_ksm = ::make_lw_shared(tmp_ksm->name(), tmp_ksm->strategy_name(), tmp_ksm->strategy_options(), tmp_ksm->durable_writes(), - boost::copy_range>(ks.metadata()->cf_meta_data() | boost::adaptors::map_values), ks.metadata()->user_types()); - ks.update_from(std::move(new_ksm)); - return service::get_local_migration_manager().notify_update_keyspace(ks.metadata()); - }); -} - -void database::drop_keyspace(const sstring& name) { - _keyspaces.erase(name); -} - -void database::add_column_family(keyspace& ks, schema_ptr schema, column_family::config cfg) { - schema = local_schema_registry().learn(schema); - schema->registry_entry()->mark_synced(); - - lw_shared_ptr cf; - if (cfg.enable_commitlog && _commitlog) { - cf = make_lw_shared(schema, std::move(cfg), *_commitlog, _compaction_manager, *_cl_stats); - } else { - cf = make_lw_shared(schema, std::move(cfg), column_family::no_commitlog(), _compaction_manager, *_cl_stats); - } - - auto uuid = schema->id(); - if (_column_families.count(uuid) != 0) { - throw std::invalid_argument("UUID " + uuid.to_sstring() + " already mapped"); - } - auto kscf = std::make_pair(schema->ks_name(), schema->cf_name()); - if (_ks_cf_to_uuid.count(kscf) != 0) { - throw std::invalid_argument("Column family " + schema->cf_name() + " exists"); - } - ks.add_or_update_column_family(schema); - cf->start(); - _column_families.emplace(uuid, std::move(cf)); - _ks_cf_to_uuid.emplace(std::move(kscf), uuid); - if (schema->is_view()) { - find_column_family(schema->view_info()->base_id()).add_or_update_view(view_ptr(schema)); - } -} - -future<> database::add_column_family_and_make_directory(schema_ptr schema) { - auto& ks = find_keyspace(schema->ks_name()); - add_column_family(ks, schema, ks.make_column_family_config(*schema, get_config())); - return ks.make_directory_for_column_family(schema->cf_name(), schema->id()); -} - -bool database::update_column_family(schema_ptr new_schema) { - column_family& cfm = find_column_family(new_schema->id()); - bool columns_changed = !cfm.schema()->equal_columns(*new_schema); - auto s = local_schema_registry().learn(new_schema); - s->registry_entry()->mark_synced(); - cfm.set_schema(s); - find_keyspace(s->ks_name()).metadata()->add_or_update_column_family(s); - if (s->is_view()) { - try { - find_column_family(s->view_info()->base_id()).add_or_update_view(view_ptr(s)); - } catch (no_such_column_family&) { - // Update view mutations received after base table drop. 
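// The swallow-one-exception pattern used above (and again in remove()): a
// view update racing with a base-table drop is benign, so exactly that error
// is ignored while anything else keeps propagating. An illustrative std-only
// sketch with hypothetical names:

#include <stdexcept>

struct no_such_column_family_error : std::runtime_error {
    using std::runtime_error::runtime_error;
};

template <typename Op>
void update_view_if_base_exists(Op&& op) {
    try {
        op();
    } catch (const no_such_column_family_error&) {
        // base table already dropped; nothing left to update
    }
}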
- } - } - return columns_changed; -} - -void database::remove(const column_family& cf) { - auto s = cf.schema(); - auto& ks = find_keyspace(s->ks_name()); - _column_families.erase(s->id()); - ks.metadata()->remove_column_family(s); - _ks_cf_to_uuid.erase(std::make_pair(s->ks_name(), s->cf_name())); - if (s->is_view()) { - try { - find_column_family(s->view_info()->base_id()).remove_view(view_ptr(s)); - } catch (no_such_column_family&) { - // Drop view mutations received after base table drop. - } - } -} - -future<> database::drop_column_family(const sstring& ks_name, const sstring& cf_name, timestamp_func tsf, bool snapshot) { - auto uuid = find_uuid(ks_name, cf_name); - auto cf = _column_families.at(uuid); - remove(*cf); - auto& ks = find_keyspace(ks_name); - return truncate(ks, *cf, std::move(tsf), snapshot).then([this, cf] { - return cf->stop(); - }).then([this, cf] { - return make_ready_future<>(); - }); -} - -const utils::UUID& database::find_uuid(const sstring& ks, const sstring& cf) const { - try { - return _ks_cf_to_uuid.at(std::make_pair(ks, cf)); - } catch (...) { - throw std::out_of_range(""); - } -} - -const utils::UUID& database::find_uuid(const schema_ptr& schema) const { - return find_uuid(schema->ks_name(), schema->cf_name()); -} - -keyspace& database::find_keyspace(const sstring& name) { - try { - return _keyspaces.at(name); - } catch (...) { - std::throw_with_nested(no_such_keyspace(name)); - } -} - -const keyspace& database::find_keyspace(const sstring& name) const { - try { - return _keyspaces.at(name); - } catch (...) { - std::throw_with_nested(no_such_keyspace(name)); - } -} - -bool database::has_keyspace(const sstring& name) const { - return _keyspaces.count(name) != 0; -} - -std::vector database::get_non_system_keyspaces() const { - std::vector res; - for (auto const &i : _keyspaces) { - if (!is_system_keyspace(i.first)) { - res.push_back(i.first); - } - } - return res; -} - -std::vector> database::get_non_system_column_families() const { - return boost::copy_range>>( - get_column_families() - | boost::adaptors::map_values - | boost::adaptors::filtered([](const lw_shared_ptr& cf) { - return !is_system_keyspace(cf->schema()->ks_name()); - })); -} - -column_family& database::find_column_family(const sstring& ks_name, const sstring& cf_name) { - try { - return find_column_family(find_uuid(ks_name, cf_name)); - } catch (...) { - std::throw_with_nested(no_such_column_family(ks_name, cf_name)); - } -} - -const column_family& database::find_column_family(const sstring& ks_name, const sstring& cf_name) const { - try { - return find_column_family(find_uuid(ks_name, cf_name)); - } catch (...) { - std::throw_with_nested(no_such_column_family(ks_name, cf_name)); - } -} - -column_family& database::find_column_family(const utils::UUID& uuid) { - try { - return *_column_families.at(uuid); - } catch (...) { - std::throw_with_nested(no_such_column_family(uuid)); - } -} - -const column_family& database::find_column_family(const utils::UUID& uuid) const { - try { - return *_column_families.at(uuid); - } catch (...) 
{ - std::throw_with_nested(no_such_column_family(uuid)); - } -} - -bool database::column_family_exists(const utils::UUID& uuid) const { - return _column_families.count(uuid); -} - -void -keyspace::create_replication_strategy(const std::map& options) { - using namespace locator; - - auto& ss = service::get_local_storage_service(); - _replication_strategy = - abstract_replication_strategy::create_replication_strategy( - _metadata->name(), _metadata->strategy_name(), - ss.get_token_metadata(), options); -} - -locator::abstract_replication_strategy& -keyspace::get_replication_strategy() { - return *_replication_strategy; -} - - -const locator::abstract_replication_strategy& -keyspace::get_replication_strategy() const { - return *_replication_strategy; -} - -void -keyspace::set_replication_strategy(std::unique_ptr replication_strategy) { - _replication_strategy = std::move(replication_strategy); -} - -void keyspace::update_from(::lw_shared_ptr ksm) { - _metadata = std::move(ksm); - create_replication_strategy(_metadata->strategy_options()); -} - -column_family::config -keyspace::make_column_family_config(const schema& s, const db::config& db_config) const { - column_family::config cfg; - cfg.datadir = column_family_directory(s.cf_name(), s.id()); - cfg.enable_disk_reads = _config.enable_disk_reads; - cfg.enable_disk_writes = _config.enable_disk_writes; - cfg.enable_commitlog = _config.enable_commitlog; - cfg.enable_cache = _config.enable_cache; - cfg.dirty_memory_manager = _config.dirty_memory_manager; - cfg.streaming_dirty_memory_manager = _config.streaming_dirty_memory_manager; - cfg.read_concurrency_config = _config.read_concurrency_config; - cfg.streaming_read_concurrency_config = _config.streaming_read_concurrency_config; - cfg.cf_stats = _config.cf_stats; - cfg.enable_incremental_backups = _config.enable_incremental_backups; - cfg.background_writer_scheduling_group = _config.background_writer_scheduling_group; - cfg.memtable_scheduling_group = _config.memtable_scheduling_group; - - return cfg; -} - -sstring -keyspace::column_family_directory(const sstring& name, utils::UUID uuid) const { - auto uuid_sstring = uuid.to_sstring(); - boost::erase_all(uuid_sstring, "-"); - return sprint("%s/%s-%s", _config.datadir, name, uuid_sstring); -} - -future<> -keyspace::make_directory_for_column_family(const sstring& name, utils::UUID uuid) { - auto cfdir = column_family_directory(name, uuid); - return seastar::async([cfdir = std::move(cfdir)] { - io_check(touch_directory, cfdir).get(); - io_check(touch_directory, cfdir + "/upload").get(); - }); -} - -no_such_keyspace::no_such_keyspace(const sstring& ks_name) - : runtime_error{sprint("Can't find a keyspace %s", ks_name)} -{ -} - -no_such_column_family::no_such_column_family(const utils::UUID& uuid) - : runtime_error{sprint("Can't find a column family with UUID %s", uuid)} -{ -} - -no_such_column_family::no_such_column_family(const sstring& ks_name, const sstring& cf_name) - : runtime_error{sprint("Can't find a column family %s in keyspace %s", cf_name, ks_name)} -{ -} - -column_family& database::find_column_family(const schema_ptr& schema) { - return find_column_family(schema->id()); -} - -const column_family& database::find_column_family(const schema_ptr& schema) const { - return find_column_family(schema->id()); -} - -void keyspace_metadata::validate() const { - using namespace locator; - - auto& ss = service::get_local_storage_service(); - abstract_replication_strategy::validate_replication_strategy(name(), strategy_name(), ss.get_token_metadata(), 
strategy_options()); -} - -std::vector keyspace_metadata::tables() const { - return boost::copy_range>(_cf_meta_data - | boost::adaptors::map_values - | boost::adaptors::filtered([] (auto&& s) { return !s->is_view(); })); -} - -std::vector keyspace_metadata::views() const { - return boost::copy_range>(_cf_meta_data - | boost::adaptors::map_values - | boost::adaptors::filtered(std::mem_fn(&schema::is_view)) - | boost::adaptors::transformed([] (auto&& s) { return view_ptr(s); })); -} - -schema_ptr database::find_schema(const sstring& ks_name, const sstring& cf_name) const { - try { - return find_schema(find_uuid(ks_name, cf_name)); - } catch (std::out_of_range&) { - std::throw_with_nested(no_such_column_family(ks_name, cf_name)); - } -} - -schema_ptr database::find_schema(const utils::UUID& uuid) const { - return find_column_family(uuid).schema(); -} - -bool database::has_schema(const sstring& ks_name, const sstring& cf_name) const { - return _ks_cf_to_uuid.count(std::make_pair(ks_name, cf_name)) > 0; -} - - -void database::create_in_memory_keyspace(const lw_shared_ptr& ksm) { - keyspace ks(ksm, std::move(make_keyspace_config(*ksm))); - ks.create_replication_strategy(ksm->strategy_options()); - _keyspaces.emplace(ksm->name(), std::move(ks)); -} - -future<> -database::create_keyspace(const lw_shared_ptr& ksm) { - auto i = _keyspaces.find(ksm->name()); - if (i != _keyspaces.end()) { - return make_ready_future<>(); - } - - create_in_memory_keyspace(ksm); - auto& datadir = _keyspaces.at(ksm->name()).datadir(); - if (datadir != "") { - return io_check(touch_directory, datadir); - } else { - return make_ready_future<>(); - } -} - -std::set -database::existing_index_names(const sstring& ks_name, const sstring& cf_to_exclude) const { - std::set names; - for (auto& schema : find_keyspace(ks_name).metadata()->tables()) { - if (!cf_to_exclude.empty() && schema->cf_name() == cf_to_exclude) { - continue; - } - for (const auto& index_name : schema->index_names()) { - names.emplace(index_name); - } - } - return names; -} - -// Based on: -// - org.apache.cassandra.db.AbstractCell#reconcile() -// - org.apache.cassandra.db.BufferExpiringCell#reconcile() -// - org.apache.cassandra.db.BufferDeletedCell#reconcile() -int -compare_atomic_cell_for_merge(atomic_cell_view left, atomic_cell_view right) { - if (left.timestamp() != right.timestamp()) { - return left.timestamp() > right.timestamp() ? 1 : -1; - } - if (left.is_live() != right.is_live()) { - return left.is_live() ? -1 : 1; - } - if (left.is_live()) { - auto c = compare_unsigned(left.value(), right.value()); - if (c != 0) { - return c; - } - if (left.is_live_and_has_ttl() != right.is_live_and_has_ttl()) { - // prefer expiring cells. - return left.is_live_and_has_ttl() ? 1 : -1; - } - if (left.is_live_and_has_ttl() && left.expiry() != right.expiry()) { - return left.expiry() < right.expiry() ? -1 : 1; - } - } else { - // Both are deleted - if (left.deletion_time() != right.deletion_time()) { - // Origin compares big-endian serialized deletion time. That's because it - // delegates to AbstractCell.reconcile() which compares values after - // comparing timestamps, which in case of deleted cells will hold - // serialized expiry. - return (uint32_t) left.deletion_time().time_since_epoch().count() - < (uint32_t) right.deletion_time().time_since_epoch().count() ? 
-1 : 1; - } - } - return 0; -} - -struct query_state { - explicit query_state(schema_ptr s, - const query::read_command& cmd, - query::result_request request, - const dht::partition_range_vector& ranges, - query::result_memory_accounter memory_accounter = { }) - : schema(std::move(s)) - , cmd(cmd) - , builder(cmd.slice, request, std::move(memory_accounter)) - , limit(cmd.row_limit) - , partition_limit(cmd.partition_limit) - , current_partition_range(ranges.begin()) - , range_end(ranges.end()){ - } - schema_ptr schema; - const query::read_command& cmd; - query::result::builder builder; - uint32_t limit; - uint32_t partition_limit; - bool range_empty = false; // Avoid ubsan false-positive when moving after construction - dht::partition_range_vector::const_iterator current_partition_range; - dht::partition_range_vector::const_iterator range_end; - mutation_reader reader; - uint32_t remaining_rows() const { - return limit - builder.row_count(); - } - uint32_t remaining_partitions() const { - return partition_limit - builder.partition_count(); - } - bool done() const { - return !remaining_rows() || !remaining_partitions() || current_partition_range == range_end || builder.is_short_read(); - } -}; - -future> -column_family::query(schema_ptr s, const query::read_command& cmd, query::result_request request, - const dht::partition_range_vector& partition_ranges, - tracing::trace_state_ptr trace_state, query::result_memory_limiter& memory_limiter, - uint64_t max_size) { - utils::latency_counter lc; - _stats.reads.set_latency(lc); - auto f = request == query::result_request::only_digest - ? memory_limiter.new_digest_read(max_size) : memory_limiter.new_data_read(max_size); - return f.then([this, lc, s = std::move(s), &cmd, request, &partition_ranges, trace_state = std::move(trace_state)] (query::result_memory_accounter accounter) mutable { - auto qs_ptr = std::make_unique(std::move(s), cmd, request, partition_ranges, std::move(accounter)); - auto& qs = *qs_ptr; - return do_until(std::bind(&query_state::done, &qs), [this, &qs, trace_state = std::move(trace_state)] { - auto&& range = *qs.current_partition_range++; - return data_query(qs.schema, as_mutation_source(), range, qs.cmd.slice, qs.remaining_rows(), - qs.remaining_partitions(), qs.cmd.timestamp, qs.builder, trace_state); - }).then([qs_ptr = std::move(qs_ptr), &qs] { - return make_ready_future>( - make_lw_shared(qs.builder.build())); - }).finally([lc, this]() mutable { - _stats.reads.mark(lc); - if (lc.is_start()) { - _stats.estimated_read.add(lc.latency(), _stats.reads.hist.count); - } - }); - }); -} - -mutation_source -column_family::as_mutation_source() const { - return mutation_source([this] (schema_ptr s, - const dht::partition_range& range, - const query::partition_slice& slice, - const io_priority_class& pc, - tracing::trace_state_ptr trace_state, - streamed_mutation::forwarding fwd, - mutation_reader::forwarding fwd_mr) { - return this->make_reader(std::move(s), range, slice, pc, std::move(trace_state), fwd, fwd_mr); - }); -} - -static thread_local auto data_query_stage = seastar::make_execution_stage("data_query", &column_family::query); - -future, cache_temperature> -database::query(schema_ptr s, const query::read_command& cmd, query::result_request request, const dht::partition_range_vector& ranges, tracing::trace_state_ptr trace_state, - uint64_t max_result_size) { - column_family& cf = find_column_family(cmd.cf_id); - return data_query_stage(&cf, std::move(s), seastar::cref(cmd), request, seastar::cref(ranges), - 
std::move(trace_state), seastar::ref(get_result_memory_limiter()), - max_result_size).then_wrapped([this, s = _stats, hit_rate = cf.get_global_cache_hit_rate()] (auto f) { - if (f.failed()) { - ++s->total_reads_failed; - return make_exception_future, cache_temperature>(f.get_exception()); - } else { - ++s->total_reads; - auto result = f.get0(); - s->short_data_queries += bool(result->is_short_read()); - return make_ready_future, cache_temperature>(std::move(result), hit_rate); - } - }); -} - -future -database::query_mutations(schema_ptr s, const query::read_command& cmd, const dht::partition_range& range, - query::result_memory_accounter&& accounter, tracing::trace_state_ptr trace_state) { - column_family& cf = find_column_family(cmd.cf_id); - return mutation_query(std::move(s), cf.as_mutation_source(), range, cmd.slice, cmd.row_limit, cmd.partition_limit, - cmd.timestamp, std::move(accounter), std::move(trace_state)).then_wrapped([this, s = _stats, hit_rate = cf.get_global_cache_hit_rate()] (auto f) { - if (f.failed()) { - ++s->total_reads_failed; - return make_exception_future(f.get_exception()); - } else { - ++s->total_reads; - auto result = f.get0(); - s->short_mutation_queries += bool(result.is_short_read()); - return make_ready_future(std::move(result), hit_rate); - } - }); -} - -std::unordered_set database::get_initial_tokens() { - std::unordered_set tokens; - sstring tokens_string = get_config().initial_token(); - try { - boost::split(tokens, tokens_string, boost::is_any_of(sstring(", "))); - } catch (...) { - throw std::runtime_error(sprint("Unable to parse initial_token=%s", tokens_string)); - } - tokens.erase(""); - return tokens; -} - -std::experimental::optional database::get_replace_address() { - auto& cfg = get_config(); - sstring replace_address = cfg.replace_address(); - sstring replace_address_first_boot = cfg.replace_address_first_boot(); - try { - if (!replace_address.empty()) { - return gms::inet_address(replace_address); - } else if (!replace_address_first_boot.empty()) { - return gms::inet_address(replace_address_first_boot); - } - return std::experimental::nullopt; - } catch (...) 
{ - return std::experimental::nullopt; - } -} - -bool database::is_replacing() { - sstring replace_address_first_boot = get_config().replace_address_first_boot(); - if (!replace_address_first_boot.empty() && db::system_keyspace::bootstrap_complete()) { - dblog.info("Replace address on first boot requested; this node is already bootstrapped"); - return false; - } - return bool(get_replace_address()); -} - -void database::register_connection_drop_notifier(netw::messaging_service& ms) { - ms.register_connection_drop_notifier([this] (gms::inet_address ep) { - dblog.debug("Drop hit rate info for {} because of disconnect", ep); - for (auto&& cf : get_non_system_column_families()) { - cf->drop_hit_rate(ep); - } - }); -} - -std::ostream& operator<<(std::ostream& out, const atomic_cell_or_collection& c) { - return out << to_hex(c._data); -} - -std::ostream& operator<<(std::ostream& os, const mutation& m) { - const ::schema& s = *m.schema(); - fprint(os, "{%s.%s key %s data ", s.ks_name(), s.cf_name(), m.decorated_key()); - os << m.partition() << "}"; - return os; -} - -std::ostream& operator<<(std::ostream& out, const column_family& cf) { - return fprint(out, "{column_family: %s/%s}", cf._schema->ks_name(), cf._schema->cf_name()); -} - -std::ostream& operator<<(std::ostream& out, const database& db) { - out << "{\n"; - for (auto&& e : db._column_families) { - auto&& cf = *e.second; - out << "(" << e.first.to_sstring() << ", " << cf.schema()->cf_name() << ", " << cf.schema()->ks_name() << "): " << cf << "\n"; - } - out << "}"; - return out; -} - -template -void column_family::do_apply(db::rp_handle&& h, Args&&... args) { - utils::latency_counter lc; - _stats.writes.set_latency(lc); - db::replay_position rp = h; - check_valid_rp(rp); - try { - _memtables->active_memtable().apply(std::forward(args)..., std::move(h)); - _highest_rp = std::max(_highest_rp, rp); - } catch (...) 
{ - _failed_counter_applies_to_memtable++; - throw; - } - _stats.writes.mark(lc); - if (lc.is_start()) { - _stats.estimated_write.add(lc.latency(), _stats.writes.hist.count); - } -} - -void -column_family::apply(const mutation& m, db::rp_handle&& h) { - do_apply(std::move(h), m); -} - -void -column_family::apply(const frozen_mutation& m, const schema_ptr& m_schema, db::rp_handle&& h) { - do_apply(std::move(h), m, m_schema); -} - -future database::do_apply_counter_update(column_family& cf, const frozen_mutation& fm, schema_ptr m_schema, - timeout_clock::time_point timeout,tracing::trace_state_ptr trace_state) { - auto m = fm.unfreeze(m_schema); - m.upgrade(cf.schema()); - - // prepare partition slice - std::vector static_columns; - static_columns.reserve(m.partition().static_row().size()); - m.partition().static_row().for_each_cell([&] (auto id, auto&&) { - static_columns.emplace_back(id); - }); - - query::clustering_row_ranges cr_ranges; - cr_ranges.reserve(8); - std::vector regular_columns; - regular_columns.reserve(32); - - for (auto&& cr : m.partition().clustered_rows()) { - cr_ranges.emplace_back(query::clustering_range::make_singular(cr.key())); - cr.row().cells().for_each_cell([&] (auto id, auto&&) { - regular_columns.emplace_back(id); - }); - } - - boost::sort(regular_columns); - regular_columns.erase(std::unique(regular_columns.begin(), regular_columns.end()), - regular_columns.end()); - - auto slice = query::partition_slice(std::move(cr_ranges), std::move(static_columns), - std::move(regular_columns), { }, { }, cql_serialization_format::internal(), query::max_rows); - - return do_with(std::move(slice), std::move(m), std::vector(), - [this, &cf, timeout, trace_state = std::move(trace_state)] (const query::partition_slice& slice, mutation& m, std::vector& locks) mutable { - tracing::trace(trace_state, "Acquiring counter locks"); - return cf.lock_counter_cells(m, timeout).then([&, m_schema = cf.schema(), trace_state = std::move(trace_state), timeout, this] (std::vector lcs) mutable { - locks = std::move(lcs); - - // Before counter update is applied it needs to be transformed from - // deltas to counter shards. To do that, we need to read the current - // counter state for each modified cell... - - tracing::trace(trace_state, "Reading counter values from the CF"); - return counter_write_query(m_schema, cf.as_mutation_source(), m.decorated_key(), slice, trace_state) - .then([this, &cf, &m, m_schema, timeout, trace_state] (auto mopt) { - // ...now, that we got existing state of all affected counter - // cells we can look for our shard in each of them, increment - // its clock and apply the delta. - transform_counter_updates_to_shards(m, mopt ? 
&*mopt : nullptr, cf.failed_counter_applies_to_memtable()); - tracing::trace(trace_state, "Applying counter update"); - return this->apply_with_commitlog(cf, m, timeout); - }).then([&m] { - return std::move(m); - }); - }); - }); -} - -void column_family::apply_streaming_mutation(schema_ptr m_schema, utils::UUID plan_id, const frozen_mutation& m, bool fragmented) { - if (fragmented) { - apply_streaming_big_mutation(std::move(m_schema), plan_id, m); - return; - } - _streaming_memtables->active_memtable().apply(m, m_schema); -} - -void column_family::apply_streaming_big_mutation(schema_ptr m_schema, utils::UUID plan_id, const frozen_mutation& m) { - auto it = _streaming_memtables_big.find(plan_id); - if (it == _streaming_memtables_big.end()) { - it = _streaming_memtables_big.emplace(plan_id, make_lw_shared<streaming_memtable_big>()).first; - it->second->memtables = _config.enable_disk_writes ? make_streaming_memtable_big_list(*it->second) : make_memory_only_memtable_list(); - } - auto entry = it->second; - entry->memtables->active_memtable().apply(m, m_schema); -} - -void -column_family::check_valid_rp(const db::replay_position& rp) const { - if (rp != db::replay_position() && rp < _lowest_allowed_rp) { - throw mutation_reordered_with_truncate_exception(); - } -} - -db::replay_position column_family::set_low_replay_position_mark() { - _lowest_allowed_rp = _highest_rp; - return _lowest_allowed_rp; -} - - -future<> dirty_memory_manager::shutdown() { - _db_shutdown_requested = true; - _should_flush.signal(); - return std::move(_waiting_flush).then([this] { - return _region_group.shutdown(); - }); -} - -future<> memtable_list::request_flush() { - if (!may_flush()) { - return make_ready_future<>(); - } else if (!_flush_coalescing) { - _flush_coalescing = shared_promise<>(); - return _dirty_memory_manager->get_flush_permit().then([this] (auto permit) { - auto current_flush = std::move(*_flush_coalescing); - _flush_coalescing = {}; - return _dirty_memory_manager->flush_one(*this, std::move(permit)).then_wrapped([this, current_flush = std::move(current_flush)] (auto f) mutable { - if (f.failed()) { - current_flush.set_exception(f.get_exception()); - } else { - current_flush.set_value(); - } - }); - }); - } else { - return _flush_coalescing->get_shared_future(); - } -} - -lw_shared_ptr<memtable> memtable_list::new_memtable() { - return make_lw_shared<memtable>(_current_schema(), *_dirty_memory_manager, this); -} - -future<> dirty_memory_manager::flush_one(memtable_list& mtlist, semaphore_units<> permit) { - auto* region = &(mtlist.back()->region()); - auto schema = mtlist.back()->schema(); - - add_to_flush_manager(region, std::move(permit)); - return get_units(_background_work_flush_serializer, 1).then([this, &mtlist, region, schema] (auto permit) mutable { - return mtlist.seal_active_memtable(memtable_list::flush_behavior::immediate).then_wrapped([this, region, schema, permit = std::move(permit)] (auto f) { - // There are two cases in which we may still need to remove the permits from here. - // - // 1) Some exception happened, and we can't know at which point. It could be that because - // of that, the permits are still dangling. We have to remove them. - // 2) If we are using a memory-only Column Family. That will never create a memtable - // flush object, and we'll never get rid of the permits. So we have to remove them - // here. 
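// Note: remove_from_flush_manager() (defined in database.hh below) only
// erases the region's entry if one is still present, so the call below is
// safe to make even when the permit has already been released.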
- this->remove_from_flush_manager(region); - if (f.failed()) { - dblog.error("Failed to flush memtable, {}:{}", schema->ks_name(), schema->cf_name()); - } - return std::move(f); - }); - }); -} - -future<> dirty_memory_manager::flush_when_needed() { - if (!_db) { - return make_ready_future<>(); - } - // If there are explicit flushes requested, we must wait for them to finish before we stop. - return do_until([this] { return _db_shutdown_requested; }, [this] { - auto has_work = [this] { return has_pressure() || _db_shutdown_requested; }; - return _should_flush.wait(std::move(has_work)).then([this] { - return get_flush_permit().then([this] (auto permit) { - // We give priority to explicit flushes. They are mainly user-initiated flushes, - // flushes coming from a DROP statement, or commitlog flushes. - if (_flush_serializer.waiters()) { - return make_ready_future<>(); - } - // condition abated while we waited for the semaphore - if (!this->has_pressure() || _db_shutdown_requested) { - return make_ready_future<>(); - } - // There are many criteria that can be used to select what is the best memtable to - // flush. Most of the time we want some coordination with the commitlog to allow us to - // release commitlog segments as early as we can. - // - // But during pressure condition, we'll just pick the CF that holds the largest - // memtable. The advantage of doing this is that this is objectively the one that will - // release the biggest amount of memory and is less likely to be generating tiny - // SSTables. - memtable& candidate_memtable = memtable::from_region(*(this->_region_group.get_largest_region())); - dirty_memory_manager* candidate_dirty_manager = &(dirty_memory_manager::from_region_group(candidate_memtable.region_group())); - // Do not wait. The semaphore will protect us against a concurrent flush. But we - // want to start a new one as soon as the permits are destroyed and the semaphore is - // made ready again, not when we are done with the current one. - candidate_dirty_manager->flush_one(*(candidate_memtable.get_memtable_list()), std::move(permit)); - return make_ready_future<>(); - }); - }); - }).finally([this] { - // We'll try to acquire the permit here to make sure we only really stop when there are no - // in-flight flushes. Our stop condition checks for the presence of waiters, but it could be - // that we have no waiters, but a flush still in flight. We wait for all background work to - // stop. When that stops, we know that the foreground work in the _flush_serializer has - // stopped as well. 
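// Waiting for all _max_background_work units below therefore acts as a
// barrier: the get_units() call can only complete once every outstanding
// background flush has returned its unit to _background_work_flush_serializer.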
- return get_units(_background_work_flush_serializer, _max_background_work).discard_result(); -} - -void dirty_memory_manager::start_reclaiming() noexcept { - _should_flush.signal(); -} - -future<> database::apply_in_memory(const frozen_mutation& m, schema_ptr m_schema, db::rp_handle&& h, timeout_clock::time_point timeout) { - return _dirty_memory_manager.region_group().run_when_memory_available([this, &m, m_schema = std::move(m_schema), h = std::move(h)]() mutable { - try { - auto& cf = find_column_family(m.column_family_id()); - cf.apply(m, m_schema, std::move(h)); - } catch (no_such_column_family&) { - dblog.error("Attempting to mutate non-existent table {}", m.column_family_id()); - } - }, timeout); -} - -future<> database::apply_in_memory(const mutation& m, column_family& cf, db::rp_handle&& h, timeout_clock::time_point timeout) { - return _dirty_memory_manager.region_group().run_when_memory_available([this, &m, &cf, h = std::move(h)]() mutable { - cf.apply(m, std::move(h)); - }, timeout); -} - -future<mutation> database::apply_counter_update(schema_ptr s, const frozen_mutation& m, timeout_clock::time_point timeout, tracing::trace_state_ptr trace_state) { - if (!s->is_synced()) { - throw std::runtime_error(sprint("attempted to mutate using not synced schema of %s.%s, version=%s", - s->ks_name(), s->cf_name(), s->version())); - } - try { - auto& cf = find_column_family(m.column_family_id()); - return do_apply_counter_update(cf, m, s, timeout, std::move(trace_state)); - } catch (no_such_column_family&) { - dblog.error("Attempting to mutate non-existent table {}", m.column_family_id()); - throw; - } -} - -static future<> maybe_handle_reorder(std::exception_ptr exp) { - try { - std::rethrow_exception(exp); - return make_exception_future(exp); - } catch (mutation_reordered_with_truncate_exception&) { - // This mutation raced with a truncate, so we can just drop it. - dblog.debug("replay_position reordering detected"); - return make_ready_future<>(); - } -} - -future<> database::apply_with_commitlog(column_family& cf, const mutation& m, timeout_clock::time_point timeout) { - if (cf.commitlog() != nullptr) { - return do_with(freeze(m), [this, &m, &cf, timeout] (frozen_mutation& fm) { - commitlog_entry_writer cew(m.schema(), fm); - return cf.commitlog()->add_entry(m.schema()->id(), cew, timeout); - }).then([this, &m, &cf, timeout] (db::rp_handle h) { - return apply_in_memory(m, cf, std::move(h), timeout).handle_exception(maybe_handle_reorder); - }); - } - return apply_in_memory(m, cf, {}, timeout); -} - -future<> database::apply_with_commitlog(schema_ptr s, column_family& cf, utils::UUID uuid, const frozen_mutation& m, timeout_clock::time_point timeout) { - auto cl = cf.commitlog(); - if (cl != nullptr) { - commitlog_entry_writer cew(s, m); - return cf.commitlog()->add_entry(uuid, cew, timeout).then([&m, this, s, timeout, cl](db::rp_handle h) { - return this->apply_in_memory(m, s, std::move(h), timeout).handle_exception(maybe_handle_reorder); - }); - } - return apply_in_memory(m, std::move(s), {}, timeout); -} - -future<> database::do_apply(schema_ptr s, const frozen_mutation& m, timeout_clock::time_point timeout) { - // I'm doing a nullcheck here since the init code path for db etc - // is a little in flux and commitlog is created only when db is - // initialized from datadir. 
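// (The null check itself lives in the apply_with_commitlog() overloads
// above, which fall back to apply_in_memory() with an empty rp_handle when
// the column family has no commitlog.)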
- auto uuid = m.column_family_id(); - auto& cf = find_column_family(uuid); - if (!s->is_synced()) { - throw std::runtime_error(sprint("attempted to mutate using not synced schema of %s.%s, version=%s", - s->ks_name(), s->cf_name(), s->version())); - } - if (cf.views().empty()) { - return apply_with_commitlog(std::move(s), cf, std::move(uuid), m, timeout); - } - auto f = cf.push_view_replica_updates(s, m); - return f.then([this, s = std::move(s), uuid = std::move(uuid), &m, timeout] { - auto& cf = find_column_family(uuid); - return apply_with_commitlog(std::move(s), cf, std::move(uuid), m, timeout); - }); -} - -struct db_apply_executor { - static auto get() { return &database::do_apply; } -}; -static thread_local auto apply_stage = seastar::make_execution_stage("db_apply", db_apply_executor::get()); - -future<> database::apply(schema_ptr s, const frozen_mutation& m, timeout_clock::time_point timeout) { - if (dblog.is_enabled(logging::log_level::trace)) { - dblog.trace("apply {}", m.pretty_printer(s)); - } - return apply_stage(this, std::move(s), seastar::cref(m), timeout).then_wrapped([this, s = _stats] (auto f) { - if (f.failed()) { - ++s->total_writes_failed; - try { - f.get(); - } catch (const timed_out_error&) { - ++s->total_writes_timedout; - throw; - } - assert(0 && "should not reach"); - } - ++s->total_writes; - return f; - }); -} - -future<> database::apply_streaming_mutation(schema_ptr s, utils::UUID plan_id, const frozen_mutation& m, bool fragmented) { - if (!s->is_synced()) { - throw std::runtime_error(sprint("attempted to mutate using not synced schema of %s.%s, version=%s", - s->ks_name(), s->cf_name(), s->version())); - } - return _streaming_dirty_memory_manager.region_group().run_when_memory_available([this, &m, plan_id, fragmented, s = std::move(s)] { - auto uuid = m.column_family_id(); - auto& cf = find_column_family(uuid); - cf.apply_streaming_mutation(s, plan_id, std::move(m), fragmented); - }); -} - -keyspace::config -database::make_keyspace_config(const keyspace_metadata& ksm) { - // FIXME support multiple directories - keyspace::config cfg; - if (_cfg->data_file_directories().size() > 0) { - cfg.datadir = sprint("%s/%s", _cfg->data_file_directories()[0], ksm.name()); - cfg.enable_disk_writes = !_cfg->enable_in_memory_data_store(); - cfg.enable_disk_reads = true; // we always read from disk - cfg.enable_commitlog = ksm.durable_writes() && _cfg->enable_commitlog() && !_cfg->enable_in_memory_data_store(); - cfg.enable_cache = _cfg->enable_cache(); - - } else { - cfg.datadir = ""; - cfg.enable_disk_writes = false; - cfg.enable_disk_reads = false; - cfg.enable_commitlog = false; - cfg.enable_cache = false; - } - cfg.dirty_memory_manager = &_dirty_memory_manager; - cfg.streaming_dirty_memory_manager = &_streaming_dirty_memory_manager; - cfg.read_concurrency_config.sem = &_read_concurrency_sem; - cfg.read_concurrency_config.timeout = _cfg->read_request_timeout_in_ms() * 1ms; - // Assume a queued read takes up 10kB of memory, and allow 2% of memory to be filled up with such reads. - cfg.read_concurrency_config.max_queue_length = memory::stats().total_memory() * 0.02 / 10000; - cfg.read_concurrency_config.raise_queue_overloaded_exception = [this] { - ++_stats->sstable_read_queue_overloaded; - throw std::runtime_error("sstable inactive read queue overloaded"); - }; - // No timeouts or queue length limits - a failure here can kill an entire repair. - // Trust the caller to limit concurrency. 
- cfg.streaming_read_concurrency_config.sem = &_streaming_concurrency_sem; - cfg.cf_stats = &_cf_stats; - cfg.enable_incremental_backups = _enable_incremental_backups; - - if (_cfg->background_writer_scheduling_quota() < 1.0f) { - cfg.background_writer_scheduling_group = &_background_writer_scheduling_group; - cfg.memtable_scheduling_group = _memtable_cpu_controller.scheduling_group(); - } - - return cfg; -} - -namespace db { - -std::ostream& operator<<(std::ostream& os, const write_type& t) { - switch(t) { - case write_type::SIMPLE: return os << "SIMPLE"; - case write_type::BATCH: return os << "BATCH"; - case write_type::UNLOGGED_BATCH: return os << "UNLOGGED_BATCH"; - case write_type::COUNTER: return os << "COUNTER"; - case write_type::BATCH_LOG: return os << "BATCH_LOG"; - case write_type::CAS: return os << "CAS"; - case write_type::VIEW: return os << "VIEW"; - } - abort(); -} - -std::ostream& operator<<(std::ostream& os, db::consistency_level cl) { - switch (cl) { - case db::consistency_level::ANY: return os << "ANY"; - case db::consistency_level::ONE: return os << "ONE"; - case db::consistency_level::TWO: return os << "TWO"; - case db::consistency_level::THREE: return os << "THREE"; - case db::consistency_level::QUORUM: return os << "QUORUM"; - case db::consistency_level::ALL: return os << "ALL"; - case db::consistency_level::LOCAL_QUORUM: return os << "LOCAL_QUORUM"; - case db::consistency_level::EACH_QUORUM: return os << "EACH_QUORUM"; - case db::consistency_level::SERIAL: return os << "SERIAL"; - case db::consistency_level::LOCAL_SERIAL: return os << "LOCAL_SERIAL"; - case db::consistency_level::LOCAL_ONE: return os << "LOCAL_ONE"; - default: abort(); - } -} - -} - -std::ostream& -operator<<(std::ostream& os, const exploded_clustering_prefix& ecp) { - // Can't pass to_hex() to transformed(), since it is overloaded, so wrap: - auto enhex = [] (auto&& x) { return to_hex(x); }; - return fprint(os, "prefix{%s}", ::join(":", ecp._v | boost::adaptors::transformed(enhex))); -} - -std::ostream& -operator<<(std::ostream& os, const atomic_cell_view& acv) { - if (acv.is_live()) { - return fprint(os, "atomic_cell{%s;ts=%d;expiry=%d,ttl=%d}", - to_hex(acv.value()), - acv.timestamp(), - acv.is_live_and_has_ttl() ? acv.expiry().time_since_epoch().count() : -1, - acv.is_live_and_has_ttl() ? 
acv.ttl().count() : 0); - } else { - return fprint(os, "atomic_cell{DEAD;ts=%d;deletion_time=%d}", - acv.timestamp(), acv.deletion_time().time_since_epoch().count()); - } -} - -std::ostream& -operator<<(std::ostream& os, const atomic_cell& ac) { - return os << atomic_cell_view(ac); -} - -sstring database::get_available_index_name(const sstring &ks_name, const sstring &cf_name, - std::experimental::optional index_name_root) const -{ - auto existing_names = existing_index_names(ks_name); - auto base_name = index_metadata::get_default_index_name(cf_name, index_name_root); - sstring accepted_name = base_name; - int i = 0; - while (existing_names.count(accepted_name) > 0) { - accepted_name = base_name + "_" + std::to_string(++i); - } - return accepted_name; -} - -schema_ptr database::find_indexed_table(const sstring& ks_name, const sstring& index_name) const { - for (auto& schema : find_keyspace(ks_name).metadata()->tables()) { - if (schema->has_index(index_name)) { - return schema; - } - } - return nullptr; -} - -future<> -database::stop() { - return _compaction_manager.stop().then([this] { - // try to ensure that CL has done disk flushing - if (_commitlog != nullptr) { - return _commitlog->shutdown(); - } - return make_ready_future<>(); - }).then([this] { - return parallel_for_each(_column_families, [this] (auto& val_pair) { - return val_pair.second->stop(); - }); - }).then([this] { - if (_commitlog != nullptr) { - return _commitlog->release(); - } - return make_ready_future<>(); - }).then([this] { - return _system_dirty_memory_manager.shutdown(); - }).then([this] { - return _dirty_memory_manager.shutdown(); - }).then([this] { - return _streaming_dirty_memory_manager.shutdown(); - }); -} - -future<> database::flush_all_memtables() { - return parallel_for_each(_column_families, [this] (auto& cfp) { - return cfp.second->flush(); - }); -} - -future<> database::truncate(sstring ksname, sstring cfname, timestamp_func tsf) { - auto& ks = find_keyspace(ksname); - auto& cf = find_column_family(ksname, cfname); - return truncate(ks, cf, std::move(tsf)); -} - -future<> database::truncate(const keyspace& ks, column_family& cf, timestamp_func tsf, bool with_snapshot) -{ - const auto durable = ks.metadata()->durable_writes(); - const auto auto_snapshot = with_snapshot && get_config().auto_snapshot(); - - // Force mutations coming in to re-acquire higher rp:s - // This creates a "soft" ordering, in that we will guarantee that - // any sstable written _after_ we issue the flush below will - // only have higher rp:s than we will get from the discard_sstable - // call. - auto low_mark = cf.set_low_replay_position_mark(); - - future<> f = make_ready_future<>(); - if (durable || auto_snapshot) { - // TODO: - // this is not really a guarantee at all that we've actually - // gotten all things to disk. Again, need queue-ish or something. 
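// When writes are neither durable nor snapshotted, the memtables can simply
// be dropped (cf.clear() below also invalidates the cache); otherwise we
// flush first so the optional snapshot and the truncation record capture
// everything written so far.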
- f = cf.flush(); - } else { - f = cf.clear(); - } - - return cf.run_with_compaction_disabled([f = std::move(f), &cf, auto_snapshot, tsf = std::move(tsf), low_mark]() mutable { - return f.then([&cf, auto_snapshot, tsf = std::move(tsf), low_mark] { - dblog.debug("Discarding sstable data for truncated CF + indexes"); - // TODO: notify truncation - - return tsf().then([&cf, auto_snapshot, low_mark](db_clock::time_point truncated_at) { - future<> f = make_ready_future<>(); - if (auto_snapshot) { - auto name = sprint("%d-%s", truncated_at.time_since_epoch().count(), cf.schema()->cf_name()); - f = cf.snapshot(name); - } - return f.then([&cf, truncated_at, low_mark] { - return cf.discard_sstables(truncated_at).then([&cf, truncated_at, low_mark](db::replay_position rp) { - // TODO: verify that rp == db::replay_position is because we have no sstables (and no data flushed) - if (rp == db::replay_position()) { - return make_ready_future(); - } - // TODO: indexes. - assert(low_mark <= rp); - return db::system_keyspace::save_truncation_record(cf, truncated_at, rp); - }); - }); - }); - }); - }); -} - -const sstring& database::get_snitch_name() const { - return _cfg->endpoint_snitch(); -} - -// For the filesystem operations, this code will assume that all keyspaces are visible in all shards -// (as we have been doing for a lot of the other operations, like the snapshot itself). -future<> database::clear_snapshot(sstring tag, std::vector keyspace_names) { - namespace bf = boost::filesystem; - - std::vector data_dirs = _cfg->data_file_directories(); - lw_shared_ptr dirs_only_entries_ptr = make_lw_shared({ directory_entry_type::directory }); - lw_shared_ptr tag_ptr = make_lw_shared(std::move(tag)); - std::unordered_set ks_names_set(keyspace_names.begin(), keyspace_names.end()); - - return parallel_for_each(data_dirs, [this, tag_ptr, ks_names_set = std::move(ks_names_set), dirs_only_entries_ptr] (const sstring& parent_dir) { - std::unique_ptr filter = std::make_unique([] (const lister::path& parent_dir, const directory_entry& dir_entry) { return true; }); - - // if specific keyspaces names were given - filter only these keyspaces directories - if (!ks_names_set.empty()) { - filter = std::make_unique([ks_names_set = std::move(ks_names_set)] (const lister::path& parent_dir, const directory_entry& dir_entry) { - return ks_names_set.find(dir_entry.name) != ks_names_set.end(); - }); - } - - // - // The keyspace data directories and their snapshots are arranged as follows: - // - // - // |- - // | |- - // | |- snapshots - // | |- - // | |- - // | |- - // | |- ... - // | |- - // | |- ... - // | |- - // | |- ... - // |- - // |- ... 
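// The three nested scan_dir() calls below mirror that layout: the outermost
// walks the keyspace directories, the next the column family directories,
// and the innermost locates each "snapshots" subdirectory, optionally
// filtered down to a single tag.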
- // - return lister::scan_dir(parent_dir, *dirs_only_entries_ptr, [this, tag_ptr, dirs_only_entries_ptr] (lister::path parent_dir, directory_entry de) { - // KS directory - return lister::scan_dir(parent_dir / de.name.c_str(), *dirs_only_entries_ptr, [this, tag_ptr, dirs_only_entries_ptr] (lister::path parent_dir, directory_entry de) mutable { - // CF directory - return lister::scan_dir(parent_dir / de.name.c_str(), *dirs_only_entries_ptr, [this, tag_ptr, dirs_only_entries_ptr] (lister::path parent_dir, directory_entry de) mutable { - // "snapshots" directory - lister::path snapshots_dir(parent_dir / de.name.c_str()); - if (tag_ptr->empty()) { - dblog.info("Removing {}", snapshots_dir.native()); - // kill the whole "snapshots" subdirectory - return lister::rmdir(std::move(snapshots_dir)); - } else { - return lister::scan_dir(std::move(snapshots_dir), *dirs_only_entries_ptr, [this, tag_ptr] (lister::path parent_dir, directory_entry de) { - lister::path snapshot_dir(parent_dir / de.name.c_str()); - dblog.info("Removing {}", snapshot_dir.native()); - return lister::rmdir(std::move(snapshot_dir)); - }, [tag_ptr] (const lister::path& parent_dir, const directory_entry& dir_entry) { return dir_entry.name == *tag_ptr; }); - } - }, [] (const lister::path& parent_dir, const directory_entry& dir_entry) { return dir_entry.name == "snapshots"; }); - }); - }, *filter); - }); -} - -future<> update_schema_version_and_announce(distributed& proxy) -{ - return db::schema_tables::calculate_schema_digest(proxy).then([&proxy] (utils::UUID uuid) { - return proxy.local().get_db().invoke_on_all([uuid] (database& db) { - db.update_version(uuid); - return make_ready_future<>(); - }).then([uuid] { - return db::system_keyspace::update_schema_version(uuid).then([uuid] { - dblog.info("Schema version changed to {}", uuid); - return service::get_local_migration_manager().passive_announce(uuid); - }); - }); - }); -} - -// Snapshots: snapshotting the files themselves is easy: if more than one CF -// happens to link an SSTable twice, all but one will fail, and we will end up -// with one copy. -// -// The problem for us, is that the snapshot procedure is supposed to leave a -// manifest file inside its directory. So if we just call snapshot() from -// multiple shards, only the last one will succeed, writing its own SSTables to -// the manifest leaving all other shards' SSTables unaccounted for. -// -// Moreover, for things like drop table, the operation should only proceed when the -// snapshot is complete. That includes the manifest file being correctly written, -// and for this reason we need to wait for all shards to finish their snapshotting -// before we can move on. -// -// To know which files we must account for in the manifest, we will keep an -// SSTable set. Theoretically, we could just rescan the snapshot directory and -// see what's in there. But we would need to wait for all shards to finish -// before we can do that anyway. That is the hard part, and once that is done -// keeping the files set is not really a big deal. -// -// This code assumes that all shards will be snapshotting at the same time. So -// far this is a safe assumption, but if we ever want to take snapshots from a -// group of shards only, this code will have to be updated to account for that. 
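// To make the coordination above concrete, here is an illustrative sketch
// (not part of the patch) of the same two-semaphore rendezvous. It runs on
// the shard that owns the snapshot directory (reached via smp::submit_to,
// as in snapshot() below) once per participating shard; all names are
// hypothetical.
struct rendezvous {
    semaphore requests{0};       // counts shards that have checked in
    semaphore manifest_write{0}; // released once the manifest is sealed
};

future<> participate(rendezvous& r, bool leader, std::function<future<> ()> single_writer_step) {
    r.requests.signal(1);
    auto lead = make_ready_future<>();
    if (leader) {
        // The leader waits for all smp::count check-ins, performs the
        // single-writer step (manifest sealing, in the real code), and then
        // releases everyone.
        lead = r.requests.wait(smp::count).then([&r, step = std::move(single_writer_step)] {
            return step().then([&r] { r.manifest_write.signal(smp::count); });
        });
    }
    // Followers (and, last of all, the leader) block until the manifest is sealed.
    return lead.then([&r] { return r.manifest_write.wait(1); });
}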
-struct snapshot_manager { - std::unordered_set files; - semaphore requests; - semaphore manifest_write; - snapshot_manager() : requests(0), manifest_write(0) {} -}; -static thread_local std::unordered_map> pending_snapshots; - -static future<> -seal_snapshot(sstring jsondir) { - std::ostringstream ss; - int n = 0; - ss << "{" << std::endl << "\t\"files\" : [ "; - for (auto&& rf: pending_snapshots.at(jsondir)->files) { - if (n++ > 0) { - ss << ", "; - } - ss << "\"" << rf << "\""; - } - ss << " ]" << std::endl << "}" << std::endl; - - auto json = ss.str(); - auto jsonfile = jsondir + "/manifest.json"; - - dblog.debug("Storing manifest {}", jsonfile); - - return io_check(recursive_touch_directory, jsondir).then([jsonfile, json = std::move(json)] { - return open_checked_file_dma(general_disk_error_handler, jsonfile, open_flags::wo | open_flags::create | open_flags::truncate).then([json](file f) { - return do_with(make_file_output_stream(std::move(f)), [json] (output_stream& out) { - return out.write(json.c_str(), json.size()).then([&out] { - return out.flush(); - }).then([&out] { - return out.close(); - }); - }); - }); - }).then([jsondir] { - return io_check(sync_directory, std::move(jsondir)); - }).finally([jsondir] { - pending_snapshots.erase(jsondir); - return make_ready_future<>(); - }); -} - -future<> column_family::snapshot(sstring name) { - return flush().then([this, name = std::move(name)]() { - auto tables = boost::copy_range>(*_sstables->all()); - return do_with(std::move(tables), [this, name](std::vector & tables) { - auto jsondir = _config.datadir + "/snapshots/" + name; - - return parallel_for_each(tables, [name](sstables::shared_sstable sstable) { - auto dir = sstable->get_dir() + "/snapshots/" + name; - return io_check(recursive_touch_directory, dir).then([sstable, dir] { - return sstable->create_links(dir).then_wrapped([] (future<> f) { - // If the SSTables are shared, one of the CPUs will fail here. - // That is completely fine, though. We only need one link. - try { - f.get(); - } catch (std::system_error& e) { - if (e.code() != std::error_code(EEXIST, std::system_category())) { - throw; - } - } - return make_ready_future<>(); - }); - }); - }).then([jsondir, &tables] { - // This is not just an optimization. If we have no files, jsondir may not have been created, - // and sync_directory would throw. 
- if (tables.size()) { - return io_check(sync_directory, std::move(jsondir)); - } else { - return make_ready_future<>(); - } - }).finally([this, &tables, jsondir] { - auto shard = std::hash()(jsondir) % smp::count; - std::unordered_set table_names; - for (auto& sst : tables) { - auto f = sst->get_filename(); - auto rf = f.substr(sst->get_dir().size() + 1); - table_names.insert(std::move(rf)); - } - return smp::submit_to(shard, [requester = engine().cpu_id(), jsondir = std::move(jsondir), - tables = std::move(table_names), datadir = _config.datadir] { - - if (pending_snapshots.count(jsondir) == 0) { - pending_snapshots.emplace(jsondir, make_lw_shared()); - } - auto snapshot = pending_snapshots.at(jsondir); - for (auto&& sst: tables) { - snapshot->files.insert(std::move(sst)); - } - - snapshot->requests.signal(1); - auto my_work = make_ready_future<>(); - if (requester == engine().cpu_id()) { - my_work = snapshot->requests.wait(smp::count).then([jsondir = std::move(jsondir), - snapshot] () mutable { - return seal_snapshot(jsondir).then([snapshot] { - snapshot->manifest_write.signal(smp::count); - return make_ready_future<>(); - }); - }); - } - return my_work.then([snapshot] { - return snapshot->manifest_write.wait(1); - }).then([snapshot] {}); - }); - }); - }); - }); -} - -future column_family::snapshot_exists(sstring tag) { - sstring jsondir = _config.datadir + "/snapshots/" + tag; - return open_checked_directory(general_disk_error_handler, std::move(jsondir)).then_wrapped([] (future f) { - try { - f.get0(); - return make_ready_future(true); - } catch (std::system_error& e) { - if (e.code() != std::error_code(ENOENT, std::system_category())) { - throw; - } - return make_ready_future(false); - } - }); -} - -future> column_family::get_snapshot_details() { - std::unordered_map all_snapshots; - return do_with(std::move(all_snapshots), lister::path(_config.datadir) / "snapshots", [this] (auto& all_snapshots, const lister::path& snapshots_dir) { - return io_check([&] { return engine().file_exists(snapshots_dir.native()); }).then([this, &all_snapshots, &snapshots_dir](bool file_exists) { - if (!file_exists) { - return make_ready_future<>(); - } - return lister::scan_dir(snapshots_dir, { directory_entry_type::directory }, [this, &all_snapshots] (lister::path snapshots_dir, directory_entry de) { - auto snapshot_name = de.name; - all_snapshots.emplace(snapshot_name, snapshot_details()); - return lister::scan_dir(snapshots_dir / snapshot_name.c_str(), { directory_entry_type::regular }, [this, &all_snapshots, snapshot_name = std::move(snapshot_name)] (lister::path snapshot_dir, directory_entry de) { - return io_check(file_size, (snapshot_dir / de.name.c_str()).native()).then([this, &all_snapshots, snapshot_name, name = de.name] (auto size) { - // The manifest is the only file expected to be in this directory not belonging to the SSTable. - // For it, we account the total size, but zero it for the true size calculation. - // - // All the others should just generate an exception: there is something wrong, so don't blindly - // add it to the size. - if (name != "manifest.json") { - sstables::entry_descriptor::make_descriptor(name); - all_snapshots.at(snapshot_name).total += size; - } else { - size = 0; - } - return make_ready_future(size); - }).then([this, &all_snapshots, snapshot_name, name = de.name] (auto size) { - // FIXME: When we support multiple data directories, the file may not necessarily - // live in this same location. May have to test others as well. 
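// In other words: "total" accumulates every byte stored under the snapshot
// directory, while the continuation below only adds a file's size to "live"
// when the same file no longer exists in the main table directory, i.e. when
// the snapshot is the only thing keeping those bytes on disk.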
- return io_check(file_size, (lister::path(_config.datadir) / name.c_str()).native()).then_wrapped([&all_snapshots, snapshot_name, size] (auto fut) { - try { - // File exists in the main SSTable directory. Snapshots are not contributing to size - fut.get0(); - } catch (std::system_error& e) { - if (e.code() != std::error_code(ENOENT, std::system_category())) { - throw; - } - all_snapshots.at(snapshot_name).live += size; - } - return make_ready_future<>(); - }); - }); - }); - }); - }).then([&all_snapshots] { - return std::move(all_snapshots); - }); - }); -} - -future<> column_family::flush() { - return _memtables->request_flush(); -} - -// FIXME: We can do much better than this in terms of cache management. Right -// now, we only have to flush the touched ranges because of the possibility of -// streaming containing token ownership changes. -// -// Right now we can't differentiate between that and a normal repair process, -// so we always flush. When we can differentiate those streams, we should not -// be indiscriminately touching the cache during repair. We will just have to -// invalidate the entries that are relevant to things we already have in the cache. -future<> column_family::flush_streaming_mutations(utils::UUID plan_id, dht::partition_range_vector ranges) { - // This will effectively take the gate twice for this call. The proper way to fix that would - // be to change seal_active_streaming_memtable_delayed to take a range parameter. However, we - // need this code to go away as soon as we can (see FIXME above). So the double gate is a better - // temporary counter measure. - return with_gate(_streaming_flush_gate, [this, plan_id, ranges = std::move(ranges)] () mutable { - return flush_streaming_big_mutations(plan_id).then([this, ranges = std::move(ranges)] (auto sstables) mutable { - return _streaming_memtables->seal_active_memtable(memtable_list::flush_behavior::delayed).then([this] { - return _streaming_flush_phaser.advance_and_await(); - }).then([this, sstables = std::move(sstables), ranges = std::move(ranges)] () mutable { - return with_semaphore(_cache_update_sem, 1, [this, sstables = std::move(sstables), ranges = std::move(ranges)] () mutable { - for (auto&& sst : sstables) { - // seal_active_streaming_memtable_big() ensures sst is unshared. 
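// The single-element shard set {engine().cpu_id()} records this shard as the
// sole owner of the newly added sstable.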
- this->add_sstable(sst, {engine().cpu_id()}); - } - this->trigger_compaction(); - return _cache.invalidate(std::move(ranges)); - }); - }); - }); - }); -} - -future> column_family::flush_streaming_big_mutations(utils::UUID plan_id) { - auto it = _streaming_memtables_big.find(plan_id); - if (it == _streaming_memtables_big.end()) { - return make_ready_future>(std::vector()); - } - auto entry = it->second; - _streaming_memtables_big.erase(it); - return entry->memtables->request_flush().then([entry] { - return entry->flush_in_progress.close(); - }).then([this, entry] { - return parallel_for_each(entry->sstables, [this] (auto& sst) { - return sst->seal_sstable(this->incremental_backups_enabled()).then([sst] { - return sst->open_data(); - }); - }).then([this, entry] { - return std::move(entry->sstables); - }); - }); -} - -future<> column_family::fail_streaming_mutations(utils::UUID plan_id) { - auto it = _streaming_memtables_big.find(plan_id); - if (it == _streaming_memtables_big.end()) { - return make_ready_future<>(); - } - auto entry = it->second; - _streaming_memtables_big.erase(it); - return entry->flush_in_progress.close().then([this, entry] { - for (auto&& sst : entry->sstables) { - sst->mark_for_deletion(); - } - }); -} - -future<> column_family::clear() { - if (_commitlog) { - _commitlog->discard_completed_segments(_schema->id()); - } - _memtables->clear(); - _memtables->add_memtable(); - _streaming_memtables->clear(); - _streaming_memtables->add_memtable(); - _streaming_memtables_big.clear(); - return _cache.invalidate(); -} - -// NOTE: does not need to be futurized, but might eventually, depending on -// if we implement notifications, whatnot. -future column_family::discard_sstables(db_clock::time_point truncated_at) { - assert(_compaction_disabled > 0); - - return with_lock(_sstables_lock.for_read(), [this, truncated_at] { - db::replay_position rp; - auto gc_trunc = to_gc_clock(truncated_at); - - auto pruned = make_lw_shared(_compaction_strategy.make_sstable_set(_schema)); - std::vector remove; - - for (auto&p : *_sstables->all()) { - if (p->max_data_age() <= gc_trunc) { - rp = std::max(p->get_stats_metadata().position, rp); - remove.emplace_back(p); - continue; - } - pruned->insert(p); - } - - _sstables = std::move(pruned); - dblog.debug("cleaning out row cache"); - return _cache.invalidate().then([rp, remove = std::move(remove)] () mutable { - return parallel_for_each(remove, [](sstables::shared_sstable s) { - return sstables::delete_atomically({s}); - }).then([rp] { - return make_ready_future(rp); - }).finally([remove] {}); // keep the objects alive until here. 
- }); - }); -} - - -std::ostream& operator<<(std::ostream& os, const user_types_metadata& m) { - os << "org.apache.cassandra.config.UTMetaData@" << &m; - return os; -} - -std::ostream& operator<<(std::ostream& os, const keyspace_metadata& m) { - os << "KSMetaData{"; - os << "name=" << m._name; - os << ", strategyClass=" << m._strategy_name; - os << ", strategyOptions={"; - int n = 0; - for (auto& p : m._strategy_options) { - if (n++ != 0) { - os << ", "; - } - os << p.first << "=" << p.second; - } - os << "}"; - os << ", cfMetaData={"; - n = 0; - for (auto& p : m._cf_meta_data) { - if (n++ != 0) { - os << ", "; - } - os << p.first << "=" << p.second; - } - os << "}"; - os << ", durable_writes=" << m._durable_writes; - os << ", userTypes=" << m._user_types; - os << "}"; - return os; -} - -void column_family::set_schema(schema_ptr s) { - dblog.debug("Changing schema version of {}.{} ({}) from {} to {}", - _schema->ks_name(), _schema->cf_name(), _schema->id(), _schema->version(), s->version()); - - for (auto& m : *_memtables) { - m->set_schema(s); - } - - for (auto& m : *_streaming_memtables) { - m->set_schema(s); - } - - for (auto smb : _streaming_memtables_big) { - for (auto m : *smb.second->memtables) { - m->set_schema(s); - } - } - - _cache.set_schema(s); - _counter_cell_locks->set_schema(s); - _schema = std::move(s); - - set_compaction_strategy(_schema->compaction_strategy()); - trigger_compaction(); -} - -static std::vector::iterator find_view(std::vector& views, const view_ptr& v) { - return std::find_if(views.begin(), views.end(), [&v] (auto&& e) { - return e->cf_name() == v->cf_name(); - }); -} -void column_family::add_or_update_view(view_ptr v) { - auto existing = find_view(_views, v); - if (existing != _views.end()) { - *existing = std::move(v); - } else { - _views.push_back(std::move(v)); - } -} - -void column_family::remove_view(view_ptr v) { - auto existing = find_view(_views, v); - if (existing != _views.end()) { - _views.erase(existing); - } -} - -const std::vector& column_family::views() const { - return _views; -} - -std::vector column_family::affected_views(const schema_ptr& base, const mutation& update) const { - //FIXME: Avoid allocating a vector here; consider returning the boost iterator. - return boost::copy_range>(_views | boost::adaptors::filtered([&, this] (auto&& view) { - return db::view::partition_key_matches(*base, *view->view_info(), update.decorated_key()); - })); -} - -/** - * Given some updates on the base table and the existing values for the rows affected by that update, generates the - * mutations to be applied to the base table's views, and sends them to the paired view replicas. - * - * @param base the base schema at a particular version. - * @param views the affected views which need to be updated. - * @param updates the base table updates being applied. - * @param existings the existing values for the rows affected by updates. This is used to decide if a view is - * obsoleted by the update and should be removed, gather the values for columns that may not be part of the update if - * a new view entry needs to be created, and compute the minimal updates to be applied if the view entry isn't changed - * but has simply some updated values. - * @return a future resolving to the mutations to apply to the views, which can be empty. 
- */ -future<> column_family::generate_and_propagate_view_updates(const schema_ptr& base, - std::vector&& views, - mutation&& m, - streamed_mutation_opt existings) const { - auto base_token = m.token(); - return db::view::generate_view_updates(base, - std::move(views), - streamed_mutation_from_mutation(std::move(m)), - std::move(existings)).then([base_token = std::move(base_token)] (auto&& updates) { - db::view::mutate_MV(std::move(base_token), std::move(updates)); - }); -} - -/** - * Given an update for the base table, calculates the set of potentially affected views, - * generates the relevant updates, and sends them to the paired view replicas. - */ -future<> column_family::push_view_replica_updates(const schema_ptr& s, const frozen_mutation& fm) const { - //FIXME: Avoid unfreezing here. - auto m = fm.unfreeze(s); - auto& base = schema(); - m.upgrade(base); - auto views = affected_views(base, m); - if (views.empty()) { - return make_ready_future<>(); - } - auto cr_ranges = db::view::calculate_affected_clustering_ranges(*base, m.decorated_key(), m.partition(), views); - if (cr_ranges.empty()) { - return generate_and_propagate_view_updates(base, std::move(views), std::move(m), { }); - } - // We read the whole set of regular columns in case the update now causes a base row to pass - // a view's filters, and a view happens to include columns that have no value in this update. - // Also, one of those columns can determine the lifetime of the base row, if it has a TTL. - auto columns = boost::copy_range>( - base->regular_columns() | boost::adaptors::transformed(std::mem_fn(&column_definition::id))); - query::partition_slice::option_set opts; - opts.set(query::partition_slice::option::send_partition_key); - opts.set(query::partition_slice::option::send_clustering_key); - opts.set(query::partition_slice::option::send_timestamp); - opts.set(query::partition_slice::option::send_ttl); - auto slice = query::partition_slice( - std::move(cr_ranges), { }, std::move(columns), std::move(opts), { }, cql_serialization_format::internal(), query::max_rows); - return do_with( - dht::partition_range::make_singular(m.decorated_key()), - std::move(slice), - std::move(m), - [base, views = std::move(views), this] (auto& pk, auto& slice, auto& m) mutable { - auto reader = this->as_mutation_source()( - base, - pk, - slice, - service::get_local_sstable_query_read_priority()); - auto f = reader(); - return f.then([&m, reader = std::move(reader), base = std::move(base), views = std::move(views), this] (auto&& existing) mutable { - return this->generate_and_propagate_view_updates(base, std::move(views), std::move(m), std::move(existing)); - }); - }); -} - -void column_family::set_hit_rate(gms::inet_address addr, cache_temperature rate) { - auto& e = _cluster_cache_hit_rates[addr]; - e.rate = rate; - e.last_updated = lowres_clock::now(); -} - -column_family::cache_hit_rate column_family::get_hit_rate(gms::inet_address addr) { - auto it = _cluster_cache_hit_rates.find(addr); - if (utils::fb_utilities::get_broadcast_address() == addr) { - return cache_hit_rate { _global_cache_hit_rate, lowres_clock::now()}; - } - if (it == _cluster_cache_hit_rates.end()) { - // no data yet, get it from the gossiper - auto& gossiper = gms::get_local_gossiper(); - auto eps = gossiper.get_endpoint_state_for_endpoint(addr); - if (eps) { - auto state = eps->get_application_state(gms::application_state::CACHE_HITRATES); - float f = -1.0f; // missing state means old node - if (state) { - sstring me = sprint("%s.%s", _schema->ks_name(), 
_schema->cf_name()); - auto i = state->value.find(me); - if (i != sstring::npos) { - f = strtof(&state->value[i + me.size() + 1], nullptr); - } else { - f = 0.0f; // empty state means that node has rebooted - } - set_hit_rate(addr, cache_temperature(f)); - return cache_hit_rate{cache_temperature(f), lowres_clock::now()}; - } - } - return cache_hit_rate {cache_temperature(0.0f), lowres_clock::now()}; - } else { - return it->second; - } -} - -void column_family::drop_hit_rate(gms::inet_address addr) { - _cluster_cache_hit_rates.erase(addr); -} - -future<> -write_memtable_to_sstable(memtable& mt, sstables::shared_sstable sst, bool backup, const io_priority_class& pc, bool leave_unsealed, seastar::thread_scheduling_group *tsg) { - sstables::sstable_writer_config cfg; - cfg.replay_position = mt.replay_position(); - cfg.backup = backup; - cfg.leave_unsealed = leave_unsealed; - cfg.thread_scheduling_group = tsg; - return sst->write_components(mt.make_flush_reader(mt.schema(), pc), mt.partition_count(), mt.schema(), cfg, pc); -} - diff --git a/scylla/database.hh b/scylla/database.hh deleted file mode 100644 index 6666d4a..0000000 --- a/scylla/database.hh +++ /dev/null @@ -1,1385 +0,0 @@ -/* - * Copyright (C) 2014 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#ifndef DATABASE_HH_ -#define DATABASE_HH_ - -#include "dht/i_partitioner.hh" -#include "locator/abstract_replication_strategy.hh" -#include "core/sstring.hh" -#include "core/shared_ptr.hh" -#include "net/byteorder.hh" -#include "utils/UUID_gen.hh" -#include "utils/UUID.hh" -#include "utils/hash.hh" -#include "db_clock.hh" -#include "gc_clock.hh" -#include -#include "core/distributed.hh" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "types.hh" -#include "compound.hh" -#include "core/future.hh" -#include "core/gate.hh" -#include "cql3/column_specification.hh" -#include "db/commitlog/replay_position.hh" -#include -#include -#include "schema.hh" -#include "timestamp.hh" -#include "tombstone.hh" -#include "atomic_cell.hh" -#include "query-request.hh" -#include "keys.hh" -#include "mutation.hh" -#include "memtable.hh" -#include -#include "mutation_reader.hh" -#include "row_cache.hh" -#include "compaction_strategy.hh" -#include "sstables/compaction_manager.hh" -#include "utils/exponential_backoff_retry.hh" -#include "utils/histogram.hh" -#include "utils/estimated_histogram.hh" -#include "sstables/compaction.hh" -#include "sstables/sstable_set.hh" -#include -#include -#include -#include "tracing/trace_state.hh" -#include -#include "db/view/view.hh" -#include "lister.hh" -#include "utils/phased_barrier.hh" -#include "cpu_controller.hh" - -class cell_locker; -class cell_locker_stats; -class locked_cell; - -class frozen_mutation; -class reconcilable_result; - -namespace service { -class storage_proxy; -} - -namespace netw { -class messaging_service; -} - -namespace sstables { - -class sstable; -class entry_descriptor; -} - -namespace ser { -template -class serializer; -} - -namespace db { -class commitlog; -class config; -class rp_handle; - -namespace system_keyspace { -void make(database& db, bool durable, bool volatile_testing_only); -} -} - -class mutation_reordered_with_truncate_exception : public std::exception {}; - -using shared_memtable = lw_shared_ptr; -class memtable_list; - -class dirty_memory_manager: public logalloc::region_group_reclaimer { - // We need a separate boolean, because from the LSA point of view, pressure may still be - // mounting, in which case the pressure flag could be set back on if we force it off. - bool _db_shutdown_requested = false; - - database* _db; - logalloc::region_group _region_group; - - // We would like to serialize the flushing of memtables. While flushing many memtables - // simultaneously can sustain high levels of throughput, the memory is not freed until the - // memtable is totally gone. That means that if we have throttled requests, they will stay - // throttled for a long time. Even when we have virtual dirty, that only provides a rough - // estimate, and we can't release requests that early. - semaphore _flush_serializer; - // We will accept a new flush before another one ends, once it is done with the data write. - // That is so we can keep the disk always busy. But there is still some background work that is - // left to be done. Mostly, update the caches and seal the auxiliary components of the SSTable. - // This semaphore will cap the amount of background work that we have. Note that we're not - // overly concerned about memtable memory, because dirty memory will put a limit to that. This - // is mostly about dangling continuations. So that doesn't have to be a small number. 
- static constexpr unsigned _max_background_work = 20; - semaphore _background_work_flush_serializer = { _max_background_work }; - condition_variable _should_flush; - int64_t _dirty_bytes_released_pre_accounted = 0; - - future<> flush_when_needed(); - struct flush_permit { - semaphore_units<> permit; - - flush_permit(semaphore_units<>&& permit) : permit(std::move(permit)) {} - }; - - // We need to start a flush before the current one finishes, otherwise - // we'll have a period without significant disk activity when the current - // SSTable is being sealed, the caches are being updated, etc. To do that - // we need to keep track of who is it that we are flushing this memory from. - std::unordered_map _flush_manager; - - future<> _waiting_flush; - virtual void start_reclaiming() noexcept override; - - bool has_pressure() const { - return over_soft_limit(); - } - - seastar::metrics::metric_groups _metrics; -public: - void setup_collectd(sstring namestr); - - future<> shutdown(); - - // Limits and pressure conditions: - // =============================== - // - // Virtual Dirty - // ------------- - // We can't free memory until the whole memtable is flushed because we need to keep it in memory - // until the end, but we can fake freeing memory. When we are done with an element of the - // memtable, we will update the region group pretending memory just went down by that amount. - // - // Because the amount of memory that we pretend to free should be close enough to the actual - // memory used by the memtables, that effectively creates two sub-regions inside the dirty - // region group, of equal size. In the worst case, we will have dirty - // bytes used, and half of that already virtually freed. - // - // Hard Limit - // ---------- - // The total space that can be used by memtables in each group is defined by the threshold, but - // we will only allow the region_group to grow to half of that. This is because of virtual_dirty - // as explained above. Because virtual dirty is implemented by reducing the usage in the - // region_group directly on partition written, we want to throttle every time half of the memory - // as seen by the region_group. To achieve that we need to set the hard limit (first parameter - // of the region_group_reclaimer) to 1/2 of the user-supplied threshold - // - // Soft Limit - // ---------- - // When the soft limit is hit, no throttle happens. The soft limit exists because we don't want - // to start flushing only when the limit is hit, but a bit earlier instead. If we were to start - // flushing only when the hard limit is hit, workloads in which the disk is fast enough to cope - // would see latency added to some requests unnecessarily. - // - // We then set the soft limit to 80 % of the virtual dirty hard limit, which is equal to 40 % of - // the user-supplied threshold. 
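// Worked example (hypothetical numbers): with threshold = 2GB and
// soft_limit = 0.8, the first constructor below passes 2GB / 2 = 1GB as the
// region group's hard limit and 2GB * 0.8 / 2 = 0.8GB as its soft limit,
// i.e. 40% of the 2GB the user configured.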
dirty_memory_manager(database& db, size_t threshold, double soft_limit) - : logalloc::region_group_reclaimer(threshold / 2, threshold * soft_limit / 2) - , _db(&db) - , _region_group(*this) - , _flush_serializer(1) - , _waiting_flush(flush_when_needed()) {} - - dirty_memory_manager() : logalloc::region_group_reclaimer() - , _db(nullptr) - , _region_group(*this) - , _flush_serializer(1) - , _waiting_flush(make_ready_future<>()) {} - - static dirty_memory_manager& from_region_group(logalloc::region_group *rg) { - return *(boost::intrusive::get_parent_from_member(rg, &dirty_memory_manager::_region_group)); - } - - logalloc::region_group& region_group() { - return _region_group; - } - - const logalloc::region_group& region_group() const { - return _region_group; - } - - void revert_potentially_cleaned_up_memory(logalloc::region* from, int64_t delta) { - _region_group.update(delta); - _dirty_bytes_released_pre_accounted -= delta; - } - - void account_potentially_cleaned_up_memory(logalloc::region* from, int64_t delta) { - _region_group.update(-delta); - _dirty_bytes_released_pre_accounted += delta; - } - - // This can be called multiple times during the lifetime of the region, and should always - // ultimately be called after the flush ends. However, some flushers may decide to call it - // earlier. For instance, the normal memtables sealing function will call this before updating - // the cache. - // - // Also, for sealing methods like the normal memtable sealing method - that may retry after a - // failed write, calling this method after the attempt is completed with success or failure is - // mandatory. That's because the new attempt will create a new flush reader for the same - // SSTable, so we need to make sure that we revert the old charges. - void remove_from_flush_manager(const logalloc::region *region) { - auto it = _flush_manager.find(region); - if (it != _flush_manager.end()) { - _flush_manager.erase(it); - } - } - - void add_to_flush_manager(const logalloc::region *region, flush_permit&& permit) { - _flush_manager.emplace(region, std::move(permit)); - } - - size_t real_dirty_memory() const { - return _region_group.memory_used() + _dirty_bytes_released_pre_accounted; - } - - size_t virtual_dirty_memory() const { - return _region_group.memory_used(); - } - - future<> flush_one(memtable_list& cf, semaphore_units<> permit); - - future<semaphore_units<>> get_flush_permit() { - return get_units(_flush_serializer, 1); - } -}; - -extern thread_local dirty_memory_manager default_dirty_memory_manager; - -// We could just add all memtables, regardless of types, to a single list, and - // then filter them out when we read them. Here's why I have chosen not to do - // it: - // - // First, some of the methods in which a memtable is involved (like seal) - // assume a commitlog, and take great care updating the replay - // position, flushing the log, etc. We want to bypass those, and that has to - // be done either by sprinkling the seal code with conditionals, or having a - // separate method for each seal. - // - // Also, if we ever want to put some of the memtables in a separate allocator - // region group to provide for extra QoS, having the classes properly wrapped - // will make that trivial: just pass a version of new_memtable() that puts it - // in a different region, while the list approach would require a lot of - // conditionals as well. - // - // If we are going to have different methods, better have different instances - // of a common class. 
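// As a concrete illustration of the two kinds of lists described above, a
// hypothetical construction of both variants of the class that follows
// (names invented for the sketch; the real call sites are column_family's
// make_*_memtable_list() helpers):
//
//     // A flushable list: a seal function is supplied, so may_flush() is true.
//     auto flushable = make_lw_shared<memtable_list>(
//             [] (memtable_list::flush_behavior) { return make_ready_future<>(); },
//             [] { return some_schema; },
//             &some_dirty_memory_manager);
//
//     // A memory-only list: no seal function, so request_flush() is a no-op.
//     auto memory_only = make_lw_shared<memtable_list>(
//             [] { return some_schema; },
//             &some_dirty_memory_manager);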
-
-// We could just add all memtables, regardless of types, to a single list, and
-// then filter them out when we read them. Here's why I have chosen not to do
-// it:
-//
-// First, some of the methods in which a memtable is involved (like seal)
-// assume a commitlog, and take great care updating the replay position,
-// flushing the log, etc. We want to bypass those, and that has to
-// be done either by sprinkling the seal code with conditionals, or having a
-// separate method for each seal.
-//
-// Also, if we ever want to put some of the memtables in a separate allocator
-// region group to provide for extra QoS, having the classes properly wrapped
-// will make that trivial: just pass a version of new_memtable() that puts it
-// in a different region, while the list approach would require a lot of
-// conditionals as well.
-//
-// If we are going to have different methods, better have different instances
-// of a common class.
-class memtable_list {
-public:
-    enum class flush_behavior { delayed, immediate };
-private:
-    std::vector<shared_memtable> _memtables;
-    std::function<future<> (flush_behavior)> _seal_fn;
-    std::function<schema_ptr ()> _current_schema;
-    dirty_memory_manager* _dirty_memory_manager;
-    std::experimental::optional<shared_promise<>> _flush_coalescing;
-public:
-    memtable_list(std::function<future<> (flush_behavior)> seal_fn, std::function<schema_ptr ()> cs, dirty_memory_manager* dirty_memory_manager)
-        : _memtables({})
-        , _seal_fn(seal_fn)
-        , _current_schema(cs)
-        , _dirty_memory_manager(dirty_memory_manager) {
-        add_memtable();
-    }
-
-    memtable_list(std::function<schema_ptr ()> cs, dirty_memory_manager* dirty_memory_manager)
-        : _memtables({})
-        , _seal_fn()
-        , _current_schema(cs)
-        , _dirty_memory_manager(dirty_memory_manager) {
-        add_memtable();
-    }
-
-    bool may_flush() const {
-        return bool(_seal_fn);
-    }
-
-    shared_memtable back() {
-        return _memtables.back();
-    }
-
-    // The caller has to make sure the element exists before calling this.
-    void erase(const shared_memtable& element) {
-        _memtables.erase(boost::range::find(_memtables, element));
-    }
-    void clear() {
-        _memtables.clear();
-    }
-
-    size_t size() const {
-        return _memtables.size();
-    }
-
-    future<> seal_active_memtable(flush_behavior behavior) {
-        return _seal_fn(behavior);
-    }
-
-    auto begin() noexcept {
-        return _memtables.begin();
-    }
-
-    auto begin() const noexcept {
-        return _memtables.begin();
-    }
-
-    auto end() noexcept {
-        return _memtables.end();
-    }
-
-    auto end() const noexcept {
-        return _memtables.end();
-    }
-
-    memtable& active_memtable() {
-        return *_memtables.back();
-    }
-
-    void add_memtable() {
-        _memtables.emplace_back(new_memtable());
-    }
-
-    logalloc::region_group& region_group() {
-        return _dirty_memory_manager->region_group();
-    }
-    // This is used for explicit flushes. Will queue the memtable for flushing and proceed when the
-    // dirty_memory_manager allows us to. We will not seal at this time since the flush itself
-    // wouldn't happen anyway. Keeping the memtable in memory will potentially increase the time it
-    // spends in memory allowing for more coalescing opportunities.
-    future<> request_flush();
-private:
-    lw_shared_ptr<memtable> new_memtable();
-};
-
-using sstable_list = sstables::sstable_list;
-
-// The CF has a "stats" structure. But we don't want all fields here,
-// since some of them are fairly complex for exporting to collectd. Also,
-// that structure matches what we export via the API, so better leave it
-// untouched. And we need more fields. We summarize here what we need.
-struct cf_stats {
-    int64_t pending_memtables_flushes_count = 0;
-    int64_t pending_memtables_flushes_bytes = 0;
-
-    // number of times the clustering filter was executed
-    int64_t clustering_filter_count = 0;
-    // sstables considered by the filter (dividing this by the previous one gives the
-    // average number of sstables per read)
-    int64_t sstables_checked_by_clustering_filter = 0;
-    // number of times the filter passed the fast-path checks
-    int64_t clustering_filter_fast_path_count = 0;
-    // how many sstables survived the clustering key checks
-    int64_t surviving_sstables_after_clustering_filter = 0;
-};
-
-class cache_temperature {
-    float hit_rate;
-    explicit cache_temperature(uint8_t hr) : hit_rate(hr/255.0f) {}
-public:
-    uint8_t get_serialized_temperature() const {
-        return hit_rate * 255;
-    }
-    cache_temperature() : hit_rate(0) {}
-    explicit cache_temperature(float hr) : hit_rate(hr) {}
-    explicit operator float() const { return hit_rate; }
-    static cache_temperature invalid() { return cache_temperature(-1.0f); }
-    friend struct ser::serializer<cache_temperature>;
-};
-
-class column_family : public enable_lw_shared_from_this<column_family> {
-public:
-    using timeout_clock = lowres_clock;
-
-    struct config {
-        sstring datadir;
-        bool enable_disk_writes = true;
-        bool enable_disk_reads = true;
-        bool enable_cache = true;
-        bool enable_commitlog = true;
-        bool enable_incremental_backups = false;
-        ::dirty_memory_manager* dirty_memory_manager = &default_dirty_memory_manager;
-        ::dirty_memory_manager* streaming_dirty_memory_manager = &default_dirty_memory_manager;
-        restricted_mutation_reader_config read_concurrency_config;
-        restricted_mutation_reader_config streaming_read_concurrency_config;
-        ::cf_stats* cf_stats = nullptr;
-        seastar::thread_scheduling_group* background_writer_scheduling_group = nullptr;
-        seastar::thread_scheduling_group* memtable_scheduling_group = nullptr;
-    };
-    struct no_commitlog {};
-    struct stats {
-        /** Number of times flush has resulted in the memtable being switched out. */
-        int64_t memtable_switch_count = 0;
-        /** Estimated number of tasks pending for this column family */
-        int64_t pending_flushes = 0;
-        int64_t live_disk_space_used = 0;
-        int64_t total_disk_space_used = 0;
-        int64_t live_sstable_count = 0;
-        /** Estimated number of compactions pending for this column family */
-        int64_t pending_compactions = 0;
-        utils::timed_rate_moving_average_and_histogram reads{256};
-        utils::timed_rate_moving_average_and_histogram writes{256};
-        utils::estimated_histogram estimated_read;
-        utils::estimated_histogram estimated_write;
-        utils::estimated_histogram estimated_sstable_per_read{35};
-        utils::timed_rate_moving_average_and_histogram tombstone_scanned;
-        utils::timed_rate_moving_average_and_histogram live_scanned;
-    };
-
-    struct snapshot_details {
-        int64_t total;
-        int64_t live;
-    };
-    struct cache_hit_rate {
-        cache_temperature rate;
-        lowres_clock::time_point last_updated;
-    };
-private:
-    schema_ptr _schema;
-    config _config;
-    mutable stats _stats;
-
-    uint64_t _failed_counter_applies_to_memtable = 0;
-
-    template <typename... Args>
-    void do_apply(db::rp_handle&&, Args&&... args);
-
-    lw_shared_ptr<memtable_list> _memtables;
-
-    // In older incarnations, we simply committed the mutations to memtables.
-    // However, doing that makes it harder for us to provide QoS within the
-    // disk subsystem.
-    // Keeping them in separate memtables allows us to properly
-    // classify those streams into their own I/O class.
-    //
-    // We could write those directly to disk, but we still want the mutations
-    // coming through the wire to go to a memtable staging area. This has two
-    // major advantages:
-    //
-    // first, it will allow us to properly order the partitions. They are
-    // hopefully sent in order but we can't really guarantee that without
-    // sacrificing sender-side parallelism.
-    //
-    // second, we will be able to coalesce writes from multiple plan_ids and
-    // even multiple senders, as well as automatically tapping into the dirty
-    // memory throttling mechanism, guaranteeing we will not overload the
-    // server.
-    lw_shared_ptr<memtable_list> _streaming_memtables;
-    utils::phased_barrier _streaming_flush_phaser;
-
-    // If mutations are fragmented during streaming the sstables cannot be made
-    // visible immediately after memtable flush, because that could cause
-    // readers to see only a part of a partition thus violating isolation
-    // guarantees.
-    // Mutations that are sent in fragments are kept separately in per-streaming
-    // plan memtables and the resulting sstables are not made visible until
-    // the streaming is complete.
-    struct streaming_memtable_big {
-        lw_shared_ptr<memtable_list> memtables;
-        std::vector<sstables::shared_sstable> sstables;
-        seastar::gate flush_in_progress;
-    };
-    std::unordered_map<utils::UUID, lw_shared_ptr<streaming_memtable_big>> _streaming_memtables_big;
-
-    future<std::vector<sstables::shared_sstable>> flush_streaming_big_mutations(utils::UUID plan_id);
-    void apply_streaming_big_mutation(schema_ptr m_schema, utils::UUID plan_id, const frozen_mutation& m);
-    future<> seal_active_streaming_memtable_big(streaming_memtable_big& smb);
-
-    lw_shared_ptr<memtable_list> make_memory_only_memtable_list();
-    lw_shared_ptr<memtable_list> make_memtable_list();
-    lw_shared_ptr<memtable_list> make_streaming_memtable_list();
-    lw_shared_ptr<memtable_list> make_streaming_memtable_big_list(streaming_memtable_big& smb);
-
-    sstables::compaction_strategy _compaction_strategy;
-    // generation -> sstable. Ordered by key so we can easily get the most recent.
-    lw_shared_ptr<sstables::sstable_set> _sstables;
-    // sstables that have been compacted (so don't look up in query) but
-    // have not been deleted yet, so must not GC any tombstones in other sstables
-    // that may delete data in these sstables:
-    std::vector<sstables::shared_sstable> _sstables_compacted_but_not_deleted;
-    // sstables that have been opened but not loaded yet, that's because refresh
-    // needs to load all opened sstables atomically, and now, we open an sstable
-    // in all shards at the same time, which makes it hard to store all sstables
-    // we need to load later on for all shards.
-    std::vector<sstables::shared_sstable> _sstables_opened_but_not_loaded;
-    // sstables that are shared between several shards so we want to rewrite
-    // them (split the data belonging to this shard to a separate sstable),
-    // but for correct compaction we need to start the compaction only after
-    // reading all sstables.
-    std::unordered_map<int64_t, sstables::shared_sstable> _sstables_need_rewrite;
-    // Control background fibers waiting for sstables to be deleted
-    seastar::gate _sstable_deletion_gate;
-    // There are situations in which we need to stop writing sstables. Flushers will take
-    // the read lock, and the ones that wish to stop that process will take the write lock.
-    rwlock _sstables_lock;
-    mutable row_cache _cache; // Cache covers only sstables.
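The per-node hit rates tracked a few members below travel as a single byte,
using the cache_temperature encoding defined earlier. A standalone sketch of
the round trip and its worst-case quantization error (the concrete hit rate is
made up):

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    int main() {
        float hit_rate = 0.73f;                    // some measured cache hit rate
        uint8_t wire = uint8_t(hit_rate * 255);    // like get_serialized_temperature()
        float decoded = wire / 255.0f;             // like cache_temperature(uint8_t)
        // Truncation bounds the round-trip error by one step, i.e. 1/255.
        assert(std::fabs(decoded - hit_rate) < 1.0f / 255.0f);
        return 0;
    }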
-    std::experimental::optional<int64_t> _sstable_generation = {};
-
-    db::replay_position _highest_rp;
-    db::replay_position _lowest_allowed_rp;
-
-    // Provided by the database that owns this commitlog
-    db::commitlog* _commitlog;
-    compaction_manager& _compaction_manager;
-    int _compaction_disabled = 0;
-    utils::phased_barrier _flush_barrier;
-    seastar::gate _streaming_flush_gate;
-    std::vector<view_ptr> _views;
-    semaphore _cache_update_sem{1};
-
-    std::unique_ptr<cell_locker> _counter_cell_locks;
-    void set_metrics();
-    seastar::metrics::metric_groups _metrics;
-
-    // holds average cache hit rate of all shards
-    // recalculated periodically
-    cache_temperature _global_cache_hit_rate = cache_temperature(0.0f);
-
-    // holds cache hit rates per each node in a cluster
-    // may not have information for some node, since it fills
-    // in dynamically
-    std::unordered_map<gms::inet_address, cache_hit_rate> _cluster_cache_hit_rates;
-private:
-    void update_stats_for_new_sstable(uint64_t disk_space_used_by_sstable, std::vector<unsigned>&& shards_for_the_sstable);
-    // Adds new sstable to the set of sstables
-    // Doesn't update the cache. The cache must be synchronized in order for reads to see
-    // the writes contained in this sstable.
-    // Cache must be synchronized atomically with this, otherwise write atomicity may not be respected.
-    // Doesn't trigger compaction.
-    void add_sstable(lw_shared_ptr<sstables::sstable> sstable, std::vector<unsigned>&& shards_for_the_sstable);
-    // returns an empty pointer if sstable doesn't belong to current shard.
-    future<lw_shared_ptr<sstables::sstable>> open_sstable(sstables::foreign_sstable_open_info info, sstring dir,
-        int64_t generation, sstables::sstable::version_types v, sstables::sstable::format_types f);
-    void load_sstable(lw_shared_ptr<sstables::sstable>& sstable, bool reset_level = false);
-    lw_shared_ptr<memtable> new_memtable();
-    lw_shared_ptr<memtable> new_streaming_memtable();
-    future<stop_iteration> try_flush_memtable_to_sstable(lw_shared_ptr<memtable> memt);
-    future<> update_cache(memtable&, lw_shared_ptr<sstables::sstable_set> old_sstables);
-    struct merge_comparator;
-
-    // update the sstable generation, making sure that new sstables don't overwrite this one.
-    void update_sstables_known_generation(unsigned generation) {
-        if (!_sstable_generation) {
-            _sstable_generation = 1;
-        }
-        _sstable_generation = std::max(*_sstable_generation, generation / smp::count + 1);
-    }
-
-    uint64_t calculate_generation_for_new_table() {
-        assert(_sstable_generation);
-        // FIXME: better way of ensuring we don't attempt to
-        // overwrite an existing table.
-        return (*_sstable_generation)++ * smp::count + engine().cpu_id();
-    }
-
-    // inverse of calculate_generation_for_new_table(), used to determine at which
-    // shard an sstable should be opened.
-    static int64_t calculate_shard_from_sstable_generation(int64_t sstable_generation) {
-        return sstable_generation % smp::count;
-    }
-
-    // Rebuild existing _sstables with new_sstables added to it and sstables_to_remove removed from it.
-    void rebuild_sstable_list(const std::vector<sstables::shared_sstable>& new_sstables,
-                              const std::vector<sstables::shared_sstable>& sstables_to_remove);
-    void rebuild_statistics();
-
-    // This function replaces new sstables by their ancestors, which are sstables that needed resharding.
-    void replace_ancestors_needed_rewrite(std::vector<sstables::shared_sstable> new_sstables);
-    void remove_ancestors_needed_rewrite(std::unordered_set<int64_t> ancestors);
-private:
-    mutation_source_opt _virtual_reader;
-
-    // Creates a mutation reader which covers given sstables.
-    // Caller needs to ensure that column_family remains live (FIXME: relax this).
-    // The 'range' parameter must be live as long as the reader is used.
-    // Mutations returned by the reader will all have given schema.
- mutation_reader make_sstable_reader(schema_ptr schema, - lw_shared_ptr sstables, - const dht::partition_range& range, - const query::partition_slice& slice, - const io_priority_class& pc, - tracing::trace_state_ptr trace_state, - streamed_mutation::forwarding fwd, - mutation_reader::forwarding fwd_mr) const; - - mutation_source sstables_as_mutation_source(); - snapshot_source sstables_as_snapshot_source(); - partition_presence_checker make_partition_presence_checker(lw_shared_ptr); - std::chrono::steady_clock::time_point _sstable_writes_disabled_at; - void do_trigger_compaction(); -public: - bool has_shared_sstables() const { - return bool(_sstables_need_rewrite.size()); - } - - sstring dir() const { - return _config.datadir; - } - - uint64_t failed_counter_applies_to_memtable() const { - return _failed_counter_applies_to_memtable; - } - - // This function should be called when this column family is ready for writes, IOW, - // to produce SSTables. Extensive details about why this is important can be found - // in Scylla's Github Issue #1014 - // - // Nothing should be writing to SSTables before we have the chance to populate the - // existing SSTables and calculate what should the next generation number be. - // - // However, if that happens, we want to protect against it in a way that does not - // involve overwriting existing tables. This is one of the ways to do it: every - // column family starts in an unwriteable state, and when it can finally be written - // to, we mark it as writeable. - // - // Note that this *cannot* be a part of add_column_family. That adds a column family - // to a db in memory only, and if anybody is about to write to a CF, that was most - // likely already called. We need to call this explicitly when we are sure we're ready - // to issue disk operations safely. - void mark_ready_for_writes() { - update_sstables_known_generation(0); - } - - // Creates a mutation reader which covers all data sources for this column family. - // Caller needs to ensure that column_family remains live (FIXME: relax this). - // Note: for data queries use query() instead. - // The 'range' parameter must be live as long as the reader is used. - // Mutations returned by the reader will all have given schema. - // If I/O needs to be issued to read anything in the specified range, the operations - // will be scheduled under the priority class given by pc. - mutation_reader make_reader(schema_ptr schema, - const dht::partition_range& range = query::full_partition_range, - const query::partition_slice& slice = query::full_slice, - const io_priority_class& pc = default_priority_class(), - tracing::trace_state_ptr trace_state = nullptr, - streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no, - mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes) const; - - // The streaming mutation reader differs from the regular mutation reader in that: - // - Reflects all writes accepted by replica prior to creation of the - // reader and a _bounded_ amount of writes which arrive later. - // - Does not populate the cache - mutation_reader make_streaming_reader(schema_ptr schema, - const dht::partition_range& range = query::full_partition_range) const; - - // Requires ranges to be sorted and disjoint. 
- mutation_reader make_streaming_reader(schema_ptr schema, - const dht::partition_range_vector& ranges) const; - - mutation_source as_mutation_source() const; - - void set_virtual_reader(mutation_source virtual_reader) { - _virtual_reader = std::move(virtual_reader); - } - - // Queries can be satisfied from multiple data sources, so they are returned - // as temporaries. - // - // FIXME: in case a query is satisfied from a single memtable, avoid a copy - using const_mutation_partition_ptr = std::unique_ptr; - using const_row_ptr = std::unique_ptr; - memtable& active_memtable() { return _memtables->active_memtable(); } - const row_cache& get_row_cache() const { - return _cache; - } - - row_cache& get_row_cache() { - return _cache; - } - - future> lock_counter_cells(const mutation& m, timeout_clock::time_point timeout); - - logalloc::occupancy_stats occupancy() const; -private: - column_family(schema_ptr schema, config cfg, db::commitlog* cl, compaction_manager&, cell_locker_stats& cl_stats); -public: - column_family(schema_ptr schema, config cfg, db::commitlog& cl, compaction_manager& cm, cell_locker_stats& cl_stats) - : column_family(schema, std::move(cfg), &cl, cm, cl_stats) {} - column_family(schema_ptr schema, config cfg, no_commitlog, compaction_manager& cm, cell_locker_stats& cl_stats) - : column_family(schema, std::move(cfg), nullptr, cm, cl_stats) {} - column_family(column_family&&) = delete; // 'this' is being captured during construction - ~column_family(); - const schema_ptr& schema() const { return _schema; } - void set_schema(schema_ptr); - db::commitlog* commitlog() { return _commitlog; } - future find_partition(schema_ptr, const dht::decorated_key& key) const; - future find_partition_slow(schema_ptr, const partition_key& key) const; - future find_row(schema_ptr, const dht::decorated_key& partition_key, clustering_key clustering_key) const; - // Applies given mutation to this column family - // The mutation is always upgraded to current schema. - void apply(const frozen_mutation& m, const schema_ptr& m_schema, db::rp_handle&& = {}); - void apply(const mutation& m, db::rp_handle&& = {}); - void apply_streaming_mutation(schema_ptr, utils::UUID plan_id, const frozen_mutation&, bool fragmented); - - // Returns at most "cmd.limit" rows - future> query(schema_ptr, - const query::read_command& cmd, query::result_request request, - const dht::partition_range_vector& ranges, - tracing::trace_state_ptr trace_state, - query::result_memory_limiter& memory_limiter, - uint64_t max_result_size); - - void start(); - future<> stop(); - future<> flush(); - future<> flush_streaming_mutations(utils::UUID plan_id, dht::partition_range_vector ranges = dht::partition_range_vector{}); - future<> fail_streaming_mutations(utils::UUID plan_id); - future<> clear(); // discards memtable(s) without flushing them to disk. - future discard_sstables(db_clock::time_point); - - // Important warning: disabling writes will only have an effect in the current shard. - // The other shards will keep writing tables at will. Therefore, you very likely need - // to call this separately in all shards first, to guarantee that none of them are writing - // new data before you can safely assume that the whole node is disabled. 
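A sketch of the cross-shard coordination that warning calls for, using the two
member functions declared next. Plain objects stand in for shards here; the
real code would dispatch through Seastar's distributed<> wrappers, and
toy_shard is hypothetical:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct toy_shard {
        int64_t max_generation;
        bool writes_disabled = false;
        // models disable_sstable_write(): stop writes, report highest generation
        int64_t disable() { writes_disabled = true; return max_generation; }
        // models enable_sstable_write(new_generation)
        void enable(int64_t g) { max_generation = std::max(max_generation, g); writes_disabled = false; }
    };

    int main() {
        std::vector<toy_shard> shards = {{10}, {42}, {7}};
        // Phase 1: disable on every shard and collect the global maximum.
        int64_t max_gen = 0;
        for (auto& s : shards) { max_gen = std::max(max_gen, s.disable()); }
        assert(max_gen == 42);
        // Phase 2: only now is the whole node quiesced; re-enable with a
        // generation no shard can collide with.
        for (auto& s : shards) { s.enable(max_gen + 1); }
        return 0;
    }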
-    future<int64_t> disable_sstable_write() {
-        _sstable_writes_disabled_at = std::chrono::steady_clock::now();
-        return _sstables_lock.write_lock().then([this] {
-            if (_sstables->all()->empty()) {
-                return make_ready_future<int64_t>(0);
-            }
-            int64_t max = 0;
-            for (auto&& s : *_sstables->all()) {
-                max = std::max(max, s->generation());
-            }
-            return make_ready_future<int64_t>(max);
-        });
-    }
-
-    // SSTable writes are now allowed again, and generation is updated to new_generation if != -1.
-    // Returns the time elapsed since we disabled writes.
-    std::chrono::steady_clock::duration enable_sstable_write(int64_t new_generation) {
-        if (new_generation != -1) {
-            update_sstables_known_generation(new_generation);
-        }
-        _sstables_lock.write_unlock();
-        return std::chrono::steady_clock::now() - _sstable_writes_disabled_at;
-    }
-
-    // Make sure the generation numbers are sequential, starting from "start".
-    // Generations before "start" are left untouched.
-    //
-    // Return the highest generation number seen so far.
-    //
-    // Word of warning: although this function will reshuffle anything over "start", it is
-    // very dangerous to do that with live SSTables. This is meant to be used with SSTables
-    // that are not yet managed by the system.
-    //
-    // Parameter all_generations stores the generations of all SSTables in the system, so it
-    // will be easy to determine which SSTable is new.
-    // An example usage would query all shards asking what is the highest SSTable number known
-    // to them, and then pass that + 1 as "start".
-    future<std::vector<sstables::entry_descriptor>> reshuffle_sstables(std::set<int64_t> all_generations, int64_t start);
-
-    // FIXME: this is just an example, should be changed to something more
-    // general. compact_all_sstables() starts a compaction of all sstables.
-    // It doesn't flush the current memtable first. It's just an ad-hoc method,
-    // not a real compaction policy.
-    future<> compact_all_sstables();
-    // Compact all sstables provided in the vector.
-    // If cleanup is set to true, compact_sstables will run on behalf of a cleanup job,
-    // meaning that irrelevant keys will be discarded.
-    future<> compact_sstables(sstables::compaction_descriptor descriptor, bool cleanup = false);
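Concretely, update_sstables_known_generation() and
calculate_generation_for_new_table() (defined earlier) keep each shard minting
generations in its own residue class modulo smp::count, which is why
enable_sstable_write() above can safely seed a shard with a generation observed
elsewhere. A standalone sketch with made-up values (smp_count and cpu_id stand
in for smp::count and engine().cpu_id()):

    #include <cassert>
    #include <cstdint>

    int main() {
        const unsigned smp_count = 4;
        for (unsigned cpu_id = 0; cpu_id < smp_count; ++cpu_id) {
            int64_t counter = 17;        // per-shard _sstable_generation
            // like calculate_generation_for_new_table():
            int64_t gen = counter * smp_count + cpu_id;
            // calculate_shard_from_sstable_generation() recovers the owner:
            assert(gen % smp_count == cpu_id);
        }
        return 0;
    }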
-    // Performs a cleanup on each sstable of this column family, excluding
-    // those ones that are irrelevant to this node or being compacted.
-    // Cleanup is about discarding keys that are no longer relevant for a
-    // given sstable, e.g. after a node loses part of its token range because
-    // of a newly added node.
-    future<> cleanup_sstables(sstables::compaction_descriptor descriptor);
-
-    future<bool> snapshot_exists(sstring name);
-
-    db::replay_position set_low_replay_position_mark();
-
-    future<> snapshot(sstring name);
-    future<std::unordered_map<sstring, snapshot_details>> get_snapshot_details();
-
-    bool incremental_backups_enabled() const {
-        return _config.enable_incremental_backups;
-    }
-
-    void set_incremental_backups(bool val) {
-        _config.enable_incremental_backups = val;
-    }
-
-    const sstables::sstable_set& get_sstable_set() const;
-    lw_shared_ptr<const sstable_list> get_sstables() const;
-    lw_shared_ptr<const sstable_list> get_sstables_including_compacted_undeleted() const;
-    const std::vector<sstables::shared_sstable>& compacted_undeleted_sstables() const;
-    std::vector<sstables::shared_sstable> select_sstables(const dht::partition_range& range) const;
-    std::vector<sstables::shared_sstable> candidates_for_compaction() const;
-    std::vector<sstables::shared_sstable> sstables_need_rewrite() const;
-    size_t sstables_count() const;
-    std::vector<uint64_t> sstable_count_per_level() const;
-    int64_t get_unleveled_sstables() const;
-
-    void start_compaction();
-    void trigger_compaction();
-    future<> run_compaction(sstables::compaction_descriptor descriptor);
-    void set_compaction_strategy(sstables::compaction_strategy_type strategy);
-    const sstables::compaction_strategy& get_compaction_strategy() const {
-        return _compaction_strategy;
-    }
-
-    sstables::compaction_strategy& get_compaction_strategy() {
-        return _compaction_strategy;
-    }
-
-    const stats& get_stats() const {
-        return _stats;
-    }
-
-    ::cf_stats* cf_stats() {
-        return _config.cf_stats;
-    }
-
-    seastar::thread_scheduling_group* background_writer_scheduling_group() {
-        return _config.background_writer_scheduling_group;
-    }
-
-    compaction_manager& get_compaction_manager() const {
-        return _compaction_manager;
-    }
-
-    cache_temperature get_global_cache_hit_rate() const {
-        return _global_cache_hit_rate;
-    }
-
-    void set_global_cache_hit_rate(cache_temperature rate) {
-        _global_cache_hit_rate = rate;
-    }
-
-    void set_hit_rate(gms::inet_address addr, cache_temperature rate);
-    cache_hit_rate get_hit_rate(gms::inet_address addr);
-    void drop_hit_rate(gms::inet_address addr);
-
-    template <typename Func, typename Result = futurize_t<std::result_of_t<Func ()>>>
-    Result run_with_compaction_disabled(Func && func) {
-        ++_compaction_disabled;
-        return _compaction_manager.remove(this).then(std::forward<Func>(func)).finally([this] {
-            if (--_compaction_disabled == 0) {
-                // we're turning it on again, use the function that does not increment
-                // the counter further.
-                do_trigger_compaction();
-            }
-        });
-    }
-
-    void add_or_update_view(view_ptr v);
-    void remove_view(view_ptr v);
-    const std::vector<view_ptr>& views() const;
-    future<> push_view_replica_updates(const schema_ptr& s, const frozen_mutation& fm) const;
-private:
-    std::vector<view_ptr> affected_views(const schema_ptr& base, const mutation& update) const;
-    future<> generate_and_propagate_view_updates(const schema_ptr& base,
-            std::vector<view_ptr>&& views,
-            mutation&& m,
-            streamed_mutation_opt existings) const;
-
-    // One does not need to wait on this future if all we are interested in, is
-    // initiating the write. The writes initiated here will eventually
-    // complete, and the seastar::gate below will make sure they are all
-    // completed before we stop() this column_family.
-    //
-    // But it is possible to synchronously wait for the seal to complete by
-    // waiting on this future. This is useful in situations where we want to
-    // synchronously flush data to disk.
-    future<> seal_active_memtable(memtable_list::flush_behavior behavior = memtable_list::flush_behavior::delayed);
-
-    // I am assuming here that the repair process will potentially send ranges containing
-    // few mutations, definitely not enough to fill a memtable. It wants to know whether or
-    // not each of those ranges individually succeeded or failed, so we need a future for
-    // each.
-    //
-    // One of the ways to fix that is to change the repair itself to send more mutations in
-    // a single batch. But relying on that is a bad idea for two reasons:
-    //
-    // First, the goals of the SSTable writer and the repair sender are at odds. The SSTable
-    // writer wants to write as few SSTables as possible, while the repair sender wants to
-    // break down the range in pieces as small as it can and checksum them individually, so
-    // it doesn't have to send a lot of mutations for no reason.
-    //
-    // Second, even if the repair process wants to process larger ranges at once, some ranges
-    // themselves may be small. So while most ranges would be large, we would still have
-    // potentially some fairly small SSTables lying around.
-    //
-    // The best course of action in this case is to coalesce the incoming streams write-side.
-    // Repair can now choose whatever strategy - small or big ranges - it wants, rest assured
-    // that the incoming memtables will be coalesced together.
-    shared_promise<> _waiting_streaming_flushes;
-    timer<> _delayed_streaming_flush{[this] { _streaming_memtables->request_flush(); }};
-    future<> seal_active_streaming_memtable_delayed();
-    future<> seal_active_streaming_memtable_immediate();
-    future<> seal_active_streaming_memtable(memtable_list::flush_behavior behavior) {
-        if (behavior == memtable_list::flush_behavior::delayed) {
-            return seal_active_streaming_memtable_delayed();
-        } else if (behavior == memtable_list::flush_behavior::immediate) {
-            return seal_active_streaming_memtable_immediate();
-        } else {
-            // Impossible
-            assert(0);
-        }
-    }
-
-    // filter manifest.json files out
-    static bool manifest_json_filter(const lister::path&, const directory_entry& entry);
-
-    // Iterate over all partitions. Protocol is the same as std::all_of(),
-    // so that iteration can be stopped by returning false.
-    // Func signature: bool (const decorated_key& dk, const mutation_partition& mp)
-    template <typename Func>
-    future<bool> for_all_partitions(schema_ptr, Func&& func) const;
-    void check_valid_rp(const db::replay_position&) const;
-public:
-    // Iterate over all partitions. Protocol is the same as std::all_of(),
-    // so that iteration can be stopped by returning false.
-    future<bool> for_all_partitions_slow(schema_ptr, std::function<bool (const dht::decorated_key&, const mutation_partition&)> func) const;
-
-    friend std::ostream& operator<<(std::ostream& out, const column_family& cf);
-    // Testing purposes.
-    friend class column_family_test;
-
-    friend class distributed_loader;
-};
-
-class user_types_metadata {
-    std::unordered_map<bytes, user_type> _user_types;
-public:
-    user_type get_type(const bytes& name) const {
-        return _user_types.at(name);
-    }
-    const std::unordered_map<bytes, user_type>& get_all_types() const {
-        return _user_types;
-    }
-    void add_type(user_type type) {
-        auto i = _user_types.find(type->_name);
-        assert(i == _user_types.end() || type->is_compatible_with(*i->second));
-        _user_types[type->_name] = std::move(type);
-    }
-    void remove_type(user_type type) {
-        _user_types.erase(type->_name);
-    }
-    friend std::ostream& operator<<(std::ostream& os, const user_types_metadata& m);
-};
-
-class keyspace_metadata final {
-    sstring _name;
-    sstring _strategy_name;
-    std::map<sstring, sstring> _strategy_options;
-    std::unordered_map<sstring, schema_ptr> _cf_meta_data;
-    bool _durable_writes;
-    lw_shared_ptr<user_types_metadata> _user_types;
-public:
-    keyspace_metadata(sstring name,
-                      sstring strategy_name,
-                      std::map<sstring, sstring> strategy_options,
-                      bool durable_writes,
-                      std::vector<schema_ptr> cf_defs = std::vector<schema_ptr>{},
-                      lw_shared_ptr<user_types_metadata> user_types = make_lw_shared<user_types_metadata>())
-        : _name{std::move(name)}
-        , _strategy_name{strategy_name.empty() ? "NetworkTopologyStrategy" : strategy_name}
-        , _strategy_options{std::move(strategy_options)}
-        , _durable_writes{durable_writes}
-        , _user_types{std::move(user_types)}
-    {
-        for (auto&& s : cf_defs) {
-            _cf_meta_data.emplace(s->cf_name(), s);
-        }
-    }
-    static lw_shared_ptr<keyspace_metadata>
-    new_keyspace(sstring name,
-                 sstring strategy_name,
-                 std::map<sstring, sstring> options,
-                 bool durable_writes,
-                 std::vector<schema_ptr> cf_defs = std::vector<schema_ptr>{})
-    {
-        return ::make_lw_shared<keyspace_metadata>(name, strategy_name, options, durable_writes, cf_defs);
-    }
-    void validate() const;
-    const sstring& name() const {
-        return _name;
-    }
-    const sstring& strategy_name() const {
-        return _strategy_name;
-    }
-    const std::map<sstring, sstring>& strategy_options() const {
-        return _strategy_options;
-    }
-    const std::unordered_map<sstring, schema_ptr>& cf_meta_data() const {
-        return _cf_meta_data;
-    }
-    bool durable_writes() const {
-        return _durable_writes;
-    }
-    const lw_shared_ptr<user_types_metadata>& user_types() const {
-        return _user_types;
-    }
-    void add_or_update_column_family(const schema_ptr& s) {
-        _cf_meta_data[s->cf_name()] = s;
-    }
-    void remove_column_family(const schema_ptr& s) {
-        _cf_meta_data.erase(s->cf_name());
-    }
-    void add_user_type(const user_type ut) {
-        _user_types->add_type(ut);
-    }
-    void remove_user_type(const user_type ut) {
-        _user_types->remove_type(ut);
-    }
-    std::vector<schema_ptr> tables() const;
-    std::vector<view_ptr> views() const;
-    friend std::ostream& operator<<(std::ostream& os, const keyspace_metadata& m);
-};
-
-class keyspace {
-public:
-    struct config {
-        sstring datadir;
-        bool enable_commitlog = true;
-        bool enable_disk_reads = true;
-        bool enable_disk_writes = true;
-        bool enable_cache = true;
-        bool enable_incremental_backups = false;
-        ::dirty_memory_manager* dirty_memory_manager = &default_dirty_memory_manager;
-        ::dirty_memory_manager* streaming_dirty_memory_manager = &default_dirty_memory_manager;
-        restricted_mutation_reader_config read_concurrency_config;
-        restricted_mutation_reader_config streaming_read_concurrency_config;
-        ::cf_stats* cf_stats = nullptr;
-        seastar::thread_scheduling_group* background_writer_scheduling_group = nullptr;
-        seastar::thread_scheduling_group* memtable_scheduling_group = nullptr;
-    };
-private:
-    std::unique_ptr<locator::abstract_replication_strategy> _replication_strategy;
-    lw_shared_ptr<keyspace_metadata> _metadata;
-    config _config;
-public:
-    explicit keyspace(lw_shared_ptr<keyspace_metadata> metadata, config cfg)
-        : _metadata(std::move(metadata))
-        , _config(std::move(cfg))
-    {}
-    void update_from(lw_shared_ptr<keyspace_metadata>);
-
-    /** Note: we return by shared pointer value, since the meta data is
-     * semi-volatile. I.e. we could do alter keyspace at any time, and
-     * boom, it is replaced.
-     */
-    lw_shared_ptr<keyspace_metadata> metadata() const {
-        return _metadata;
-    }
-    void create_replication_strategy(const std::map<sstring, sstring>& options);
-    /**
-     * This should not really be returned by reference, since the replication
-     * strategy is also volatile in that it could be replaced at "any" time.
-     * However, all current uses at least are "instantaneous", i.e. they do not
-     * carry it across a continuation. So it is sort of the same for now, but
-     * should eventually be refactored.
-     */
-    locator::abstract_replication_strategy& get_replication_strategy();
-    const locator::abstract_replication_strategy& get_replication_strategy() const;
-    column_family::config make_column_family_config(const schema& s, const db::config& db_config) const;
-    future<> make_directory_for_column_family(const sstring& name, utils::UUID uuid);
-    void add_or_update_column_family(const schema_ptr& s) {
-        _metadata->add_or_update_column_family(s);
-    }
-    void add_user_type(const user_type ut) {
-        _metadata->add_user_type(ut);
-    }
-    void remove_user_type(const user_type ut) {
-        _metadata->remove_user_type(ut);
-    }
-
-    // FIXME to allow simple registration at bootstrap
-    void set_replication_strategy(std::unique_ptr<locator::abstract_replication_strategy> replication_strategy);
-
-    bool incremental_backups_enabled() const {
-        return _config.enable_incremental_backups;
-    }
-
-    void set_incremental_backups(bool val) {
-        _config.enable_incremental_backups = val;
-    }
-
-    const sstring& datadir() const {
-        return _config.datadir;
-    }
-
-    sstring column_family_directory(const sstring& name, utils::UUID uuid) const;
-};
-
-class no_such_keyspace : public std::runtime_error {
-public:
-    no_such_keyspace(const sstring& ks_name);
-};
-
-class no_such_column_family : public std::runtime_error {
-public:
-    no_such_column_family(const utils::UUID& uuid);
-    no_such_column_family(const sstring& ks_name, const sstring& cf_name);
-};
-
-// Policy for distributed:
-// broadcast metadata writes
-// local metadata reads
-// use shard_of() for data
-
-class database {
-public:
-    using timeout_clock = lowres_clock;
-private:
-    ::cf_stats _cf_stats;
-    static constexpr size_t max_concurrent_reads() { return 100; }
-    static constexpr size_t max_streaming_concurrent_reads() { return 10; } // They're rather heavyweight, so limit more
-    static constexpr size_t max_system_concurrent_reads() { return 10; }
-    static constexpr size_t max_concurrent_sstable_loads() { return 3; }
-    struct db_stats {
-        uint64_t total_writes = 0;
-        uint64_t total_writes_failed = 0;
-        uint64_t total_writes_timedout = 0;
-        uint64_t total_reads = 0;
-        uint64_t total_reads_failed = 0;
-        uint64_t sstable_read_queue_overloaded = 0;
-
-        uint64_t short_data_queries = 0;
-        uint64_t short_mutation_queries = 0;
-    };
-
-    lw_shared_ptr<db_stats> _stats;
-    std::unique_ptr<cell_locker_stats> _cl_stats;
-
-    std::unique_ptr<db::config> _cfg;
-
-    dirty_memory_manager _system_dirty_memory_manager;
-    dirty_memory_manager _dirty_memory_manager;
-    dirty_memory_manager _streaming_dirty_memory_manager;
-
-    seastar::thread_scheduling_group _background_writer_scheduling_group;
-    flush_cpu_controller _memtable_cpu_controller;
-
-    semaphore _read_concurrency_sem{max_concurrent_reads()};
-    semaphore _streaming_concurrency_sem{max_streaming_concurrent_reads()};
-    restricted_mutation_reader_config _read_concurrency_config;
-    semaphore _system_read_concurrency_sem{max_system_concurrent_reads()};
-    restricted_mutation_reader_config _system_read_concurrency_config;
-
-    semaphore _sstable_load_concurrency_sem{max_concurrent_sstable_loads()};
-
-    std::unordered_map<sstring, keyspace> _keyspaces;
-    std::unordered_map<utils::UUID, lw_shared_ptr<column_family>> _column_families;
-    std::unordered_map<std::pair<sstring, sstring>, utils::UUID, utils::tuple_hash> _ks_cf_to_uuid;
-    std::unique_ptr<db::commitlog> _commitlog;
-    utils::UUID _version;
-    // compaction_manager object is referenced by all column families of a database.
-    compaction_manager _compaction_manager;
-    seastar::metrics::metric_groups _metrics;
-    bool _enable_incremental_backups = false;
-
-    future<> init_commitlog();
-    future<> apply_in_memory(const frozen_mutation& m, schema_ptr m_schema, db::rp_handle&&, timeout_clock::time_point timeout);
-    future<> apply_in_memory(const mutation& m, column_family& cf, db::rp_handle&&, timeout_clock::time_point timeout);
-private:
-    // Unless you are an early bootstrapper or the database itself, you should
-    // not be using this directly. Go for the public create_keyspace instead.
-    void add_keyspace(sstring name, keyspace k);
-    void create_in_memory_keyspace(const lw_shared_ptr<keyspace_metadata>& ksm);
-    friend void db::system_keyspace::make(database& db, bool durable, bool volatile_testing_only);
-    void setup_metrics();
-
-    friend class db_apply_executor;
-    future<> do_apply(schema_ptr, const frozen_mutation&, timeout_clock::time_point timeout);
-    future<> apply_with_commitlog(schema_ptr, column_family&, utils::UUID, const frozen_mutation&, timeout_clock::time_point timeout);
-    future<> apply_with_commitlog(column_family& cf, const mutation& m, timeout_clock::time_point timeout);
-
-    query::result_memory_limiter _result_memory_limiter;
-
-    future<mutation> do_apply_counter_update(column_family& cf, const frozen_mutation& fm, schema_ptr m_schema, timeout_clock::time_point timeout,
-                                             tracing::trace_state_ptr trace_state);
-public:
-    static utils::UUID empty_version;
-
-    query::result_memory_limiter& get_result_memory_limiter() {
-        return _result_memory_limiter;
-    }
-
-    void set_enable_incremental_backups(bool val) { _enable_incremental_backups = val; }
-
-    future<> parse_system_tables(distributed<service::storage_proxy>&);
-    database();
-    database(const db::config&);
-    database(database&&) = delete;
-    ~database();
-
-    void update_version(const utils::UUID& version);
-
-    const utils::UUID& get_version() const;
-
-    db::commitlog* commitlog() const {
-        return _commitlog.get();
-    }
-
-    compaction_manager& get_compaction_manager() {
-        return _compaction_manager;
-    }
-    const compaction_manager& get_compaction_manager() const {
-        return _compaction_manager;
-    }
-
-    void add_column_family(keyspace& ks, schema_ptr schema, column_family::config cfg);
-    future<> add_column_family_and_make_directory(schema_ptr schema);
-
-    /* throws std::out_of_range if missing */
-    const utils::UUID& find_uuid(const sstring& ks, const sstring& cf) const;
-    const utils::UUID& find_uuid(const schema_ptr&) const;
-
-    /**
-     * Creates a keyspace for a given metadata if it doesn't already exist.
- * - * @return ready future when the operation is complete - */ - future<> create_keyspace(const lw_shared_ptr&); - /* below, find_keyspace throws no_such_ on fail */ - keyspace& find_keyspace(const sstring& name); - const keyspace& find_keyspace(const sstring& name) const; - bool has_keyspace(const sstring& name) const; - future<> update_keyspace(const sstring& name); - void drop_keyspace(const sstring& name); - const auto& keyspaces() const { return _keyspaces; } - std::vector get_non_system_keyspaces() const; - column_family& find_column_family(const sstring& ks, const sstring& name); - const column_family& find_column_family(const sstring& ks, const sstring& name) const; - column_family& find_column_family(const utils::UUID&); - const column_family& find_column_family(const utils::UUID&) const; - column_family& find_column_family(const schema_ptr&); - const column_family& find_column_family(const schema_ptr&) const; - bool column_family_exists(const utils::UUID& uuid) const; - schema_ptr find_schema(const sstring& ks_name, const sstring& cf_name) const; - schema_ptr find_schema(const utils::UUID&) const; - bool has_schema(const sstring& ks_name, const sstring& cf_name) const; - std::set existing_index_names(const sstring& ks_name, const sstring& cf_to_exclude = sstring()) const; - sstring get_available_index_name(const sstring& ks_name, const sstring& cf_name, - std::experimental::optional index_name_root) const; - schema_ptr find_indexed_table(const sstring& ks_name, const sstring& index_name) const; - future<> stop(); - unsigned shard_of(const dht::token& t); - unsigned shard_of(const mutation& m); - unsigned shard_of(const frozen_mutation& m); - future, cache_temperature> query(schema_ptr, const query::read_command& cmd, query::result_request request, const dht::partition_range_vector& ranges, - tracing::trace_state_ptr trace_state, uint64_t max_result_size); - future query_mutations(schema_ptr, const query::read_command& cmd, const dht::partition_range& range, - query::result_memory_accounter&& accounter, tracing::trace_state_ptr trace_state); - // Apply the mutation atomically. - // Throws timed_out_error when timeout is reached. - future<> apply(schema_ptr, const frozen_mutation&, timeout_clock::time_point timeout = timeout_clock::time_point::max()); - future<> apply_streaming_mutation(schema_ptr, utils::UUID plan_id, const frozen_mutation&, bool fragmented); - future apply_counter_update(schema_ptr, const frozen_mutation& m, timeout_clock::time_point timeout, tracing::trace_state_ptr trace_state); - keyspace::config make_keyspace_config(const keyspace_metadata& ksm); - const sstring& get_snitch_name() const; - future<> clear_snapshot(sstring tag, std::vector keyspace_names); - - friend std::ostream& operator<<(std::ostream& out, const database& db); - const std::unordered_map& get_keyspaces() const { - return _keyspaces; - } - - std::unordered_map& get_keyspaces() { - return _keyspaces; - } - - const std::unordered_map>& get_column_families() const { - return _column_families; - } - - std::unordered_map>& get_column_families() { - return _column_families; - } - - std::vector> get_non_system_column_families() const; - - const std::unordered_map, utils::UUID, utils::tuple_hash>& - get_column_families_mapping() const { - return _ks_cf_to_uuid; - } - - const db::config& get_config() const { - return *_cfg; - } - - future<> flush_all_memtables(); - - // See #937. Truncation now requires a callback to get a time stamp - // that must be guaranteed to be the same for all shards. 
- typedef std::function()> timestamp_func; - - /** Truncates the given column family */ - future<> truncate(sstring ksname, sstring cfname, timestamp_func); - future<> truncate(const keyspace& ks, column_family& cf, timestamp_func, bool with_snapshot = true); - - bool update_column_family(schema_ptr s); - future<> drop_column_family(const sstring& ks_name, const sstring& cf_name, timestamp_func, bool with_snapshot = true); - void remove(const column_family&); - - const logalloc::region_group& dirty_memory_region_group() const { - return _dirty_memory_manager.region_group(); - } - - std::unordered_set get_initial_tokens(); - std::experimental::optional get_replace_address(); - bool is_replacing(); - semaphore& system_keyspace_read_concurrency_sem() { - return _system_read_concurrency_sem; - } - semaphore& sstable_load_concurrency_sem() { - return _sstable_load_concurrency_sem; - } - void register_connection_drop_notifier(netw::messaging_service& ms); - - friend class distributed_loader; -}; - -// FIXME: stub -class secondary_index_manager {}; - -future<> update_schema_version_and_announce(distributed& proxy); - -class distributed_loader { -public: - static void reshard(distributed& db, sstring ks_name, sstring cf_name); - static future<> open_sstable(distributed& db, sstables::entry_descriptor comps, - std::function (column_family&, sstables::foreign_sstable_open_info)> func, - const io_priority_class& pc = default_priority_class()); - static future<> load_new_sstables(distributed& db, sstring ks, sstring cf, std::vector new_tables); - static future> flush_upload_dir(distributed& db, sstring ks_name, sstring cf_name); - static future probe_file(distributed& db, sstring sstdir, sstring fname); - static future<> populate_column_family(distributed& db, sstring sstdir, sstring ks, sstring cf); - static future<> populate_keyspace(distributed& db, sstring datadir, sstring ks_name); - static future<> init_system_keyspace(distributed& db); - static future<> ensure_system_table_directories(distributed& db); - static future<> init_non_system_keyspaces(distributed& db, distributed& proxy); -}; - -#endif /* DATABASE_HH_ */ diff --git a/scylla/database_fwd.hh b/scylla/database_fwd.hh deleted file mode 100644 index c2e3734..0000000 --- a/scylla/database_fwd.hh +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -// database.hh -class database; -class memtable_list; - -// mutation.hh -class mutation; -class mutation_partition; - -// schema.hh -class schema; -class column_definition; -class column_mapping; - -// schema_mutations.hh -class schema_mutations; - -// keys.hh -class exploded_clustering_prefix; -class partition_key; -class partition_key_view; -class clustering_key_prefix; -class clustering_key_prefix_view; -using clustering_key = clustering_key_prefix; -using clustering_key_view = clustering_key_prefix_view; - -// memtable.hh -class memtable; diff --git a/scylla/db/batchlog_manager.cc b/scylla/db/batchlog_manager.cc deleted file mode 100644 index b54da5d..0000000 --- a/scylla/db/batchlog_manager.cc +++ /dev/null @@ -1,409 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "batchlog_manager.hh"
-#include "canonical_mutation.hh"
-#include "service/storage_service.hh"
-#include "service/storage_proxy.hh"
-#include "system_keyspace.hh"
-#include "utils/rate_limiter.hh"
-#include "log.hh"
-#include "serializer.hh"
-#include "db_clock.hh"
-#include "database.hh"
-#include "unimplemented.hh"
-#include "db/config.hh"
-#include "gms/failure_detector.hh"
-#include "service/storage_service.hh"
-#include "schema_registry.hh"
-#include "idl/uuid.dist.hh"
-#include "idl/frozen_schema.dist.hh"
-#include "serializer_impl.hh"
-#include "serialization_visitors.hh"
-#include "idl/uuid.dist.impl.hh"
-#include "idl/frozen_schema.dist.impl.hh"
-#include "message/messaging_service.hh"
-
-static logging::logger blogger("batchlog_manager");
-
-const uint32_t db::batchlog_manager::replay_interval;
-const uint32_t db::batchlog_manager::page_size;
-
-db::batchlog_manager::batchlog_manager(distributed<cql3::query_processor>& qp)
-        : _qp(qp.local())
-        , _e1(_rd()) {
-    namespace sm = seastar::metrics;
-
-    _metrics.add_group("batchlog_manager", {
-        sm::make_derive("total_write_replay_attempts", _stats.write_attempts,
-                        sm::description("Counts write operations issued in a batchlog replay flow. "
-                                        "A high value of this metric indicates a long batch replay list.")),
-    });
-}
-
-future<> db::batchlog_manager::do_batch_log_replay() {
-    // Using with_semaphore would be much simpler, but nested invoke_on can
-    // cause deadlock.
-    return get_batchlog_manager().invoke_on(0, [] (auto& bm) {
-        return bm._sem.wait().then([&bm] {
-            return bm._cpu++ % smp::count;
-        });
-    }).then([] (auto dest) {
-        blogger.debug("Batchlog replay on shard {}: starts", dest);
-        return get_batchlog_manager().invoke_on(dest, [] (auto& bm) {
-            return bm.replay_all_failed_batches();
-        }).then([dest] {
-            blogger.debug("Batchlog replay on shard {}: done", dest);
-        });
-    }).finally([] {
-        return get_batchlog_manager().invoke_on(0, [] (auto& bm) {
-            return bm._sem.signal();
-        });
-    });
-}
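do_batch_log_replay() above combines two ideas: a semaphore on shard 0
serializes replay rounds, and a monotonically growing counter spreads those
rounds round-robin over all shards. A standalone sketch of the shard-selection
arithmetic (smp_count and cpu are stand-ins for smp::count and _cpu):

    #include <cassert>

    int main() {
        const unsigned smp_count = 4;
        unsigned cpu = 0;   // shard-0 counter, incremented once per round

        // Twelve replay rounds visit every shard exactly three times.
        unsigned visits[4] = {0, 0, 0, 0};
        for (int round = 0; round < 12; ++round) {
            unsigned dest = cpu++ % smp_count;   // the invoke_on(0, ...) step
            ++visits[dest];
        }
        for (unsigned v : visits) {
            assert(v == 3);
        }
        return 0;
    }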
-future<> db::batchlog_manager::start() {
-    // Since replay is a "node global" operation, we should not attempt to do
-    // it in parallel on each shard. It will just overlap/interfere. To
-    // simplify syncing between the timer and user initiated replay operations,
-    // we use the _timer and _sem on shard zero only. Replaying batchlog can
-    // generate a lot of work, so we distribute the real work on all cpus with
-    // round-robin scheduling.
-    if (engine().cpu_id() == 0) {
-        _timer.set_callback([this] {
-            return do_batch_log_replay().handle_exception([] (auto ep) {
-                blogger.error("Exception in batch replay: {}", ep);
-            }).finally([this] {
-                _timer.arm(lowres_clock::now() + std::chrono::milliseconds(replay_interval));
-            });
-        });
-        auto ring_delay = service::get_local_storage_service().get_ring_delay();
-        _timer.arm(lowres_clock::now() + ring_delay);
-    }
-    return make_ready_future<>();
-}
-
-future<> db::batchlog_manager::stop() {
-    if (_stop) {
-        return make_ready_future<>();
-    }
-    _stop = true;
-    _timer.cancel();
-    return _gate.close();
-}
-
-future<size_t> db::batchlog_manager::count_all_batches() const {
-    sstring query = sprint("SELECT count(*) FROM %s.%s", system_keyspace::NAME, system_keyspace::BATCHLOG);
-    return _qp.execute_internal(query).then([](::shared_ptr<cql3::untyped_result_set> rs) {
-        return size_t(rs->one().get_as<int64_t>("count"));
-    });
-}
-
-mutation db::batchlog_manager::get_batch_log_mutation_for(const std::vector<mutation>& mutations, const utils::UUID& id, int32_t version) {
-    return get_batch_log_mutation_for(mutations, id, version, db_clock::now());
-}
-
-mutation db::batchlog_manager::get_batch_log_mutation_for(const std::vector<mutation>& mutations, const utils::UUID& id, int32_t version, db_clock::time_point now) {
-    auto schema = _qp.db().local().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG);
-    auto key = partition_key::from_singular(*schema, id);
-    auto timestamp = api::new_timestamp();
-    auto data = [this, &mutations] {
-        std::vector<canonical_mutation> fm(mutations.begin(), mutations.end());
-        bytes_ostream out;
-        for (auto& m : fm) {
-            ser::serialize(out, m);
-        }
-        return to_bytes(out.linearize());
-    }();
-
-    mutation m(key, schema);
-    m.set_cell(clustering_key_prefix::make_empty(), to_bytes("version"), version, timestamp);
-    m.set_cell(clustering_key_prefix::make_empty(), to_bytes("written_at"), now, timestamp);
-    m.set_cell(clustering_key_prefix::make_empty(), to_bytes("data"), data_value(std::move(data)), timestamp);
-
-    return m;
-}
-
-db_clock::duration db::batchlog_manager::get_batch_log_timeout() const {
-    // enough time for the actual write + BM removal mutation
-    return db_clock::duration(_qp.db().local().get_config().write_request_timeout_in_ms()) * 2;
-}
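Putting get_batch_log_timeout() together with the freshness check at the top of
replay_all_failed_batches() below: a batch is only replayed once it is older
than twice the write timeout, giving the original coordinator time to finish
the write and delete its own batchlog entry. A standalone sketch with a
hypothetical config value (steady_clock stands in for db_clock):

    #include <cassert>
    #include <chrono>

    int main() {
        using namespace std::chrono;
        // like get_batch_log_timeout(): twice the write timeout, covering the
        // batched write itself plus the batchlog-removal mutation
        auto write_request_timeout = milliseconds(2000);   // made-up config value
        auto timeout = write_request_timeout * 2;

        // replay skips entries younger than that window:
        auto written_at = steady_clock::now() - seconds(1);
        bool too_fresh = steady_clock::now() < written_at + timeout;
        assert(too_fresh);   // a 1s-old batch is still owned by its coordinator
        return 0;
    }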
-future<> db::batchlog_manager::replay_all_failed_batches() {
-    typedef db_clock::rep clock_type;
-
-    // rate limit is in bytes per second. Uses Double.MAX_VALUE if disabled (set to 0 in cassandra.yaml).
-    // max rate is scaled by the number of nodes in the cluster (same as for HHOM - see CASSANDRA-5272).
-    auto throttle_in_kb = _qp.db().local().get_config().batchlog_replay_throttle_in_kb() / service::get_storage_service().local().get_token_metadata().get_all_endpoints().size();
-    auto limiter = make_lw_shared<utils::rate_limiter>(throttle_in_kb * 1000);
-
-    auto batch = [this, limiter](const cql3::untyped_result_set::row& row) {
-        auto written_at = row.get_as<db_clock::time_point>("written_at");
-        auto id = row.get_as<utils::UUID>("id");
-        // enough time for the actual write + batchlog entry mutation delivery (two separate requests).
-        auto timeout = get_batch_log_timeout();
-        if (db_clock::now() < written_at + timeout) {
-            blogger.debug("Skipping replay of {}, too fresh", id);
-            return make_ready_future<>();
-        }
-
-        // check version of serialization format
-        if (!row.has("version")) {
-            blogger.warn("Skipping logged batch because of unknown version");
-            return make_ready_future<>();
-        }
-
-        auto version = row.get_as<int32_t>("version");
-        if (version != netw::messaging_service::current_version) {
-            blogger.warn("Skipping logged batch because of incorrect version");
-            return make_ready_future<>();
-        }
-
-        auto data = row.get_blob("data");
-
-        blogger.debug("Replaying batch {}", id);
-
-        auto fms = make_lw_shared<std::vector<canonical_mutation>>();
-        auto in = ser::as_input_stream(data);
-        while (in.size()) {
-            fms->emplace_back(ser::deserialize(in, boost::type<canonical_mutation>()));
-        }
-
-        auto size = data.size();
-
-        return map_reduce(*fms, [this, written_at] (canonical_mutation& fm) {
-            return system_keyspace::get_truncated_at(fm.column_family_id()).then([written_at, &fm] (db_clock::time_point t) ->
-                    std::experimental::optional<std::reference_wrapper<canonical_mutation>> {
-                if (written_at > t) {
-                    return { std::ref(fm) };
-                } else {
-                    return {};
-                }
-            });
-        },
-        std::vector<mutation>(),
-        [this] (std::vector<mutation> mutations, std::experimental::optional<std::reference_wrapper<canonical_mutation>> fm) {
-            if (fm) {
-                schema_ptr s = _qp.db().local().find_schema(fm.value().get().column_family_id());
-                mutations.emplace_back(fm.value().get().to_mutation(s));
-            }
-            return mutations;
-        }).then([this, id, limiter, written_at, size, fms] (std::vector<mutation> mutations) {
-            if (mutations.empty()) {
-                return make_ready_future<>();
-            }
-            const auto ttl = [this, &mutations, written_at]() -> clock_type {
-                /*
-                 * Calculate ttl for the mutations' hints (and reduce ttl by the time the mutations spent in the batchlog).
-                 * This ensures that deletes aren't "undone" by an old batch replay.
-                 */
-                auto unadjusted_ttl = std::numeric_limits<clock_type>::max();
-                warn(unimplemented::cause::HINT);
-#if 0
-                for (auto& m : *mutations) {
-                    unadjustedTTL = Math.min(unadjustedTTL, HintedHandOffManager.calculateHintTTL(mutation));
-                }
-#endif
-                return unadjusted_ttl - std::chrono::duration_cast<std::chrono::seconds>(db_clock::now() - written_at).count();
-            }();
-
-            if (ttl <= 0) {
-                return make_ready_future<>();
-            }
-            // Origin does the send manually, however I can't see a super great reason to do so.
-            // Our normal write path does not add much redundancy to the dispatch, and rate is handled after send
-            // in both cases.
-            // FIXME: verify that the above is reasonably true.
-            return limiter->reserve(size).then([this, mutations = std::move(mutations), id] {
-                _stats.write_attempts += mutations.size();
-                // #1222 - change cl level to ALL, emulating origin's behaviour of sending/hinting
-                // to all natural end points.
-                // Note however that origin uses hints here, and actually allows for this
-                // send to partially or wholly fail in actually sending stuff. Since we don't
-                // have hints (yet), send with CL=ALL, and hope we can re-do this soon.
-                // See below, we use retry on write failure.
-                return _qp.proxy().local().mutate(mutations, db::consistency_level::ALL, nullptr);
-            });
-        }).then_wrapped([this, id](future<> batch_result) {
-            try {
-                batch_result.get();
-            } catch (no_such_keyspace& ex) {
-                // should probably ignore and drop the batch
-            } catch (...) {
-                // timeout, overload etc.
-                // Do _not_ remove the batch, assuming we got a node write error.
-                // Since we don't have hints (which origin is satisfied with),
-                // we have to resort to keeping this batch to next lap.
- return make_ready_future<>(); - } - // delete batch - auto schema = _qp.db().local().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG); - auto key = partition_key::from_singular(*schema, id); - mutation m(key, schema); - auto now = service::client_state(service::client_state::internal_tag()).get_timestamp(); - m.partition().apply_delete(*schema, clustering_key_prefix::make_empty(), tombstone(now, gc_clock::now())); - return _qp.proxy().local().mutate_locally(m); - }); - }; - - return seastar::with_gate(_gate, [this, batch = std::move(batch)] { - blogger.debug("Started replayAllFailedBatches (cpu {})", engine().cpu_id()); - - typedef ::shared_ptr page_ptr; - sstring query = sprint("SELECT id, data, written_at, version FROM %s.%s LIMIT %d", system_keyspace::NAME, system_keyspace::BATCHLOG, page_size); - return _qp.execute_internal(query).then([this, batch = std::move(batch)](page_ptr page) { - return do_with(std::move(page), [this, batch = std::move(batch)](page_ptr & page) mutable { - return repeat([this, &page, batch = std::move(batch)]() mutable { - if (page->empty()) { - return make_ready_future(stop_iteration::yes); - } - auto id = page->back().get_as("id"); - return parallel_for_each(*page, batch).then([this, &page, id]() { - if (page->size() < page_size) { - return make_ready_future(stop_iteration::yes); // we've exhausted the batchlog, next query would be empty. - } - sstring query = sprint("SELECT id, data, written_at, version FROM %s.%s WHERE token(id) > token(?) LIMIT %d", - system_keyspace::NAME, - system_keyspace::BATCHLOG, - page_size); - return _qp.execute_internal(query, {id}).then([&page](auto res) { - page = std::move(res); - return make_ready_future(stop_iteration::no); - }); - }); - }); - }); - }).then([this] { - // TODO FIXME : cleanup() -#if 0 - ColumnFamilyStore cfs = Keyspace.open(SystemKeyspace.NAME).getColumnFamilyStore(SystemKeyspace.BATCHLOG); - cfs.forceBlockingFlush(); - Collection descriptors = new ArrayList<>(); - for (SSTableReader sstr : cfs.getSSTables()) - descriptors.add(sstr.descriptor); - if (!descriptors.isEmpty()) // don't pollute the logs if there is nothing to compact. 
- CompactionManager.instance.submitUserDefined(cfs, descriptors, Integer.MAX_VALUE).get(); - -#endif - - }).then([this] { - blogger.debug("Finished replayAllFailedBatches"); - }); - }); -} - -std::unordered_set db::batchlog_manager::endpoint_filter(const sstring& local_rack, const std::unordered_map>& endpoints) { - // special case for single-node data centers - if (endpoints.size() == 1 && endpoints.begin()->second.size() == 1) { - return endpoints.begin()->second; - } - - // strip out dead endpoints and localhost - std::unordered_multimap validated; - - auto is_valid = [](gms::inet_address input) { - return input != utils::fb_utilities::get_broadcast_address() - && gms::get_local_failure_detector().is_alive(input) - ; - }; - - for (auto& e : endpoints) { - for (auto& a : e.second) { - if (is_valid(a)) { - validated.emplace(e.first, a); - } - } - } - - typedef std::unordered_set return_type; - - if (validated.size() <= 2) { - return boost::copy_range(validated | boost::adaptors::map_values); - } - - if (validated.size() - validated.count(local_rack) >= 2) { - // we have enough endpoints in other racks - validated.erase(local_rack); - } - - if (validated.bucket_count() == 1) { - // we have only 1 `other` rack - auto res = validated | boost::adaptors::map_values; - if (validated.size() > 2) { - return boost::copy_range( - boost::copy_range>(res) - | boost::adaptors::sliced(0, 2)); - } - return boost::copy_range(res); - } - - // randomize which racks we pick from if more than 2 remaining - - std::vector racks = boost::copy_range>(validated | boost::adaptors::map_keys); - - if (validated.bucket_count() > 2) { - std::shuffle(racks.begin(), racks.end(), _e1); - racks.resize(2); - } - - std::unordered_set result; - - // grab a random member of up to two racks - for (auto& rack : racks) { - auto rack_members = validated.bucket(rack); - auto n = validated.bucket_size(rack_members); - auto cpy = boost::copy_range>(validated.equal_range(rack) | boost::adaptors::map_values); - std::uniform_int_distribution rdist(0, n - 1); - result.emplace(cpy[rdist(_e1)]); - } - - return result; -} - - -distributed db::_the_batchlog_manager; diff --git a/scylla/db/batchlog_manager.hh b/scylla/db/batchlog_manager.hh deleted file mode 100644 index 62322b8..0000000 --- a/scylla/db/batchlog_manager.hh +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
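
endpoint_filter() above encodes a rack-aware placement policy: keep a single-node data center as-is, strip dead endpoints and the local address, prefer racks other than the local one, and take one random member from each of up to two racks. A distilled, standard-library-only sketch of the same policy (strings stand in for gms::inet_address, and the bucket-count quirks of the unordered_multimap variant are simplified away):

    #include <algorithm>
    #include <iostream>
    #include <random>
    #include <string>
    #include <unordered_map>
    #include <unordered_set>
    #include <vector>

    using endpoint = std::string;

    std::unordered_set<endpoint> pick_replay_targets(
            const std::string& local_rack,
            std::unordered_multimap<std::string, endpoint> validated, // rack -> live, non-local endpoints
            std::default_random_engine& rng) {
        std::unordered_set<endpoint> result;
        if (validated.size() <= 2) { // too few candidates to be picky
            for (auto& [rack, ep] : validated) { result.insert(ep); }
            return result;
        }
        if (validated.size() - validated.count(local_rack) >= 2) {
            validated.erase(local_rack); // enough endpoints in other racks
        }
        std::vector<std::string> racks; // distinct remaining racks
        for (auto& [rack, ep] : validated) {
            if (std::find(racks.begin(), racks.end(), rack) == racks.end()) {
                racks.push_back(rack);
            }
        }
        std::shuffle(racks.begin(), racks.end(), rng); // randomize rack choice
        racks.resize(std::min<size_t>(racks.size(), 2));
        for (auto& rack : racks) { // one random member from each chosen rack
            std::vector<endpoint> members;
            auto [b, e] = validated.equal_range(rack);
            for (auto i = b; i != e; ++i) { members.push_back(i->second); }
            std::uniform_int_distribution<size_t> dist(0, members.size() - 1);
            result.insert(members[dist(rng)]);
        }
        return result;
    }

    int main() {
        std::default_random_engine rng(std::random_device{}());
        std::unordered_multimap<std::string, endpoint> validated{
            {"r1", "10.0.0.1"}, {"r1", "10.0.0.2"},
            {"r2", "10.0.1.1"}, {"r3", "10.0.2.1"},
        };
        for (auto& ep : pick_replay_targets("r1", validated, rng)) {
            std::cout << ep << "\n";
        }
    }
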
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "cql3/query_processor.hh" -#include "gms/inet_address.hh" -#include "db_clock.hh" - -namespace db { - -class batchlog_manager { -private: - static constexpr uint32_t replay_interval = 60 * 1000; // milliseconds - static constexpr uint32_t page_size = 128; // same as HHOM, for now, w/out using any heuristics. TODO: set based on avg batch size. - - using clock_type = lowres_clock; - - struct stats { - uint64_t write_attempts = 0; - } _stats; - - seastar::metrics::metric_groups _metrics; - - size_t _total_batches_replayed = 0; - cql3::query_processor& _qp; - timer<clock_type> _timer; - semaphore _sem{1}; - seastar::gate _gate; - unsigned _cpu = 0; - bool _stop = false; - - std::random_device _rd; - std::default_random_engine _e1; - - future<> replay_all_failed_batches(); -public: - // Takes a QP, not a distributed one, because this object is supposed - // to be per shard and does no dispatching beyond delegating to the - // shard qp (which is what you feed here). - //batchlog_manager(cql3::query_processor&); - /* - batchlog_manager(distributed<cql3::query_processor>& qp) - : batchlog_manager(qp.local()) - {} - */ - batchlog_manager(distributed<cql3::query_processor>& qp); - future<> start(); - future<> stop(); - - future<> do_batch_log_replay(); - - future<size_t> count_all_batches() const; - size_t get_total_batches_replayed() const { - return _total_batches_replayed; - } - mutation get_batch_log_mutation_for(const std::vector<mutation>&, const utils::UUID&, int32_t); - mutation get_batch_log_mutation_for(const std::vector<mutation>&, const utils::UUID&, int32_t, db_clock::time_point); - db_clock::duration get_batch_log_timeout() const; - - std::unordered_set<gms::inet_address> endpoint_filter(const sstring&, const std::unordered_map<sstring, std::unordered_set<gms::inet_address>>&); -}; - -extern distributed<batchlog_manager> _the_batchlog_manager; - -inline distributed<batchlog_manager>& get_batchlog_manager() { - return _the_batchlog_manager; -} - -inline batchlog_manager& get_local_batchlog_manager() { - return _the_batchlog_manager.local(); -} - -} diff --git a/scylla/db/commitlog/commitlog.cc b/scylla/db/commitlog/commitlog.cc deleted file mode 100644 index c2fe024..0000000 --- a/scylla/db/commitlog/commitlog.cc +++ /dev/null @@ -1,1948 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla.
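
The header removed above declared the manager as a per-shard service behind a global distributed<batchlog_manager>. For context, the intended wiring looks roughly like the sketch below, assuming seastar's distributed<> lifecycle (start, invoke_on_all and stop are seastar calls; the function itself is illustrative and not part of this patch):

    #include "core/distributed.hh"
    #include "db/batchlog_manager.hh"

    future<> start_batchlog_manager(distributed<cql3::query_processor>& qp) {
        // Construct one batchlog_manager per shard, handing each the
        // distributed query processor (each instance uses qp.local()).
        return db::get_batchlog_manager().start(std::ref(qp)).then([] {
            return db::get_batchlog_manager().invoke_on_all([](db::batchlog_manager& bm) {
                return bm.start(); // arms the per-shard replay timer
            });
        });
    }

    future<> stop_batchlog_manager() {
        return db::get_batchlog_manager().stop(); // stops every shard's instance
    }
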
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "seastarx.hh" - -#include "commitlog.hh" -#include "rp_set.hh" -#include "db/config.hh" -#include "utils/data_input.hh" -#include "utils/crc.hh" -#include "utils/runtime.hh" -#include "utils/flush_queue.hh" -#include "log.hh" -#include "commitlog_entry.hh" -#include "service/priority_manager.hh" - -#include -#include - -#include "checked-file-impl.hh" -#include "disk-error-handler.hh" - -static logging::logger clogger("commitlog"); - -using namespace std::chrono_literals; - -class crc32_nbo { - utils::crc32 _c; -public: - template - void process(T t) { - _c.process(net::hton(t)); - } - uint32_t checksum() const { - return _c.get(); - } - void process_bytes(const uint8_t* data, size_t size) { - return _c.process(data, size); - } - void process_bytes(const int8_t* data, size_t size) { - return _c.process(reinterpret_cast(data), size); - } - void process_bytes(const char* data, size_t size) { - return _c.process(reinterpret_cast(data), size); - } -}; - -class db::cf_holder { -public: - virtual ~cf_holder() {}; - virtual void release_cf_count(const cf_id_type&) = 0; -}; - -db::commitlog::config::config(const db::config& cfg) - : commit_log_location(cfg.commitlog_directory()) - , commitlog_total_space_in_mb(cfg.commitlog_total_space_in_mb() >= 0 ? cfg.commitlog_total_space_in_mb() : (memory::stats().total_memory() * smp::count) >> 20) - , commitlog_segment_size_in_mb(cfg.commitlog_segment_size_in_mb()) - , commitlog_sync_period_in_ms(cfg.commitlog_sync_period_in_ms()) - , mode(cfg.commitlog_sync() == "batch" ? sync_mode::BATCH : sync_mode::PERIODIC) -{} - -db::commitlog::descriptor::descriptor(segment_id_type i, uint32_t v) - : id(i), ver(v) { -} - -db::commitlog::descriptor::descriptor(replay_position p) - : descriptor(p.id) { -} - -db::commitlog::descriptor::descriptor(std::pair p) - : descriptor(p.first, p.second) { -} - -db::commitlog::descriptor::descriptor(sstring filename) - : descriptor([filename]() { - std::smatch m; - // match both legacy and new version of commitlogs Ex: CommitLog-12345.log and CommitLog-4-12345.log. - std::regex rx("(?:.*/)?" + FILENAME_PREFIX + "((\\d+)(" + SEPARATOR + "\\d+)?)" + FILENAME_EXTENSION); - std::string sfilename = filename; - if (!std::regex_match(sfilename, m, rx)) { - throw std::domain_error("Cannot parse the version of the file: " + filename); - } - if (m[3].length() == 0) { - // CMH. 
Can most likely ignore this - throw std::domain_error("Commitlog segment is too old to open; upgrade to 1.2.5+ first"); - } - - segment_id_type id = std::stoull(m[3].str().substr(1)); - uint32_t ver = std::stoul(m[2].str()); - - return std::make_pair(id, ver); - }()) { -} - -sstring db::commitlog::descriptor::filename() const { - return FILENAME_PREFIX + std::to_string(ver) + SEPARATOR - + std::to_string(id) + FILENAME_EXTENSION; -} - -db::commitlog::descriptor::operator db::replay_position() const { - return replay_position(id); -} - -const std::string db::commitlog::descriptor::SEPARATOR("-"); -const std::string db::commitlog::descriptor::FILENAME_PREFIX( - "CommitLog" + SEPARATOR); -const std::string db::commitlog::descriptor::FILENAME_EXTENSION(".log"); - -class db::commitlog::segment_manager : public ::enable_shared_from_this { -public: - config cfg; - std::vector _segments_to_replay; - const uint64_t max_size; - const uint64_t max_mutation_size; - // Divide the size-on-disk threshold by #cpus used, since we assume - // we distribute stuff more or less equally across shards. - const uint64_t max_disk_size; // per-shard - - bool _shutdown = false; - std::experimental::optional> _shutdown_promise = {}; - - // Allocation must throw timed_out_error by contract. - using timeout_exception_factory = default_timeout_exception_factory; - - basic_semaphore _flush_semaphore; - - seastar::metrics::metric_groups _metrics; - - // TODO: verify that we're ok with not-so-great granularity - using clock_type = lowres_clock; - using time_point = clock_type::time_point; - using sseg_ptr = ::shared_ptr; - - using request_controller_type = basic_semaphore; - using request_controller_units = semaphore_units; - request_controller_type _request_controller; - - stdx::optional>> _segment_allocating; - - void account_memory_usage(size_t size) { - _request_controller.consume(size); - } - - void notify_memory_written(size_t size) { - _request_controller.signal(size); - } - - future - allocate_when_possible(const cf_id_type& id, shared_ptr writer, commitlog::timeout_clock::time_point timeout); - - struct stats { - uint64_t cycle_count = 0; - uint64_t flush_count = 0; - uint64_t allocation_count = 0; - uint64_t bytes_written = 0; - uint64_t bytes_slack = 0; - uint64_t segments_created = 0; - uint64_t segments_destroyed = 0; - uint64_t pending_flushes = 0; - uint64_t flush_limit_exceeded = 0; - uint64_t total_size = 0; - uint64_t buffer_list_bytes = 0; - uint64_t total_size_on_disk = 0; - uint64_t requests_blocked_memory = 0; - }; - - stats totals; - - size_t pending_allocations() const { - return _request_controller.waiters(); - } - - future<> begin_flush() { - ++totals.pending_flushes; - if (totals.pending_flushes >= cfg.max_active_flushes) { - ++totals.flush_limit_exceeded; - clogger.trace("Flush ops overflow: {}. 
Will block.", totals.pending_flushes); - } - return _flush_semaphore.wait(); - } - void end_flush() { - _flush_semaphore.signal(); - --totals.pending_flushes; - } - segment_manager(config c); - ~segment_manager() { - clogger.trace("Commitlog {} disposed", cfg.commit_log_location); - } - - uint64_t next_id() { - return ++_ids; - } - - std::exception_ptr sanity_check_size(size_t size) { - if (size > max_mutation_size) { - return make_exception_ptr(std::invalid_argument( - "Mutation of " + std::to_string(size) - + " bytes is too large for the maximum size of " - + std::to_string(max_mutation_size))); - } - return nullptr; - } - - future<> init(); - future<sseg_ptr> new_segment(); - future<sseg_ptr> active_segment(commitlog::timeout_clock::time_point timeout); - future<sseg_ptr> allocate_segment(bool active); - - future<> clear(); - future<> sync_all_segments(bool shutdown = false); - future<> shutdown(); - - void create_counters(); - - future<> orphan_all(); - - void discard_unused_segments(); - void discard_completed_segments(const cf_id_type&); - void discard_completed_segments(const cf_id_type&, const rp_set&); - void on_timer(); - void sync(); - void arm(uint32_t extra = 0) { - if (!_shutdown) { - _timer.arm(std::chrono::milliseconds(cfg.commitlog_sync_period_in_ms + extra)); - } - } - - std::vector<sstring> get_active_names() const; - uint64_t get_num_dirty_segments() const; - uint64_t get_num_active_segments() const; - - using buffer_type = temporary_buffer<char>; - - buffer_type acquire_buffer(size_t s); - void release_buffer(buffer_type&&); - - future<std::vector<descriptor>> list_descriptors(sstring dir); - - flush_handler_id add_flush_handler(flush_handler h) { - auto id = ++_flush_ids; - _flush_handlers[id] = std::move(h); - return id; - } - void remove_flush_handler(flush_handler_id id) { - _flush_handlers.erase(id); - } - - void flush_segments(bool = false); - -private: - future<> clear_reserve_segments(); - - size_t max_request_controller_units() const; - segment_id_type _ids = 0; - std::vector<sseg_ptr> _segments; - queue<sseg_ptr> _reserve_segments; - std::vector<buffer_type> _temp_buffers; - std::unordered_map<flush_handler_id, flush_handler> _flush_handlers; - flush_handler_id _flush_ids = 0; - replay_position _flush_position; - timer<clock_type> _timer; - future<> replenish_reserve(); - future<> _reserve_replenisher; - seastar::gate _gate; - uint64_t _new_counter = 0; -}; - -/* - * A single commit log file on disk. Manages creation of the file and writing mutations to disk, - * as well as tracking the last mutation position of any "dirty" CFs covered by the segment file. Segment - * files are initially allocated to a fixed size and can grow to accommodate a larger value if necessary. - * - * The IO flow is somewhat convoluted and goes something like this: - * - * Mutation path: - * - Adding data to the segment usually writes into the internal buffer - * - On EOB or overflow we issue a write to disk ("cycle"). - * - A cycle call will acquire the segment read lock and send the - * buffer to the corresponding position in the file - * - If we are periodic and crossed a timing threshold, or running "batch" mode - * we might be forced to issue a flush ("sync") after adding data - * - A sync call acquires the write lock, thus locking out writes - * and waiting for pending writes to finish. It then checks the - * high data mark, and issues the actual file flush. - * Note that the write lock is released prior to issuing the - * actual file flush, thus we are allowed to write data - * after a flush point concurrently with a pending flush.
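
The read-lock/write-lock split described above is the crux of the design: cycle() holds the lock shared while appending, while sync() takes it exclusive only long enough to pick the flush-to position, then releases it before the slow file flush, so writes beyond the flush point proceed concurrently. A minimal single-threaded model of that discipline using only std::shared_mutex (segment_model and its members are illustrative stand-ins):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <mutex>
    #include <shared_mutex>

    class segment_model {
        std::shared_mutex _rw;
        uint64_t _file_pos = 0;   // high data mark
        uint64_t _flush_pos = 0;  // everything below this is known durable
    public:
        void cycle(uint64_t bytes) {
            std::shared_lock lock(_rw);   // many concurrent writers allowed
            _file_pos += bytes;           // stand-in for the dma_write
        }
        void sync() {
            uint64_t flush_to;
            {
                std::unique_lock lock(_rw); // lock out writers, wait them out
                flush_to = _file_pos;       // capture the high data mark...
            }                               // ...then release before flushing
            // the actual file flush happens here, concurrently with new writes
            _flush_pos = std::max(_flush_pos, flush_to);
        }
        uint64_t flushed() const { return _flush_pos; }
    };

    int main() {
        segment_model s;
        s.cycle(4096);
        s.sync();
        s.cycle(4096);                     // lands "after" the flush point
        std::cout << s.flushed() << "\n";  // 4096
    }
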
- * - * Sync timer: - * - In periodic mode, we try to primarily issue sync calls in - * a timer task issued every N seconds. The timer does the same - * operation as the above described sync, and resets the timeout - * so that mutation path will not trigger syncs and delay. - * - * Note that we do not care which order segment chunks finish writing - * to disk, other than all below a flush point must finish before flushing. - * - * We currently do not wait for flushes to finish before issuing the next - * cycle call ("after" flush point in the file). This might not be optimal. - * - * To close and finish a segment, we first close the gate object that guards - * writing data to it, then flush it fully (including waiting for futures created - * by the timer to run their course), and finally wait for it to - * become "clean", i.e. get notified that all mutations it holds have been - * persisted to sstables elsewhere. Once this is done, we can delete the - * segment. If a segment (object) is deleted without being fully clean, we - * do not remove the file on disk. - * - */ - -class db::commitlog::segment : public enable_shared_from_this<segment>, public cf_holder { - friend class rp_handle; - - ::shared_ptr<segment_manager> _segment_manager; - - descriptor _desc; - file _file; - sstring _file_name; - - uint64_t _file_pos = 0; - uint64_t _flush_pos = 0; - uint64_t _buf_pos = 0; - bool _closed = false; - - using buffer_type = segment_manager::buffer_type; - using sseg_ptr = segment_manager::sseg_ptr; - using clock_type = segment_manager::clock_type; - using time_point = segment_manager::time_point; - - buffer_type _buffer; - std::unordered_map<cf_id_type, uint64_t> _cf_dirty; - time_point _sync_time; - seastar::gate _gate; - uint64_t _write_waiters = 0; - utils::flush_queue<replay_position, clock_type> _pending_ops; - - uint64_t _num_allocs = 0; - - std::unordered_set<table_schema_version> _known_schema_versions; - - friend std::ostream& operator<<(std::ostream&, const segment&); - friend class segment_manager; - - future<> begin_flush() { - // This is maintaining the semantics of only using the write-lock - // as a gate for flushing, i.e.
once we've begun a flush for position X - // we are ok with writes to positions > X - return _segment_manager->begin_flush(); - } - - void end_flush() { - _segment_manager->end_flush(); - } - -public: - struct cf_mark { - const segment& s; - }; - friend std::ostream& operator<<(std::ostream&, const cf_mark&); - - // The commit log entry overhead in bytes (int: length + int: head checksum + int: tail checksum) - static constexpr size_t entry_overhead_size = 3 * sizeof(uint32_t); - static constexpr size_t segment_overhead_size = 2 * sizeof(uint32_t); - static constexpr size_t descriptor_header_size = 5 * sizeof(uint32_t); - static constexpr uint32_t segment_magic = ('S'<<24) |('C'<< 16) | ('L' << 8) | 'C'; - - // The commit log (chained) sync marker/header size in bytes (int: length + int: checksum [segmentId, position]) - static constexpr size_t sync_marker_size = 2 * sizeof(uint32_t); - - static constexpr size_t alignment = 4096; - // TODO : tune initial / default size - static constexpr size_t default_size = align_up(128 * 1024, alignment); - - segment(::shared_ptr m, const descriptor& d, file && f, bool active) - : _segment_manager(std::move(m)), _desc(std::move(d)), _file(std::move(f)), - _file_name(_segment_manager->cfg.commit_log_location + "/" + _desc.filename()), _sync_time( - clock_type::now()), _pending_ops(true) // want exception propagation - { - ++_segment_manager->totals.segments_created; - clogger.debug("Created new {} segment {}", active ? "active" : "reserve", *this); - } - ~segment() { - if (is_clean()) { - clogger.debug("Segment {} is no longer active and will be deleted now", *this); - ++_segment_manager->totals.segments_destroyed; - _segment_manager->totals.total_size_on_disk -= size_on_disk(); - _segment_manager->totals.total_size -= (size_on_disk() + _buffer.size()); - try { - commit_io_check([] (const char* fname) { ::unlink(fname); }, - _file_name.c_str()); - } catch (...) { - clogger.error("Could not delete segment {}: {}", *this, std::current_exception()); - } - } else { - clogger.warn("Segment {} is dirty and is left on disk.", *this); - } - } - - bool is_schema_version_known(schema_ptr s) { - return _known_schema_versions.count(s->version()); - } - void add_schema_version(schema_ptr s) { - _known_schema_versions.emplace(s->version()); - } - void forget_schema_versions() { - _known_schema_versions.clear(); - } - - void release_cf_count(const cf_id_type& cf) override { - mark_clean(cf, 1); - if (can_delete()) { - _segment_manager->discard_unused_segments(); - } - } - - bool must_sync() { - if (_segment_manager->cfg.mode == sync_mode::BATCH) { - return false; - } - auto now = clock_type::now(); - auto ms = std::chrono::duration_cast( - now - _sync_time).count(); - if ((_segment_manager->cfg.commitlog_sync_period_in_ms * 2) < uint64_t(ms)) { - clogger.debug("{} needs sync. {} ms elapsed", *this, ms); - return true; - } - return false; - } - /** - * Finalize this segment and get a new one - */ - future finish_and_get_new(commitlog::timeout_clock::time_point timeout) { - _closed = true; - sync(); - return _segment_manager->active_segment(timeout); - } - void reset_sync_time() { - _sync_time = clock_type::now(); - } - // See class comment for info - future sync(bool shutdown = false) { - /** - * If we are shutting down, we first - * close the allocation gate, thus no new - * data can be appended. Then we just issue a - * flush, which will wait for any queued ops - * to complete as well. Then we close the ops - * queue, just to be sure. 
- */ - if (shutdown) { - auto me = shared_from_this(); - return _gate.close().then([me] { - me->_closed = true; - return me->sync().finally([me] { - // When we get here, nothing should add ops, - // and we should have waited out all pending. - return me->_pending_ops.close(); - }); - }); - } - - // Note: this is not a marker for when sync was finished. - // It is when it was initiated - reset_sync_time(); - return cycle(true); - } - // See class comment for info - future flush(uint64_t pos = 0) { - auto me = shared_from_this(); - assert(me.use_count() > 1); - if (pos == 0) { - pos = _file_pos; - } - - clogger.trace("Syncing {} {} -> {}", *this, _flush_pos, pos); - - // Only run the flush when all write ops at lower rp:s - // have completed. - replay_position rp(_desc.id, position_type(pos)); - - // Run like this to ensure flush ordering, and making flushes "waitable" - return _pending_ops.run_with_ordered_post_op(rp, [] { return make_ready_future<>(); }, [this, pos, me, rp] { - assert(_pending_ops.has_operation(rp)); - return do_flush(pos); - }); - } - - future do_flush(uint64_t pos) { - auto me = shared_from_this(); - return begin_flush().then([this, pos]() { - if (pos <= _flush_pos) { - clogger.trace("{} already synced! ({} < {})", *this, pos, _flush_pos); - return make_ready_future<>(); - } - return _file.flush().then_wrapped([this, pos](future<> f) { - try { - f.get(); - // TODO: retry/ignore/fail/stop - optional behaviour in origin. - // we fast-fail the whole commit. - _flush_pos = std::max(pos, _flush_pos); - ++_segment_manager->totals.flush_count; - clogger.trace("{} synced to {}", *this, _flush_pos); - } catch (...) { - clogger.error("Failed to flush commits to disk: {}", std::current_exception()); - throw; - } - }); - }).finally([this] { - end_flush(); - }).then([me] { - return make_ready_future(me); - }); - } - - /** - * Allocate a new buffer - */ - void new_buffer(size_t s) { - assert(_buffer.empty()); - - auto overhead = segment_overhead_size; - if (_file_pos == 0) { - overhead += descriptor_header_size; - } - - auto a = align_up(s + overhead, alignment); - auto k = std::max(a, default_size); - - for (;;) { - try { - _buffer = _segment_manager->acquire_buffer(k); - break; - } catch (std::bad_alloc&) { - clogger.warn("Could not allocate {} k bytes output buffer ({} k required)", k / 1024, a / 1024); - if (k > a) { - k = std::max(a, k / 2); - clogger.debug("Trying reduced size: {} k", k / 1024); - continue; - } - throw; - } - } - _buf_pos = overhead; - auto * p = reinterpret_cast(_buffer.get_write()); - std::fill(p, p + overhead, 0); - _segment_manager->totals.total_size += k; - } - - bool buffer_is_empty() const { - return _buf_pos <= segment_overhead_size - || (_file_pos == 0 && _buf_pos <= (segment_overhead_size + descriptor_header_size)); - } - /** - * Send any buffer contents to disk and get a new tmp buffer - */ - // See class comment for info - future cycle(bool flush_after = false) { - if (_buffer.empty()) { - return flush_after ? flush() : make_ready_future(shared_from_this()); - } - - auto size = clear_buffer_slack(); - auto buf = std::move(_buffer); - auto off = _file_pos; - auto top = off + size; - auto num = _num_allocs; - - _file_pos = top; - _buf_pos = 0; - _num_allocs = 0; - - auto me = shared_from_this(); - assert(me.use_count() > 1); - - auto * p = buf.get_write(); - assert(std::count(p, p + 2 * sizeof(uint32_t), 0) == 2 * sizeof(uint32_t)); - - data_output out(p, p + buf.size()); - - auto header_size = 0; - - if (off == 0) { - // first block. 
write file header. - out.write(segment_magic); - out.write(_desc.ver); - out.write(_desc.id); - crc32_nbo crc; - crc.process(_desc.ver); - crc.process(_desc.id & 0xffffffff); - crc.process(_desc.id >> 32); - out.write(crc.checksum()); - header_size = descriptor_header_size; - } - - // write chunk header - crc32_nbo crc; - crc.process(_desc.id & 0xffffffff); - crc.process(_desc.id >> 32); - crc.process(uint32_t(off + header_size)); - - out.write(uint32_t(_file_pos)); - out.write(crc.checksum()); - - forget_schema_versions(); - - replay_position rp(_desc.id, position_type(off)); - - clogger.trace("Writing {} entries, {} k in {} -> {}", num, size, off, off + size); - - // The write will be allowed to start now, but flush (below) must wait for not only this, - // but all previous write/flush pairs. - return _pending_ops.run_with_ordered_post_op(rp, [this, size, off, buf = std::move(buf)]() mutable { - auto written = make_lw_shared(0); - auto p = buf.get(); - return repeat([this, size, off, written, p]() mutable { - auto&& priority_class = service::get_local_commitlog_priority(); - return _file.dma_write(off + *written, p + *written, size - *written, priority_class).then_wrapped([this, size, written](future&& f) { - try { - auto bytes = std::get<0>(f.get()); - *written += bytes; - _segment_manager->totals.bytes_written += bytes; - _segment_manager->totals.total_size_on_disk += bytes; - ++_segment_manager->totals.cycle_count; - if (*written == size) { - return make_ready_future(stop_iteration::yes); - } - // gah, partial write. should always get here with dma chunk sized - // "bytes", but lets make sure... - clogger.debug("Partial write {}: {}/{} bytes", *this, *written, size); - *written = align_down(*written, alignment); - return make_ready_future(stop_iteration::no); - // TODO: retry/ignore/fail/stop - optional behaviour in origin. - // we fast-fail the whole commit. - } catch (...) { - clogger.error("Failed to persist commits to disk for {}: {}", *this, std::current_exception()); - throw; - } - }); - }).finally([this, buf = std::move(buf), size]() mutable { - _segment_manager->release_buffer(std::move(buf)); - _segment_manager->notify_memory_written(size); - }); - }, [me, flush_after, top, rp] { // lambda instead of bind, so we keep "me" alive. - assert(me->_pending_ops.has_operation(rp)); - return flush_after ? me->do_flush(top) : make_ready_future(me); - }); - } - - future batch_cycle(timeout_clock::time_point timeout) { - /** - * For batch mode we force a write "immediately". - * However, we first wait for all previous writes/flushes - * to complete. - * - * This has the benefit of allowing several allocations to - * queue up in a single buffer. - */ - auto me = shared_from_this(); - auto fp = _file_pos; - return _pending_ops.wait_for_pending(timeout).then([me = std::move(me), fp, timeout] { - if (fp != me->_file_pos) { - // some other request already wrote this buffer. - // If so, wait for the operation at our intended file offset - // to finish, then we know the flush is complete and we - // are in accord. - // (Note: wait_for_pending(pos) waits for operation _at_ pos (and before), - replay_position rp(me->_desc.id, position_type(fp)); - return me->_pending_ops.wait_for_pending(rp, timeout).then([me, fp] { - assert(me->_flush_pos > fp); - return make_ready_future(me); - }); - } - // It is ok to leave the sync behind on timeout because there will be at most one - // such sync, all later allocations will block on _pending_ops until it is done. 
- return with_timeout(timeout, me->sync()); - }).handle_exception([me, fp](auto p) { - // If we get an IO exception (which we assume this is) - // we should close the segment. - // TODO: should we also truncate away any partial write - // we did? - me->_closed = true; // just mark segment as closed, no writes will be done. - return make_exception_future<sseg_ptr>(p); - }); - } - - /** - * Add a "mutation" to the segment. - */ - future<rp_handle> allocate(const cf_id_type& id, shared_ptr<entry_writer> writer, segment_manager::request_controller_units permit, commitlog::timeout_clock::time_point timeout) { - if (must_sync()) { - return with_timeout(timeout, sync()).then([this, id, writer = std::move(writer), permit = std::move(permit), timeout] (auto s) mutable { - return s->allocate(id, std::move(writer), std::move(permit), timeout); - }); - } - - const auto size = writer->size(*this); - const auto s = size + entry_overhead_size; // total size - auto ep = _segment_manager->sanity_check_size(s); - if (ep) { - return make_exception_future<rp_handle>(std::move(ep)); - } - - - if (!is_still_allocating() || position() + s > _segment_manager->max_size) { // would we make the file too big? - return finish_and_get_new(timeout).then([id, writer = std::move(writer), permit = std::move(permit), timeout] (auto new_seg) mutable { - return new_seg->allocate(id, std::move(writer), std::move(permit), timeout); - }); - } else if (!_buffer.empty() && (s > (_buffer.size() - _buf_pos))) { // enough data? - if (_segment_manager->cfg.mode == sync_mode::BATCH) { - // TODO: this could cause starvation if we're really unlucky. - // If we run batch mode and find ourselves not fit in a non-empty - // buffer, we must force a cycle and wait for it (to keep flush order). - // This will most likely cause parallel writes, and consecutive flushes. - return with_timeout(timeout, cycle(true)).then([this, id, writer = std::move(writer), permit = std::move(permit), timeout] (auto new_seg) mutable { - return new_seg->allocate(id, std::move(writer), std::move(permit), timeout); - }); - } else { - cycle().discard_result().handle_exception([] (auto ex) { - clogger.error("Failed to flush commits to disk: {}", ex); - }); - } - } - - size_t buf_memory = s; - if (_buffer.empty()) { - new_buffer(s); - buf_memory += _buf_pos; - } - - _gate.enter(); // this might throw. I guess we accept this? - buf_memory -= permit.release(); - _segment_manager->account_memory_usage(buf_memory); - - replay_position rp(_desc.id, position()); - auto pos = _buf_pos; - _buf_pos += s; - _cf_dirty[id]++; // increase use count for cf. - - rp_handle h(static_pointer_cast<cf_holder>(shared_from_this()), std::move(id), rp); - - auto * p = _buffer.get_write() + pos; - auto * e = _buffer.get_write() + pos + s - sizeof(uint32_t); - - data_output out(p, e); - crc32_nbo crc; - - out.write(uint32_t(s)); - crc.process(uint32_t(s)); - out.write(crc.checksum()); - - // actual data - writer->write(*this, out); - - crc.process_bytes(p + 2 * sizeof(uint32_t), size); - - out = data_output(e, sizeof(uint32_t)); - out.write(crc.checksum()); - - ++_segment_manager->totals.allocation_count; - ++_num_allocs; - - _gate.leave(); - - if (_segment_manager->cfg.mode == sync_mode::BATCH) { - return batch_cycle(timeout).then([h = std::move(h)](auto s) mutable { - return make_ready_future<rp_handle>(std::move(h)); - }); - } else { - // If this buffer alone is too big, potentially bigger than the maximum allowed size, - // then no other request will be allowed in to force the cycle()ing of this buffer. We - // have to do it ourselves.
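
The framing just written by allocate() is worth restating: an entry is laid out as [length | crc(length) | payload | crc(length, payload)], which is where entry_overhead_size = 3 * sizeof(uint32_t) comes from. An approximate re-creation using zlib's crc32 (link with -lz); unlike crc32_nbo it checksums host-order bytes, so the exact checksum values differ from the on-disk format:

    #include <cstdint>
    #include <cstring>
    #include <iostream>
    #include <vector>
    #include <zlib.h>

    std::vector<uint8_t> frame_entry(const std::vector<uint8_t>& payload) {
        const uint32_t size = payload.size() + 3 * sizeof(uint32_t); // entry_overhead_size
        std::vector<uint8_t> out(size);
        uint8_t* p = out.data();

        auto put32 = [&p](uint32_t v) { std::memcpy(p, &v, sizeof v); p += sizeof v; };

        uLong head = crc32(0L, Z_NULL, 0);
        head = crc32(head, reinterpret_cast<const Bytef*>(&size), sizeof size);

        put32(size);                          // total length
        put32(static_cast<uint32_t>(head));   // head checksum: guards the length word
        std::memcpy(p, payload.data(), payload.size());
        p += payload.size();

        uLong tail = crc32(0L, Z_NULL, 0);    // tail checksum: guards length + payload
        tail = crc32(tail, reinterpret_cast<const Bytef*>(&size), sizeof size);
        tail = crc32(tail, reinterpret_cast<const Bytef*>(payload.data()), payload.size());
        put32(static_cast<uint32_t>(tail));

        return out;
    }

    int main() {
        auto entry = frame_entry({'m', 'u', 't', 'a', 't', 'i', 'o', 'n'});
        std::cout << entry.size() << " bytes framed\n"; // 8 payload + 12 overhead = 20
    }
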
- if ((_buf_pos >= (db::commitlog::segment::default_size))) { - cycle().discard_result().handle_exception([] (auto ex) { - clogger.error("Failed to flush commits to disk: {}", ex); - }); - } - return make_ready_future(std::move(h)); - } - } - - position_type position() const { - return position_type(_file_pos + _buf_pos); - } - - size_t size_on_disk() const { - return _file_pos; - } - - // ensures no more of this segment is writeable, by allocating any unused section at the end and marking it discarded - // a.k.a. zero the tail. - size_t clear_buffer_slack() { - auto size = align_up(_buf_pos, alignment); - std::fill(_buffer.get_write() + _buf_pos, _buffer.get_write() + size, - 0); - _segment_manager->totals.bytes_slack += (size - _buf_pos); - _segment_manager->account_memory_usage(size - _buf_pos); - return size; - } - void mark_clean(const cf_id_type& id, uint64_t count) { - auto i = _cf_dirty.find(id); - if (i != _cf_dirty.end()) { - assert(i->second >= count); - i->second -= count; - if (i->second == 0) { - _cf_dirty.erase(i); - } - } - } - void mark_clean(const cf_id_type& id) { - _cf_dirty.erase(id); - } - void mark_clean() { - _cf_dirty.clear(); - } - bool is_still_allocating() const { - return !_closed && position() < _segment_manager->max_size; - } - bool is_clean() const { - return _cf_dirty.empty(); - } - bool is_unused() const { - return !is_still_allocating() && is_clean(); - } - bool is_flushed() const { - return position() <= _flush_pos; - } - bool can_delete() const { - return is_unused() && is_flushed(); - } - bool contains(const replay_position& pos) const { - return pos.id == _desc.id; - } - sstring get_segment_name() const { - return _desc.filename(); - } -}; - -future -db::commitlog::segment_manager::allocate_when_possible(const cf_id_type& id, shared_ptr writer, commitlog::timeout_clock::time_point timeout) { - auto size = writer->size(); - // If this is already too big now, we should throw early. It's also a correctness issue, since - // if we are too big at this moment we'll never reach allocate() to actually throw at that - // point. - auto ep = sanity_check_size(size); - if (ep) { - return make_exception_future(std::move(ep)); - } - - auto fut = get_units(_request_controller, size, timeout); - if (_request_controller.waiters()) { - totals.requests_blocked_memory++; - } - return fut.then([this, id, writer = std::move(writer), timeout] (auto permit) mutable { - return this->active_segment(timeout).then([this, timeout, id, writer = std::move(writer), permit = std::move(permit)] (auto s) mutable { - return s->allocate(id, std::move(writer), std::move(permit), timeout); - }); - }); -} - -const size_t db::commitlog::segment::default_size; - -db::commitlog::segment_manager::segment_manager(config c) - : cfg([&c] { - config cfg(c); - - if (cfg.commit_log_location.empty()) { - cfg.commit_log_location = "/var/lib/scylla/commitlog"; - } - - if (cfg.max_active_writes == 0) { - cfg.max_active_writes = // TODO: call someone to get an idea... - 25 * smp::count; - } - cfg.max_active_writes = std::max(uint64_t(1), cfg.max_active_writes / smp::count); - if (cfg.max_active_flushes == 0) { - cfg.max_active_flushes = // TODO: call someone to get an idea... 
- 5 * smp::count; - } - cfg.max_active_flushes = std::max(uint64_t(1), cfg.max_active_flushes / smp::count); - - return cfg; - }()) - , max_size(std::min(std::numeric_limits::max(), std::max(cfg.commitlog_segment_size_in_mb, 1) * 1024 * 1024)) - , max_mutation_size(max_size >> 1) - , max_disk_size(size_t(std::ceil(cfg.commitlog_total_space_in_mb / double(smp::count))) * 1024 * 1024) - , _flush_semaphore(cfg.max_active_flushes) - // That is enough concurrency to allow for our largest mutation (max_mutation_size), plus - // an existing in-flight buffer. Since we'll force the cycling() of any buffer that is bigger - // than default_size at the end of the allocation, that allows for every valid mutation to - // always be admitted for processing. - , _request_controller(max_request_controller_units()) - , _reserve_segments(1) - , _reserve_replenisher(make_ready_future<>()) -{ - assert(max_size > 0); - - clogger.trace("Commitlog {} maximum disk size: {} MB / cpu ({} cpus)", - cfg.commit_log_location, max_disk_size / (1024 * 1024), - smp::count); - - create_counters(); -} - -size_t db::commitlog::segment_manager::max_request_controller_units() const { - return max_mutation_size + db::commitlog::segment::default_size; -} - -future<> db::commitlog::segment_manager::replenish_reserve() { - return do_until([this] { return _shutdown; }, [this] { - return _reserve_segments.not_full().then([this] { - if (_shutdown) { - return make_ready_future<>(); - } - return with_gate(_gate, [this] { - return this->allocate_segment(false).then([this](sseg_ptr s) { - auto ret = _reserve_segments.push(std::move(s)); - if (!ret) { - clogger.error("Segment reserve is full! Ignoring and trying to continue, but shouldn't happen"); - } - return make_ready_future<>(); - }); - }).handle_exception([](std::exception_ptr ep) { - clogger.warn("Exception in segment reservation: {}", ep); - return sleep(100ms); - }); - }); - }); -} - -future> -db::commitlog::segment_manager::list_descriptors(sstring dirname) { - struct helper { - sstring _dirname; - file _file; - subscription _list; - std::vector _result; - - helper(helper&&) = default; - helper(sstring n, file && f) - : _dirname(std::move(n)), _file(std::move(f)), _list( - _file.list_directory( - std::bind(&helper::process, this, - std::placeholders::_1))) { - } - - future<> process(directory_entry de) { - auto entry_type = [this](const directory_entry & de) { - if (!de.type && !de.name.empty()) { - return engine().file_type(_dirname + "/" + de.name); - } - return make_ready_future>(de.type); - }; - return entry_type(de).then([this, de](std::experimental::optional type) { - if (type == directory_entry_type::regular && de.name[0] != '.' 
&& !is_cassandra_segment(de.name)) { - try { - _result.emplace_back(de.name); - } catch (std::domain_error& e) { - clogger.warn(e.what()); - } - } - return make_ready_future<>(); - }); - } - - future<> done() { - return _list.done(); - } - - static bool is_cassandra_segment(sstring name) { - // We want to ignore commitlog segments generated by Cassandra-derived tools (#1112) - auto c = sstring("Cassandra"); - if (name.size() < c.size()) { - return false; - } - return name.substr(0, c.size()) == c; - } - }; - - return open_checked_directory(commit_error_handler, dirname).then([this, dirname](file dir) { - auto h = make_lw_shared<helper>(std::move(dirname), std::move(dir)); - return h->done().then([h]() { - return make_ready_future<std::vector<descriptor>>(std::move(h->_result)); - }).finally([h] {}); - }); -} - -future<> db::commitlog::segment_manager::init() { - return list_descriptors(cfg.commit_log_location).then([this](std::vector<descriptor> descs) { - assert(_reserve_segments.empty()); // _segments_to_replay must not pick them up - segment_id_type id = std::chrono::duration_cast<std::chrono::milliseconds>(runtime::get_boot_time().time_since_epoch()).count() + 1; - for (auto& d : descs) { - id = std::max(id, replay_position(d.id).base_id()); - _segments_to_replay.push_back(cfg.commit_log_location + "/" + d.filename()); - } - - // base id counter is [ <shard> | <base> ] - _ids = replay_position(engine().cpu_id(), id).id; - // always run the timer now, since we need to handle segment pre-alloc etc as well. - _timer.set_callback(std::bind(&segment_manager::on_timer, this)); - auto delay = engine().cpu_id() * std::ceil(double(cfg.commitlog_sync_period_in_ms) / smp::count); - clogger.trace("Delaying timer loop {} ms", delay); - // We need to wait until we have scanned all other segments to actually start serving new - // segments. We are ready now. - this->_reserve_replenisher = replenish_reserve(); - this->arm(delay); - }); -} - -void db::commitlog::segment_manager::create_counters() { - namespace sm = seastar::metrics; - - _metrics.add_group("commitlog", { - sm::make_gauge("segments", [this] { return _segments.size(); }, - sm::description("Holds the current number of segments.")), - - sm::make_gauge("allocating_segments", [this] { return std::count_if(_segments.begin(), _segments.end(), [] (const sseg_ptr & s) { return s->is_still_allocating(); }); }, - sm::description("Holds the number of not closed segments that still have some free space. " - "This value should not get too high.")), - - sm::make_gauge("unused_segments", [this] { return std::count_if(_segments.begin(), _segments.end(), [] (const sseg_ptr & s) { return s->is_unused(); }); }, - sm::description("Holds the current number of unused segments. " - "A non-zero value indicates that the disk write path became temporarily slow.")), - - sm::make_derive("alloc", totals.allocation_count, - sm::description("Counts a number of times a new mutation has been added to a segment. " - "Divide bytes_written by this value to get the average number of bytes per mutation written to the disk.")), - - sm::make_derive("cycle", totals.cycle_count, - sm::description("Counts a number of commitlog write cycles - when the data is written from the internal memory buffer to the disk.")), - - sm::make_derive("flush", totals.flush_count, - sm::description("Counts a number of times the flush() method was called for a file.")), - - sm::make_derive("bytes_written", totals.bytes_written, - sm::description("Counts a number of bytes written to the disk. 
" - "Divide this value by \"alloc\" to get the average number of bytes per mutation written to the disk.")), - - sm::make_derive("slack", totals.bytes_slack, - sm::description("Counts a number of unused bytes written to the disk due to disk segment alignment.")), - - sm::make_gauge("pending_flushes", totals.pending_flushes, - sm::description("Holds a number of currently pending flushes. See the related flush_limit_exceeded metric.")), - - sm::make_gauge("pending_allocations", [this] { return pending_allocations(); }, - sm::description("Holds a number of currently pending allocations. " - "A non-zero value indicates that we have a bottleneck in the disk write flow.")), - - sm::make_derive("requests_blocked_memory", totals.requests_blocked_memory, - sm::description("Counts a number of requests blocked due to memory pressure. " - "A non-zero value indicates that the commitlog memory quota is not enough to serve the required amount of requests.")), - - sm::make_derive("flush_limit_exceeded", totals.flush_limit_exceeded, - sm::description( - seastar::format("Counts a number of times a flush limit was exceeded. " - "A non-zero value indicates that there are too many pending flush operations (see pending_flushes) and some of " - "them will be blocked till the total amount of pending flush operaitions drops below {}.", cfg.max_active_flushes))), - - sm::make_gauge("disk_total_bytes", totals.total_size, - sm::description("Holds a size of disk space in bytes used for data so far. " - "A too high value indicates that we have some bottleneck in the writting to sstables path.")), - - sm::make_gauge("memory_buffer_bytes", totals.buffer_list_bytes, - sm::description("Holds the total number of bytes in internal memory buffers.")), - }); -} - -void db::commitlog::segment_manager::flush_segments(bool force) { - if (_segments.empty()) { - return; - } - // defensive copy. - auto callbacks = boost::copy_range>(_flush_handlers | boost::adaptors::map_values); - auto& active = _segments.back(); - - // RP at "start" of segment we leave untouched. - replay_position high(active->_desc.id, 0); - - // But if all segments are closed or we force-flush, - // include all. - if (force || !active->is_still_allocating()) { - high = replay_position(high.id + 1, 0); - } - - // Now get a set of used CF ids: - std::unordered_set ids; - std::for_each(_segments.begin(), _segments.end() - 1, [&ids](sseg_ptr& s) { - for (auto& id : s->_cf_dirty | boost::adaptors::map_keys) { - ids.insert(id); - } - }); - - clogger.debug("Flushing ({}) to {}", force, high); - - // For each CF id: for each callback c: call c(id, high) - for (auto& f : callbacks) { - for (auto& id : ids) { - try { - f(id, high); - } catch (...) 
{ - clogger.error("Exception during flush request {}/{}: {}", id, high, std::current_exception()); - } - } - } -} - -future<sseg_ptr> db::commitlog::segment_manager::allocate_segment(bool active) { - descriptor d(next_id()); - file_open_options opt; - opt.extent_allocation_size_hint = max_size; - return open_checked_file_dma(commit_error_handler, cfg.commit_log_location + "/" + d.filename(), open_flags::wo | open_flags::create, opt).then([this, d, active](file f) { - // xfs doesn't like files extended beyond eof, so enlarge the file - return f.truncate(max_size).then([this, d, active, f] () mutable { - auto s = make_shared<segment>(this->shared_from_this(), d, std::move(f), active); - return make_ready_future<sseg_ptr>(s); - }); - }); -} - -future<sseg_ptr> db::commitlog::segment_manager::new_segment() { - if (_shutdown) { - throw std::runtime_error("Commitlog has been shut down. Cannot add data"); - } - - ++_new_counter; - - if (_reserve_segments.empty() && (_reserve_segments.max_size() < cfg.max_reserve_segments)) { - _reserve_segments.set_max_size(_reserve_segments.max_size() + 1); - clogger.debug("Increased segment reserve count to {}", _reserve_segments.max_size()); - } - return _reserve_segments.pop_eventually().then([this] (auto s) { - _segments.push_back(std::move(s)); - _segments.back()->reset_sync_time(); - return make_ready_future<sseg_ptr>(_segments.back()); - }); -} - -future<sseg_ptr> db::commitlog::segment_manager::active_segment(commitlog::timeout_clock::time_point timeout) { - // If there is no active segment, try to allocate one using new_segment(). If we time out, - // make sure later invocations can still pick that segment up once it's ready. - return repeat_until_value([this, timeout] () -> future<stdx::optional<sseg_ptr>> { - if (!_segments.empty() && _segments.back()->is_still_allocating()) { - return make_ready_future<stdx::optional<sseg_ptr>>(_segments.back()); - } - return [this, timeout] { - if (!_segment_allocating) { - promise<> p; - _segment_allocating.emplace(p.get_future()); - auto f = _segment_allocating->get_future(timeout); - with_gate(_gate, [this] { - return new_segment().discard_result().finally([this]() { - _segment_allocating = stdx::nullopt; - }); - }).forward_to(std::move(p)); - return f; - } else { - return _segment_allocating->get_future(timeout); - } - }().then([] () -> stdx::optional<sseg_ptr> { - return stdx::nullopt; - }); - }); -} - -/** - * go through all segments, clear id up to pos. if segment becomes clean and unused by this, - * it is discarded.
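
The _segment_allocating logic above is a "single-flight" guard: however many writers outgrow the active segment at once, only one new_segment() call runs, and everyone else waits on the same shared future. A simplified standard-library rendition of the idea (the real code additionally propagates per-waiter timeouts and clears the slot in the allocating fiber's finally block):

    #include <future>
    #include <iostream>
    #include <mutex>
    #include <optional>
    #include <utility>

    class single_flight {
        std::mutex _m;
        std::optional<std::shared_future<int>> _inflight; // e.g. the new segment id
    public:
        template <typename Fn>
        int get(Fn&& make) {
            std::shared_future<int> f;
            {
                std::lock_guard lock(_m);
                if (!_inflight) {
                    // First caller starts the (expensive) allocation...
                    _inflight = std::async(std::launch::async, std::forward<Fn>(make)).share();
                }
                f = *_inflight; // ...later callers just piggyback on it
            }
            int v = f.get();
            {
                std::lock_guard lock(_m);
                _inflight.reset(); // allow the next allocation round
            }
            return v;
        }
    };

    int main() {
        single_flight sf;
        std::cout << sf.get([] { return 42; }) << "\n";
    }
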
- */ -void db::commitlog::segment_manager::discard_completed_segments(const cf_id_type& id, const rp_set& used) { - auto& usage = used.usage(); - - clogger.debug("Discarding {}: {}", id, usage); - - for (auto&s : _segments) { - auto i = usage.find(s->_desc.id); - if (i != usage.end()) { - s->mark_clean(id, i->second); - } - } - discard_unused_segments(); -} - -void db::commitlog::segment_manager::discard_completed_segments(const cf_id_type& id) { - clogger.debug("Discard all data for {}", id); - for (auto&s : _segments) { - s->mark_clean(id); - } - discard_unused_segments(); -} - -namespace db { - -std::ostream& operator<<(std::ostream& out, const db::commitlog::segment& s) { - return out << s._desc.filename(); -} - -std::ostream& operator<<(std::ostream& out, const db::commitlog::segment::cf_mark& m) { - return out << (m.s._cf_dirty | boost::adaptors::map_keys); -} - -std::ostream& operator<<(std::ostream& out, const db::replay_position& p) { - return out << "{" << p.shard_id() << ", " << p.base_id() << ", " << p.pos << "}"; -} - -} - -void db::commitlog::segment_manager::discard_unused_segments() { - clogger.trace("Checking for unused segments ({} active)", _segments.size()); - - auto i = std::remove_if(_segments.begin(), _segments.end(), [=](sseg_ptr s) { - if (s->can_delete()) { - clogger.debug("Segment {} is unused", *s); - return true; - } - if (s->is_still_allocating()) { - clogger.debug("Not safe to delete segment {}; still allocating.", s); - } else if (!s->is_clean()) { - clogger.debug("Not safe to delete segment {}; dirty is {}", s, segment::cf_mark {*s}); - } else { - clogger.debug("Not safe to delete segment {}; disk ops pending", s); - } - return false; - }); - if (i != _segments.end()) { - _segments.erase(i, _segments.end()); - } -} - -// FIXME: pop() will call unlink -> sleeping in reactor thread. -// Not urgent since mostly called during shutdown, but have to fix. -future<> db::commitlog::segment_manager::clear_reserve_segments() { - while (!_reserve_segments.empty()) { - _reserve_segments.pop(); - } - return make_ready_future<>(); -} - -future<> db::commitlog::segment_manager::sync_all_segments(bool shutdown) { - clogger.debug("Issuing sync for all segments ({})", shutdown ? "shutdown" : "active"); - return parallel_for_each(_segments, [this, shutdown](sseg_ptr s) { - return s->sync(shutdown).then([](sseg_ptr s) { - clogger.debug("Synced segment {}", *s); - }); - }); -} - -future<> db::commitlog::segment_manager::shutdown() { - if (!_shutdown_promise) { - _shutdown_promise = shared_promise<>(); - - // Wait for all pending requests to finish. Need to sync first because segments that are - // alive may be holding semaphore permits. - auto block_new_requests = get_units(_request_controller, max_request_controller_units()); - return sync_all_segments(false).then([this, block_new_requests = std::move(block_new_requests)] () mutable { - return std::move(block_new_requests).then([this] (auto permits) { - _timer.cancel(); // no more timer calls - _shutdown = true; // no re-arm, no create new segments. - // Now first wait for periodic task to finish, then sync and close all - // segments, flushing out any remaining data. 
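
The shutdown path above blocks new work by acquiring every unit of the request controller before closing the gate, so nothing can start a write once draining begins. The same admission-drain trick in miniature with a C++20 counting_semaphore (max_units and the function names are illustrative):

    #include <iostream>
    #include <semaphore>

    constexpr int max_units = 4;
    std::counting_semaphore<max_units> request_units(max_units);

    void write_path() {
        request_units.acquire();   // normal writes take permits...
        // ... write ...
        request_units.release();   // ...and return them when done
    }

    void shutdown() {
        // sync_all_segments(false) would run first, so live segments
        // release the permits they hold; then drain the controller:
        for (int i = 0; i < max_units; ++i) {
            request_units.acquire(); // no new writes admitted past this point
        }
        // cancel the timer, set _shutdown, close the gate, final sync, etc.
        std::cout << "quiesced\n";
    }

    int main() {
        write_path();
        shutdown();
    }
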
- return _gate.close().then(std::bind(&segment_manager::sync_all_segments, this, true)); - }); - }).finally([this] { - discard_unused_segments(); - // Now that the gate is closed and requests completed we are sure nobody else will pop() - return clear_reserve_segments().finally([this] { - return std::move(_reserve_replenisher).then_wrapped([this] (auto f) { - // Could be cleaner with proper seastar support - if (f.failed()) { - _shutdown_promise->set_exception(f.get_exception()); - } else { - _shutdown_promise->set_value(); - } - }); - }); - }); - } - return _shutdown_promise->get_shared_future(); -} - -future<> db::commitlog::segment_manager::orphan_all() { - _segments.clear(); - return clear_reserve_segments(); -} - -/* - * Sync all segments, then clear them out. To ensure all ops are done. - * (Assumes you have barriered adding ops!) - * Only use from tests. - */ -future<> db::commitlog::segment_manager::clear() { - clogger.debug("Clearing commitlog"); - return shutdown().then([this] { - clogger.debug("Clearing all segments"); - for (auto& s : _segments) { - s->mark_clean(); - } - return orphan_all(); - }); -} -/** - * Called by timer in periodic mode. - */ -void db::commitlog::segment_manager::sync() { - for (auto s : _segments) { - s->sync(); // we do not care about waiting... - } -} - -void db::commitlog::segment_manager::on_timer() { - // Gate, because we are starting potentially blocking ops - // without waiting for them, so segment_manager could be shut down - // while they are running. - seastar::with_gate(_gate, [this] { - if (cfg.mode != sync_mode::BATCH) { - sync(); - } - // IFF a new segment was put in use since last we checked, and we're - // above threshold, request flush. - if (_new_counter > 0) { - auto max = max_disk_size; - auto cur = totals.total_size_on_disk; - if (max != 0 && cur >= max) { - _new_counter = 0; - clogger.debug("Size on disk {} MB exceeds local maximum {} MB", cur / (1024 * 1024), max / (1024 * 1024)); - flush_segments(); - } - } - return make_ready_future<>(); - }); - arm(); -} - -std::vector<sstring> db::commitlog::segment_manager::get_active_names() const { - std::vector<sstring> res; - for (auto i: _segments) { - if (!i->is_unused()) { - // Each shard is located in its own directory - res.push_back(cfg.commit_log_location + "/" + i->get_segment_name()); - } - } - return res; -} - -uint64_t db::commitlog::segment_manager::get_num_dirty_segments() const { - return std::count_if(_segments.begin(), _segments.end(), [](sseg_ptr s) { - return !s->is_still_allocating() && !s->is_clean(); - }); -} - -uint64_t db::commitlog::segment_manager::get_num_active_segments() const { - return std::count_if(_segments.begin(), _segments.end(), [](sseg_ptr s) { - return s->is_still_allocating(); - }); -} - - -db::commitlog::segment_manager::buffer_type db::commitlog::segment_manager::acquire_buffer(size_t s) { - auto i = _temp_buffers.begin(); - auto e = _temp_buffers.end(); - - while (i != e) { - if (i->size() >= s) { - auto r = std::move(*i); - _temp_buffers.erase(i); - totals.buffer_list_bytes -= r.size(); - return r; - } - ++i; - } - auto a = ::memalign(segment::alignment, s); - if (a == nullptr) { - throw std::bad_alloc(); - } - clogger.trace("Allocated {} k buffer", s / 1024); - return buffer_type(reinterpret_cast<char*>(a), s, make_free_deleter(a)); -} - -void db::commitlog::segment_manager::release_buffer(buffer_type&& b) { - _temp_buffers.emplace_back(std::move(b)); - std::sort(_temp_buffers.begin(), _temp_buffers.end(), [](const buffer_type& b1, const buffer_type& b2) { - return
b1.size() < b2.size(); - }); - - constexpr const size_t max_temp_buffers = 4; - - if (_temp_buffers.size() > max_temp_buffers) { - clogger.trace("Deleting {} buffers", _temp_buffers.size() - max_temp_buffers); - _temp_buffers.erase(_temp_buffers.begin() + max_temp_buffers, _temp_buffers.end()); - } - totals.buffer_list_bytes = boost::accumulate( - _temp_buffers | boost::adaptors::transformed(std::mem_fn(&buffer_type::size)), - size_t(0), std::plus()); -} - -/** - * Add mutation. - */ -future db::commitlog::add(const cf_id_type& id, - size_t size, commitlog::timeout_clock::time_point timeout, serializer_func func) { - class serializer_func_entry_writer final : public entry_writer { - serializer_func _func; - size_t _size; - public: - serializer_func_entry_writer(size_t sz, serializer_func func) - : _func(std::move(func)), _size(sz) - { } - virtual size_t size(segment&) override { return _size; } - virtual size_t size() override { return _size; } - virtual void write(segment&, output& out) override { - _func(out); - } - }; - auto writer = ::make_shared(size, std::move(func)); - return _segment_manager->allocate_when_possible(id, writer, timeout); -} - -future db::commitlog::add_entry(const cf_id_type& id, const commitlog_entry_writer& cew, timeout_clock::time_point timeout) -{ - class cl_entry_writer final : public entry_writer { - commitlog_entry_writer _writer; - public: - cl_entry_writer(const commitlog_entry_writer& wr) : _writer(wr) { } - virtual size_t size(segment& seg) override { - _writer.set_with_schema(!seg.is_schema_version_known(_writer.schema())); - return _writer.size(); - } - virtual size_t size() override { - return _writer.mutation_size(); - } - virtual void write(segment& seg, output& out) override { - if (_writer.with_schema()) { - seg.add_schema_version(_writer.schema()); - } - _writer.write(out); - } - }; - auto writer = ::make_shared(cew); - return _segment_manager->allocate_when_possible(id, writer, timeout); -} - -db::commitlog::commitlog(config cfg) - : _segment_manager(::make_shared(std::move(cfg))) { -} - -db::commitlog::commitlog(commitlog&& v) noexcept - : _segment_manager(std::move(v._segment_manager)) { -} - -db::commitlog::~commitlog() { - if (_segment_manager != nullptr) { - _segment_manager->orphan_all(); - } -} - -future db::commitlog::create_commitlog(config cfg) { - commitlog c(std::move(cfg)); - auto f = c._segment_manager->init(); - return f.then([c = std::move(c)]() mutable { - return make_ready_future(std::move(c)); - }); -} - -db::commitlog::flush_handler_anchor::flush_handler_anchor(flush_handler_anchor&& f) - : _cl(f._cl), _id(f._id) -{ - f._id = 0; -} - -db::commitlog::flush_handler_anchor::flush_handler_anchor(commitlog& cl, flush_handler_id id) - : _cl(cl), _id(id) -{} - -db::commitlog::flush_handler_anchor::~flush_handler_anchor() { - unregister(); -} - -db::commitlog::flush_handler_id db::commitlog::flush_handler_anchor::release() { - flush_handler_id id = 0; - std::swap(_id, id); - return id; -} - -void db::commitlog::flush_handler_anchor::unregister() { - auto id = release(); - if (id != 0) { - _cl.remove_flush_handler(id); - } -} - -db::commitlog::flush_handler_anchor db::commitlog::add_flush_handler(flush_handler h) { - return flush_handler_anchor(*this, _segment_manager->add_flush_handler(std::move(h))); -} - -void db::commitlog::remove_flush_handler(flush_handler_id id) { - _segment_manager->remove_flush_handler(id); -} - -void db::commitlog::discard_completed_segments(const cf_id_type& id, const rp_set& used) { - 
_segment_manager->discard_completed_segments(id, used); -} - -void db::commitlog::discard_completed_segments(const cf_id_type& id) { - _segment_manager->discard_completed_segments(id); -} - -future<> db::commitlog::sync_all_segments() { - return _segment_manager->sync_all_segments(); -} - -future<> db::commitlog::shutdown() { - return _segment_manager->shutdown(); -} - -future<> db::commitlog::release() { - return _segment_manager->orphan_all(); -} - -size_t db::commitlog::max_record_size() const { - return _segment_manager->max_mutation_size - segment::entry_overhead_size; -} - -uint64_t db::commitlog::max_active_writes() const { - return _segment_manager->cfg.max_active_writes; -} - -uint64_t db::commitlog::max_active_flushes() const { - return _segment_manager->cfg.max_active_flushes; -} - -future<> db::commitlog::clear() { - return _segment_manager->clear(); -} - -const db::commitlog::config& db::commitlog::active_config() const { - return _segment_manager->cfg; -} - -// No commit_io_check needed in the log reader since the database will fail -// on error at startup if required -future, db::replay_position>>> -db::commitlog::read_log_file(const sstring& filename, commit_load_reader_func next, position_type off) { - return open_checked_file_dma(commit_error_handler, filename, open_flags::ro).then([next = std::move(next), off](file f) { - return std::make_unique, replay_position>>( - read_log_file(std::move(f), std::move(next), off)); - }); -} - -// No commit_io_check needed in the log reader since the database will fail -// on error at startup if required -subscription, db::replay_position> -db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type off) { - struct work { - private: - file_input_stream_options make_file_input_stream_options() { - file_input_stream_options fo; - fo.buffer_size = db::commitlog::segment::default_size; - fo.read_ahead = 10; - fo.io_priority_class = service::get_local_commitlog_priority(); - return fo; - } - public: - file f; - stream, replay_position> s; - input_stream fin; - input_stream r; - uint64_t id = 0; - size_t pos = 0; - size_t next = 0; - size_t start_off = 0; - size_t skip_to = 0; - size_t file_size = 0; - size_t corrupt_size = 0; - bool eof = false; - bool header = true; - bool failed = false; - - work(file f, position_type o = 0) - : f(f), fin(make_file_input_stream(f, 0, make_file_input_stream_options())), start_off(o) { - } - work(work&&) = default; - - bool advance(const temporary_buffer& buf) { - pos += buf.size(); - if (buf.size() == 0) { - eof = true; - } - return !eof; - } - bool end_of_file() const { - return eof; - } - bool end_of_chunk() const { - return eof || next == pos; - } - future<> skip(size_t bytes) { - skip_to = pos + bytes; - return do_until([this] { return pos == skip_to || eof; }, [this, bytes] { - auto s = std::min(4096, skip_to - pos); - // should eof be an error here? - return fin.read_exactly(s).then([this](auto buf) { - this->advance(buf); - }); - }); - } - future<> stop() { - eof = true; - return make_ready_future<>(); - } - future<> fail() { - failed = true; - return stop(); - } - future<> read_header() { - return fin.read_exactly(segment::descriptor_header_size).then([this](temporary_buffer buf) { - if (!advance(buf)) { - // zero length file. accept it just to be nice. 
- return make_ready_future<>(); - } - // Will throw if we got eof - data_input in(buf); - auto magic = in.read(); - auto ver = in.read(); - auto id = in.read(); - auto checksum = in.read(); - - if (magic == 0 && ver == 0 && id == 0 && checksum == 0) { - // let's assume this was an empty (pre-allocated) - // file. just skip it. - return stop(); - } - - if (magic != segment::segment_magic) { - throw std::invalid_argument("Not a scylla format commitlog file"); - } - crc32_nbo crc; - crc.process(ver); - crc.process(id & 0xffffffff); - crc.process(id >> 32); - - auto cs = crc.checksum(); - if (cs != checksum) { - throw std::runtime_error("Checksum error in file header"); - } - - this->id = id; - this->next = 0; - - return make_ready_future<>(); - }); - } - future<> read_chunk() { - return fin.read_exactly(segment::segment_overhead_size).then([this](temporary_buffer buf) { - auto start = pos; - - if (!advance(buf)) { - return make_ready_future<>(); - } - - data_input in(buf); - auto next = in.read(); - auto checksum = in.read(); - - if (next == 0 && checksum == 0) { - // in a pre-allocating world, this means eof - return stop(); - } - - crc32_nbo crc; - crc.process(id & 0xffffffff); - crc.process(id >> 32); - crc.process(start); - - auto cs = crc.checksum(); - if (cs != checksum) { - // if a chunk header checksum is broken, we shall just assume that all - // remaining is as well. We cannot trust the "next" pointer, so... - clogger.debug("Checksum error in segment chunk at {}.", pos); - corrupt_size += (file_size - pos); - return stop(); - } - - this->next = next; - - if (start_off >= next) { - return skip(next - pos); - } - - return do_until(std::bind(&work::end_of_chunk, this), std::bind(&work::read_entry, this)); - }); - } - future<> read_entry() { - static constexpr size_t entry_header_size = segment::entry_overhead_size - sizeof(uint32_t); - - /** - * #598 - Must check that data left in chunk is enough to even read an entry. - * If not, this is small slack space in the chunk end, and we should just go - * to the next. - */ - assert(pos <= next); - if ((pos + entry_header_size) >= next) { - return skip(next - pos); - } - - return fin.read_exactly(entry_header_size).then([this](temporary_buffer buf) { - replay_position rp(id, position_type(pos)); - - if (!advance(buf)) { - return make_ready_future<>(); - } - - data_input in(buf); - - auto size = in.read(); - auto checksum = in.read(); - - crc32_nbo crc; - crc.process(size); - - if (size < 3 * sizeof(uint32_t) || checksum != crc.checksum()) { - auto slack = next - pos; - if (size != 0) { - clogger.debug("Segment entry at {} has broken header. Skipping to next chunk ({} bytes)", rp, slack); - corrupt_size += slack; - } - // size == 0 -> special scylla case: zero padding due to dma blocks - return skip(slack); - } - - return fin.read_exactly(size - entry_header_size).then([this, size, crc = std::move(crc), rp](temporary_buffer buf) mutable { - advance(buf); - - data_input in(buf); - - auto data_size = size - segment::entry_overhead_size; - in.skip(data_size); - auto checksum = in.read(); - - crc.process_bytes(buf.get(), data_size); - - if (crc.checksum() != checksum) { - // If we're getting a checksum error here, most likely the rest of - // the file will be corrupt as well. But it does not hurt to retry. - // Just go to the next entry (since "size" in header seemed ok). - clogger.debug("Segment entry at {} checksum error. 
Skipping {} bytes", rp, size); - corrupt_size += size; - return make_ready_future<>(); - } - - return s.produce(buf.share(0, data_size), rp).handle_exception([this](auto ep) { - return this->fail(); - }); - }); - }); - } - future<> read_file() { - return f.size().then([this](uint64_t size) { - file_size = size; - }).then([this] { - return read_header().then( - [this] { - return do_until(std::bind(&work::end_of_file, this), std::bind(&work::read_chunk, this)); - }).then([this] { - if (corrupt_size > 0) { - throw segment_data_corruption_error("Data corruption", corrupt_size); - } - }); - }).finally([this] { - return fin.close(); - }); - } - }; - - auto w = make_lw_shared(std::move(f), off); - auto ret = w->s.listen(std::move(next)); - - w->s.started().then(std::bind(&work::read_file, w.get())).then([w] { - if (!w->failed) { - w->s.close(); - } - }).handle_exception([w](auto ep) { - w->s.set_exception(ep); - }); - - return ret; -} - -std::vector db::commitlog::get_active_segment_names() const { - return _segment_manager->get_active_names(); -} - -uint64_t db::commitlog::get_total_size() const { - return _segment_manager->totals.total_size; -} - -uint64_t db::commitlog::get_completed_tasks() const { - return _segment_manager->totals.allocation_count; -} - -uint64_t db::commitlog::get_flush_count() const { - return _segment_manager->totals.flush_count; -} - -uint64_t db::commitlog::get_pending_tasks() const { - return _segment_manager->totals.pending_flushes; -} - -uint64_t db::commitlog::get_pending_flushes() const { - return _segment_manager->totals.pending_flushes; -} - -uint64_t db::commitlog::get_pending_allocations() const { - return _segment_manager->pending_allocations(); -} - -uint64_t db::commitlog::get_flush_limit_exceeded_count() const { - return _segment_manager->totals.flush_limit_exceeded; -} - -uint64_t db::commitlog::get_num_segments_created() const { - return _segment_manager->totals.segments_created; -} - -uint64_t db::commitlog::get_num_segments_destroyed() const { - return _segment_manager->totals.segments_destroyed; -} - -uint64_t db::commitlog::get_num_dirty_segments() const { - return _segment_manager->get_num_dirty_segments(); -} - -uint64_t db::commitlog::get_num_active_segments() const { - return _segment_manager->get_num_active_segments(); -} - -future> db::commitlog::list_existing_descriptors() const { - return list_existing_descriptors(active_config().commit_log_location); -} - -future> db::commitlog::list_existing_descriptors(const sstring& dir) const { - return _segment_manager->list_descriptors(dir); -} - -future> db::commitlog::list_existing_segments() const { - return list_existing_segments(active_config().commit_log_location); -} - -future> db::commitlog::list_existing_segments(const sstring& dir) const { - return list_existing_descriptors(dir).then([dir](auto descs) { - std::vector paths; - std::transform(descs.begin(), descs.end(), std::back_inserter(paths), [&](auto& d) { - return dir + "/" + d.filename(); - }); - return make_ready_future>(std::move(paths)); - }); -} - -std::vector db::commitlog::get_segments_to_replay() { - return std::move(_segment_manager->_segments_to_replay); -} - -db::rp_handle::rp_handle() noexcept -{} - -db::rp_handle::rp_handle(shared_ptr h, cf_id_type cf, replay_position rp) noexcept - : _h(std::move(h)), _cf(cf), _rp(rp) -{} - -db::rp_handle::rp_handle(rp_handle&& v) noexcept - : _h(std::move(v._h)), _cf(v._cf), _rp(std::exchange(v._rp, {})) -{} - -db::rp_handle& db::rp_handle::operator=(rp_handle&& v) noexcept { - if (this != 
&v) { - this->~rp_handle(); - new (this) rp_handle(std::move(v)); - } - return *this; -} - -db::rp_handle::~rp_handle() { - if (_rp != replay_position() && _h) { - _h->release_cf_count(_cf); - } -} - -db::replay_position db::rp_handle::release() { - return std::exchange(_rp, {}); -} diff --git a/scylla/db/commitlog/commitlog.hh b/scylla/db/commitlog/commitlog.hh deleted file mode 100644 index 72df6e8..0000000 --- a/scylla/db/commitlog/commitlog.hh +++ /dev/null @@ -1,360 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -#include "utils/data_output.hh" -#include "core/future.hh" -#include "core/shared_ptr.hh" -#include "core/stream.hh" -#include "replay_position.hh" -#include "commitlog_entry.hh" - -namespace seastar { class file; } - -#include "seastarx.hh" - -namespace db { - -class config; -class rp_set; -class rp_handle; - -/* - * Commit Log tracks every write operation into the system. The aim of - * the commit log is to be able to successfully recover data that was - * not stored to disk via the Memtable. - * - * This impl is cassandra log format compatible (for what it is worth). - * The behaviour is similar, but not 100% identical as "stock cl". - * - * Files are managed with "normal" file writes (as normal as seastar - * gets) - no mmapping. Data is kept in internal buffers which, when - * full, are written to disk (see below). Files are also flushed - * periodically (or always), ensuring all data is written + writes are - * complete. - * - * In BATCH mode, every write to the log will also send the data to disk - * + issue a flush and wait for both to complete. - * - * In PERIODIC mode, most writes will only add to the internal memory - * buffers. If the mem buffer is saturated, data is sent to disk, but we - * don't wait for the write to complete. However, if periodic (timer) - * flushing has not been done in X ms, we will write + flush to file. In - * which case we wait for it. 
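The PERIODIC/BATCH contrast described above comes down to two configuration presets. A minimal sketch, assuming this header is included; the directory path and helper names are hypothetical, while the field names follow the config struct declared further down:

    db::commitlog::config make_periodic_cfg() {
        db::commitlog::config cfg;
        cfg.commit_log_location = "/var/lib/commitlog";  // hypothetical path
        cfg.commitlog_sync_period_in_ms = 10 * 1000;     // flush at least every 10s
        cfg.mode = db::commitlog::sync_mode::PERIODIC;   // add() may complete before data hits disk
        return cfg;
    }

    db::commitlog::config make_batch_cfg() {
        db::commitlog::config cfg;
        cfg.commit_log_location = "/var/lib/commitlog";  // hypothetical path
        cfg.mode = db::commitlog::sync_mode::BATCH;      // every add() waits for write + flush
        return cfg;
    }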
- * - * The commitlog does not guarantee any ordering between "add" callers - * (due to the above). The actual order in the commitlog is however - * identified by the replay_position returned. - * - * Like the stock cl, the log segments keep track of the highest dirty - * (added) internal position for a given table id (cf_id_type / UUID). - * Code should ensure to use discard_completed_segments with UUID + - * highest rp once a memtable has been flushed. This will allow - * discarding used segments. Failure to do so will keep stuff - * indefinately. - */ -class commitlog { -public: - using timeout_clock = lowres_clock; - - class segment_manager; - class segment; - - friend class rp_handle; -private: - ::shared_ptr _segment_manager; -public: - enum class sync_mode { - PERIODIC, BATCH - }; - struct config { - config() = default; - config(const config&) = default; - config(const db::config&); - - sstring commit_log_location; - uint64_t commitlog_total_space_in_mb = 0; - uint64_t commitlog_segment_size_in_mb = 32; - uint64_t commitlog_sync_period_in_ms = 10 * 1000; //TODO: verify default! - // Max number of segments to keep in pre-alloc reserve. - // Not (yet) configurable from scylla.conf. - uint64_t max_reserve_segments = 12; - // Max active writes/flushes. Default value - // zero means try to figure it out ourselves - uint64_t max_active_writes = 0; - uint64_t max_active_flushes = 0; - - sync_mode mode = sync_mode::PERIODIC; - }; - - struct descriptor { - private: - descriptor(std::pair p); - public: - static const std::string SEPARATOR; - static const std::string FILENAME_PREFIX; - static const std::string FILENAME_EXTENSION; - - descriptor(descriptor&&) = default; - descriptor(const descriptor&) = default; - descriptor(segment_id_type i, uint32_t v = 1); - descriptor(replay_position p); - descriptor(sstring filename); - - sstring filename() const; - operator replay_position() const; - - const segment_id_type id; - const uint32_t ver; - }; - - commitlog(commitlog&&) noexcept; - ~commitlog(); - - /** - * Commitlog is created via a factory func. - * This of course because it needs to access disk to get up to speed. - * Optionally, could have an "init" func and require calling this. - */ - static future create_commitlog(config); - - - /** - * Note: To be able to keep impl out of header file, - * actual data writing is done via a std::function. - * If it is proven that this has unacceptable overhead, this can be replace - * by iter an interface or move segments and stuff into the header. But - * I hope not. - * - * A serializing func is provided along with a parameter indicating the size - * of data to be written. (See add). - * Don't write less, absolutely don't write more... - */ - using output = data_output; - using serializer_func = std::function; - - /** - * Add a "Mutation" to the commit log. - * - * Resolves with timed_out_error when timeout is reached. - * - * @param mutation_func a function that writes 'size' bytes to the log, representing the mutation. - */ - future add(const cf_id_type& id, size_t size, timeout_clock::time_point timeout, serializer_func mutation_func); - - /** - * Template version of add. - * Resolves with timed_out_error when timeout is reached. - * @param mu an invokable op that generates the serialized data. 
(Of size bytes) - */ - template - future add_mutation(const cf_id_type& id, size_t size, timeout_clock::time_point timeout, _MutationOp&& mu) { - return add(id, size, timeout, [mu = std::forward<_MutationOp>(mu)](output& out) { - mu(out); - }); - } - - /** - * Template version of add. - * @param mu an invokable op that generates the serialized data. (Of size bytes) - */ - template - future add_mutation(const cf_id_type& id, size_t size, _MutationOp&& mu) { - return add_mutation(id, size, timeout_clock::time_point::max(), std::forward<_MutationOp>(mu)); - } - - /** - * Add an entry to the commit log. - * Resolves with timed_out_error when timeout is reached. - * @param entry_writer a writer responsible for writing the entry - */ - future add_entry(const cf_id_type& id, const commitlog_entry_writer& entry_writer, timeout_clock::time_point timeout); - - /** - * Modifies the per-CF dirty cursors of any commit log segments for the column family according to the position - * given. Discards any commit log segments that are no longer used. - * - * @param cfId the column family ID that was flushed - * @param rp_set the replay positions of the flush - */ - void discard_completed_segments(const cf_id_type&, const rp_set&); - - void discard_completed_segments(const cf_id_type&); - - /** - * A 'flush_handler' is invoked when the CL determines that size on disk has - * exceeded allowable threshold. It is called once for every currently active - * CF id with the highest replay_position which we would prefer to free "until". - * I.e. a the highest potentially freeable position in the CL. - * - * Whatever the callback does to help (or not) this desire is up to him. - * This is called synchronously, so callee might want to instigate async ops - * in the background. - * - */ - typedef std::function flush_handler; - typedef uint64_t flush_handler_id; - - class flush_handler_anchor { - public: - friend class commitlog; - ~flush_handler_anchor(); - flush_handler_anchor(flush_handler_anchor&&); - flush_handler_anchor(const flush_handler_anchor&) = delete; - - flush_handler_id release(); // disengage anchor - danger danger. - void unregister(); - - private: - flush_handler_anchor(commitlog&, flush_handler_id); - - commitlog & _cl; - flush_handler_id _id; - }; - - flush_handler_anchor add_flush_handler(flush_handler); - void remove_flush_handler(flush_handler_id); - - /** - * Returns a vector of the segment names - */ - std::vector get_active_segment_names() const; - - /** - * Returns a vector of segment paths which were - * preexisting when this instance of commitlog was created. - * - * The list will be empty when called for the second time. - */ - std::vector get_segments_to_replay(); - - uint64_t get_total_size() const; - uint64_t get_completed_tasks() const; - uint64_t get_flush_count() const; - uint64_t get_pending_tasks() const; - uint64_t get_pending_flushes() const; - uint64_t get_pending_allocations() const; - uint64_t get_flush_limit_exceeded_count() const; - uint64_t get_num_segments_created() const; - uint64_t get_num_segments_destroyed() const; - /** - * Get number of inactive (finished), segments lingering - * due to still being dirty - */ - uint64_t get_num_dirty_segments() const; - /** - * Get number of active segments, i.e. still being allocated to - */ - uint64_t get_num_active_segments() const; - - /** - * Returns the largest amount of data that can be written in a single "mutation". 
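A usage sketch for the add_mutation() overloads declared above. The helper name is hypothetical; the rp_handle result type follows the rp_handle declared in replay_position.hh (also removed by this patch), and data_output is assumed here to accept a char range (its exact interface lives in utils/data_output.hh, not in this patch):

    future<db::rp_handle> append_payload(db::commitlog& log,
                                         const db::cf_id_type& id,
                                         const std::vector<char>& payload) {
        // payload must stay alive until the returned future resolves.
        return log.add_mutation(id, payload.size(), [&payload](db::commitlog::output& out) {
            // Contract from the comment above: write exactly `size` bytes.
            out.write(payload.data(), payload.data() + payload.size());
        });
    }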
- */ - size_t max_record_size() const; - - /** - * Return max allowed pending writes (per this shard) - */ - uint64_t max_active_writes() const; - /** - * Return max allowed pending flushes (per this shard) - */ - uint64_t max_active_flushes() const; - - future<> clear(); - - const config& active_config() const; - - /** - * Issues disk sync on all (allocating) segments. I.e. ensures that - * all data written up until this call is indeed on disk. - * _However_, if you issue new "add" ops while this is executing, - * those can/will be missed. - */ - future<> sync_all_segments(); - /** - * Shuts everything down and causes any - * incoming writes to throw exceptions - */ - future<> shutdown(); - /** - * Ensure segments are released, even if we don't free the - * commitlog proper. (hint, our shutdown is "partial") - */ - future<> release(); - - future> list_existing_descriptors() const; - future> list_existing_descriptors(const sstring& dir) const; - - future> list_existing_segments() const; - future> list_existing_segments(const sstring& dir) const; - - typedef std::function(temporary_buffer, replay_position)> commit_load_reader_func; - - class segment_data_corruption_error: public std::runtime_error { - public: - segment_data_corruption_error(std::string msg, uint64_t s) - : std::runtime_error(msg), _bytes(s) { - } - uint64_t bytes() const { - return _bytes; - } - private: - uint64_t _bytes; - }; - - static subscription, replay_position> read_log_file(file, commit_load_reader_func, position_type = 0); - static future, replay_position>>> read_log_file( - const sstring&, commit_load_reader_func, position_type = 0); -private: - commitlog(config); - - struct entry_writer { - virtual size_t size(segment&) = 0; - // Returns segment-independent size of the entry. Must be <= than segment-dependant size. - virtual size_t size() = 0; - virtual void write(segment&, output&) = 0; - }; -}; - -} diff --git a/scylla/db/commitlog/commitlog_entry.cc b/scylla/db/commitlog/commitlog_entry.cc deleted file mode 100644 index a742370..0000000 --- a/scylla/db/commitlog/commitlog_entry.cc +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
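On the read side, a sketch of draining one segment with the static read_log_file() declared just above; the done()/finally() pattern mirrors what commitlog_replayer::impl::recover() does later in this patch, and the function name is hypothetical:

    future<> dump_segment(sstring path) {
        return db::commitlog::read_log_file(path, [](temporary_buffer<char> buf, db::replay_position rp) {
            std::cout << "entry at " << rp << ": " << buf.size() << " bytes\n";
            return make_ready_future<>();
        }).then([](auto sub) {
            // Keep the subscription alive until the whole file has been consumed.
            auto done = sub->done();
            return done.finally([sub = std::move(sub)] {});
        });
    }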
- */ - -#include "counters.hh" -#include "commitlog_entry.hh" -#include "idl/uuid.dist.hh" -#include "idl/keys.dist.hh" -#include "idl/frozen_mutation.dist.hh" -#include "idl/mutation.dist.hh" -#include "idl/commitlog.dist.hh" -#include "serializer_impl.hh" -#include "serialization_visitors.hh" -#include "idl/uuid.dist.impl.hh" -#include "idl/keys.dist.impl.hh" -#include "idl/frozen_mutation.dist.impl.hh" -#include "idl/mutation.dist.impl.hh" -#include "idl/commitlog.dist.impl.hh" - -template -void commitlog_entry_writer::serialize(Output& out) const { - [this, wr = ser::writer_of_commitlog_entry(out)] () mutable { - if (_with_schema) { - return std::move(wr).write_mapping(_schema->get_column_mapping()); - } else { - return std::move(wr).skip_mapping(); - } - }().write_mutation(_mutation).end_commitlog_entry(); -} - -void commitlog_entry_writer::compute_size() { - seastar::measuring_output_stream ms; - serialize(ms); - _size = ms.size(); -} - -void commitlog_entry_writer::write(data_output& out) const { - seastar::simple_output_stream str(out.reserve(size()), size()); - serialize(str); -} - -commitlog_entry_reader::commitlog_entry_reader(const temporary_buffer& buffer) - : _ce([&] { - seastar::simple_input_stream in(buffer.get(), buffer.size()); - return ser::deserialize(in, boost::type()); -}()) -{ -} diff --git a/scylla/db/commitlog/commitlog_entry.hh b/scylla/db/commitlog/commitlog_entry.hh deleted file mode 100644 index fc23f25..0000000 --- a/scylla/db/commitlog/commitlog_entry.hh +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
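The compute_size()/write() pair above is a two-pass pattern: the same serialize() runs once against a measuring stream to learn the entry size, then once against a real buffer. A standalone sketch with simplified stand-ins for seastar's measuring_output_stream and simple_output_stream:

    #include <cstddef>
    #include <vector>

    struct measuring_stream {
        std::size_t size = 0;
        void write(const char*, std::size_t n) { size += n; } // count only
    };

    struct vector_stream {
        std::vector<char>& out;
        void write(const char* p, std::size_t n) { out.insert(out.end(), p, p + n); }
    };

    // One serializer, two stream types.
    template <typename Stream>
    void serialize(Stream& s, const char* payload, std::size_t len) {
        s.write(payload, len); // the real code emits a structured commitlog entry
    }

    int main() {
        const char msg[] = "entry";
        measuring_stream ms;
        serialize(ms, msg, sizeof(msg));  // pass 1: compute_size()
        std::vector<char> buf;
        buf.reserve(ms.size);
        vector_stream vs{buf};
        serialize(vs, msg, sizeof(msg));  // pass 2: write()
        return buf.size() == ms.size ? 0 : 1;
    }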
- */ - -#pragma once - -#include - -#include "frozen_mutation.hh" -#include "schema.hh" -#include "utils/data_output.hh" -#include "stdx.hh" - -class commitlog_entry { - stdx::optional _mapping; - frozen_mutation _mutation; -public: - commitlog_entry(stdx::optional mapping, frozen_mutation&& mutation) - : _mapping(std::move(mapping)), _mutation(std::move(mutation)) { } - const stdx::optional& mapping() const { return _mapping; } - const frozen_mutation& mutation() const { return _mutation; } -}; - -class commitlog_entry_writer { - schema_ptr _schema; - const frozen_mutation& _mutation; - bool _with_schema = true; - size_t _size = std::numeric_limits::max(); -private: - template - void serialize(Output&) const; - void compute_size(); -public: - commitlog_entry_writer(schema_ptr s, const frozen_mutation& fm) - : _schema(std::move(s)), _mutation(fm) - {} - - void set_with_schema(bool value) { - _with_schema = value; - compute_size(); - } - bool with_schema() { - return _with_schema; - } - schema_ptr schema() const { - return _schema; - } - - size_t size() const { - assert(_size != std::numeric_limits::max()); - return _size; - } - - size_t mutation_size() const { - return _mutation.representation().size(); - } - - void write(data_output& out) const; -}; - -class commitlog_entry_reader { - commitlog_entry _ce; -public: - commitlog_entry_reader(const temporary_buffer& buffer); - - const stdx::optional& get_column_mapping() const { return _ce.mapping(); } - const frozen_mutation& mutation() const { return _ce.mutation(); } -}; diff --git a/scylla/db/commitlog/commitlog_replayer.cc b/scylla/db/commitlog/commitlog_replayer.cc deleted file mode 100644 index ffd73ed..0000000 --- a/scylla/db/commitlog/commitlog_replayer.cc +++ /dev/null @@ -1,398 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
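The commitlog_entry_writer above, together with cl_entry_writer earlier in this patch, embeds the column mapping only for the first entry carrying a given schema version into a given segment. A standalone sketch of that bookkeeping, with a plain set standing in for the segment's schema-version tracking:

    #include <cstdint>
    #include <unordered_set>

    // Stand-in for segment::is_schema_version_known()/add_schema_version().
    struct segment_schema_cache {
        std::unordered_set<uint64_t> seen; // schema versions already written to this segment
        // True when the entry being appended must embed its column mapping.
        bool needs_schema(uint64_t version) {
            return seen.insert(version).second; // first sighting -> embed the mapping
        }
    };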
- */ - -#include -#include -#include -#include -#include - -#include -#include - -#include "commitlog.hh" -#include "commitlog_replayer.hh" -#include "database.hh" -#include "sstables/sstables.hh" -#include "db/system_keyspace.hh" -#include "cql3/query_processor.hh" -#include "log.hh" -#include "converting_mutation_partition_applier.hh" -#include "schema_registry.hh" -#include "commitlog_entry.hh" - -static logging::logger rlogger("commitlog_replayer"); - -class db::commitlog_replayer::impl { - struct column_mappings { - std::unordered_map map; - future<> stop() { return make_ready_future<>(); } - }; - - // we want the processing methods to be const, since they use - // shard-sharing of data -> read only - // this one is special since it is thread local. - // Should actually make sharded::local a const function (it does - // not modify content), but... - mutable seastar::sharded _column_mappings; - - friend class db::commitlog_replayer; -public: - impl(seastar::sharded& db); - - future<> init(); - - struct stats { - uint64_t invalid_mutations = 0; - uint64_t skipped_mutations = 0; - uint64_t applied_mutations = 0; - uint64_t corrupt_bytes = 0; - - stats& operator+=(const stats& s) { - invalid_mutations += s.invalid_mutations; - skipped_mutations += s.skipped_mutations; - applied_mutations += s.applied_mutations; - corrupt_bytes += s.corrupt_bytes; - return *this; - } - stats operator+(const stats& s) const { - stats tmp = *this; - tmp += s; - return tmp; - } - }; - - // move start/stop of the thread local bookkeep to "top level" - // and also make sure to assert on it actually being started. - future<> start() { - return _column_mappings.start(); - } - future<> stop() { - return _column_mappings.stop(); - } - - future<> process(stats*, temporary_buffer buf, replay_position rp) const; - future recover(sstring file) const; - - typedef std::unordered_map rp_map; - typedef std::unordered_map shard_rpm_map; - typedef std::unordered_map shard_rp_map; - - replay_position min_pos(unsigned shard) const { - auto i = _min_pos.find(shard); - return i != _min_pos.end() ? i->second : replay_position(); - } - replay_position cf_min_pos(const utils::UUID& uuid, unsigned shard) const { - auto i = _rpm.find(shard); - if (i == _rpm.end()) { - return replay_position(); - } - auto j = i->second.find(uuid); - return j != i->second.end() ? j->second : replay_position(); - } - - seastar::sharded& - _qp; - shard_rpm_map - _rpm; - shard_rp_map - _min_pos; -}; - -db::commitlog_replayer::impl::impl(seastar::sharded& qp) - : _qp(qp) -{} - -future<> db::commitlog_replayer::impl::init() { - return _qp.map_reduce([this](shard_rpm_map map) { - for (auto& p1 : map) { - for (auto& p2 : p1.second) { - auto& pp = _rpm[p1.first][p2.first]; - pp = std::max(pp, p2.second); - - auto i = _min_pos.find(p1.first); - if (i == _min_pos.end() || p2.second < i->second) { - _min_pos[p1.first] = p2.second; - } - } - } - }, [this](cql3::query_processor& qp) { - return do_with(shard_rpm_map{}, [this, &qp](shard_rpm_map& map) { - return parallel_for_each(qp.db().local().get_column_families(), [&map, &qp](auto& cfp) { - auto uuid = cfp.first; - // We do this on each cpu, for each CF, which technically is a little wasteful, but the values are - // cached, this is only startup, and it makes the code easier. - // Get all truncation records for the CF and initialize max rps if - // present. Cannot do this on demand, as there may be no sstables to - // mark the CF as "needed". 
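The replayer's init(), continuing below, folds truncation records into two maps: the highest flushed replay position per (shard, table), and the per-shard minimum of those highs. A standalone sketch of that bookkeeping, with uint64_t standing in for replay_position and int for the table UUID:

    #include <algorithm>
    #include <cstdint>
    #include <map>

    struct replay_bounds {
        std::map<unsigned, std::map<int, uint64_t>> per_cf; // shard -> table -> max flushed rp
        std::map<unsigned, uint64_t> min_pos;               // shard -> min over its tables

        void add(unsigned shard, int table, uint64_t rp) {
            auto& slot = per_cf[shard][table];
            slot = std::max(slot, rp);                      // keep the highest flushed position
            auto it = min_pos.find(shard);
            min_pos[shard] = it == min_pos.end() ? rp : std::min(it->second, rp);
        }
    };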
- return db::system_keyspace::get_truncated_position(uuid).then([&map, &uuid](std::vector tpps) { - for (auto& p : tpps) { - rlogger.trace("CF {} truncated at {}", uuid, p); - auto& pp = map[p.shard_id()][uuid]; - pp = std::max(pp, p); - } - }); - }).then([&map] { - return make_ready_future(map); - }); - }); - }).finally([this] { - // bugfix: the above map-reduce will not_ detect if sstables - // are _missing_ from a CF. And because of re-sharding, we can't - // just insert initial zeros into the maps, because we don't know - // how many shards there was last time. - // However, this only affects global min pos, since - // for each CF, the worst that happens is that we have a missing - // entry -> empty replay_pos == min value. But calculating - // global min pos will be off, since we will only base it on - // existing sstables-per-shard. - // So, go through all CF:s and check, if a shard mapping does not - // have data for it, assume we must set global pos to zero. - for (auto&p : _qp.local().db().local().get_column_families()) { - for (auto&p1 : _rpm) { // for each shard - if (!p1.second.count(p.first)) { - _min_pos[p1.first] = replay_position(); - } - } - } - for (auto&p : _min_pos) { - rlogger.debug("minimum position for shard {}: {}", p.first, p.second); - } - for (auto&p1 : _rpm) { - for (auto& p2 : p1.second) { - rlogger.debug("replay position for shard/uuid {}/{}: {}", p1.first, p2.first, p2.second); - } - } - }); -} - -future -db::commitlog_replayer::impl::recover(sstring file) const { - assert(_column_mappings.local_is_initialized()); - - replay_position rp{commitlog::descriptor(file)}; - auto gp = min_pos(rp.shard_id()); - - if (rp.id < gp.id) { - rlogger.debug("skipping replay of fully-flushed {}", file); - return make_ready_future(); - } - position_type p = 0; - if (rp.id == gp.id) { - p = gp.pos; - } - - auto s = make_lw_shared(); - - return db::commitlog::read_log_file(file, - std::bind(&impl::process, this, s.get(), std::placeholders::_1, - std::placeholders::_2), p).then([](auto s) { - auto f = s->done(); - return f.finally([s = std::move(s)] {}); - }).then_wrapped([s](future<> f) { - try { - f.get(); - } catch (commitlog::segment_data_corruption_error& e) { - s->corrupt_bytes += e.bytes(); - } catch (...) { - throw; - } - return make_ready_future(*s); - }); -} - -future<> db::commitlog_replayer::impl::process(stats* s, temporary_buffer buf, replay_position rp) const { - try { - - commitlog_entry_reader cer(buf); - auto& fm = cer.mutation(); - - auto& local_cm = _column_mappings.local().map; - auto cm_it = local_cm.find(fm.schema_version()); - if (cm_it == local_cm.end()) { - if (!cer.get_column_mapping()) { - throw std::runtime_error(sprint("unknown schema version {}", fm.schema_version())); - } - rlogger.debug("new schema version {} in entry {}", fm.schema_version(), rp); - cm_it = local_cm.emplace(fm.schema_version(), *cer.get_column_mapping()).first; - } - const column_mapping& src_cm = cm_it->second; - - auto shard_id = rp.shard_id(); - if (rp < min_pos(shard_id)) { - rlogger.trace("entry {} is less than global min position. skipping", rp); - s->skipped_mutations++; - return make_ready_future<>(); - } - - auto uuid = fm.column_family_id(); - auto cf_rp = cf_min_pos(uuid, shard_id); - if (rp <= cf_rp) { - rlogger.trace("entry {} at {} is younger than recorded replay position {}. 
skipping", fm.column_family_id(), rp, cf_rp); - s->skipped_mutations++; - return make_ready_future<>(); - } - - auto shard = _qp.local().db().local().shard_of(fm); - return _qp.local().db().invoke_on(shard, [this, cer = std::move(cer), &src_cm, rp, shard, s] (database& db) -> future<> { - auto& fm = cer.mutation(); - // TODO: might need better verification that the deserialized mutation - // is schema compatible. My guess is that just applying the mutation - // will not do this. - auto& cf = db.find_column_family(fm.column_family_id()); - - if (rlogger.is_enabled(logging::log_level::debug)) { - rlogger.debug("replaying at {} v={} {}:{} at {}", fm.column_family_id(), fm.schema_version(), - cf.schema()->ks_name(), cf.schema()->cf_name(), rp); - } - // Removed forwarding "new" RP. Instead give none/empty. - // This is what origin does, and it should be fine. - // The end result should be that once sstables are flushed out - // their "replay_position" attribute will be empty, which is - // lower than anything the new session will produce. - if (cf.schema()->version() != fm.schema_version()) { - auto& local_cm = _column_mappings.local().map; - auto cm_it = local_cm.find(fm.schema_version()); - if (cm_it == local_cm.end()) { - cm_it = local_cm.emplace(fm.schema_version(), src_cm).first; - } - const column_mapping& cm = cm_it->second; - mutation m(fm.decorated_key(*cf.schema()), cf.schema()); - converting_mutation_partition_applier v(cm, *cf.schema(), m.partition()); - fm.partition().accept(cm, v); - cf.apply(std::move(m)); - } else { - cf.apply(fm, cf.schema()); - } - s->applied_mutations++; - return make_ready_future<>(); - }).handle_exception([s](auto ep) { - s->invalid_mutations++; - // TODO: write mutation to file like origin. - rlogger.warn("error replaying: {}", ep); - }); - } catch (no_such_column_family&) { - // No such CF now? Origin just ignores this. - } catch (...) { - s->invalid_mutations++; - // TODO: write mutation to file like origin. - rlogger.warn("error replaying: {}", std::current_exception()); - } - - return make_ready_future<>(); -} - -db::commitlog_replayer::commitlog_replayer(seastar::sharded& qp) - : _impl(std::make_unique(qp)) -{} - -db::commitlog_replayer::commitlog_replayer(commitlog_replayer&& r) noexcept - : _impl(std::move(r._impl)) -{} - -db::commitlog_replayer::~commitlog_replayer() -{} - -future db::commitlog_replayer::create_replayer(seastar::sharded& qp) { - return do_with(commitlog_replayer(qp), [](auto&& rp) { - auto f = rp._impl->init(); - return f.then([rp = std::move(rp)]() mutable { - return make_ready_future(std::move(rp)); - }); - }); -} - -future<> db::commitlog_replayer::recover(std::vector files) { - typedef std::unordered_multimap shard_file_map; - - rlogger.info("Replaying {}", join(", ", files)); - - // pre-compute work per shard already. - auto map = ::make_lw_shared(); - for (auto& f : files) { - commitlog::descriptor d(f); - replay_position p = d; - map->emplace(p.shard_id() % smp::count, std::move(f)); - } - - return _impl->start().then([this, map] { - return map_reduce(smp::all_cpus(), [this, map](unsigned id) { - return smp::submit_to(id, [this, id, map]() { - auto total = ::make_lw_shared(); - // TODO: or something. For now, we do this serialized per shard, - // to reduce mutation congestion. We could probably (says avi) - // do 2 segments in parallel or something, but lets use this first. 
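recover() above routes every segment file to the shard that wrote it, wrapping by smp::count in case the node now runs fewer cores than when the segments were written. A standalone sketch of that distribution step:

    #include <string>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    // Mirrors `map->emplace(p.shard_id() % smp::count, std::move(f))` above.
    std::unordered_map<unsigned, std::vector<std::string>>
    bucket_by_shard(const std::vector<std::pair<unsigned, std::string>>& files,
                    unsigned smp_count) {
        std::unordered_map<unsigned, std::vector<std::string>> buckets;
        for (const auto& [writer_shard, path] : files) {
            buckets[writer_shard % smp_count].push_back(path);
        }
        return buckets;
    }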
- auto range = map->equal_range(id); - return do_for_each(range.first, range.second, [this, total](const std::pair& p) { - auto&f = p.second; - rlogger.debug("Replaying {}", f); - return _impl->recover(f).then([f, total](impl::stats stats) { - if (stats.corrupt_bytes != 0) { - rlogger.warn("Corrupted file: {}. {} bytes skipped.", f, stats.corrupt_bytes); - } - rlogger.debug("Log replay of {} complete, {} replayed mutations ({} invalid, {} skipped)" - , f - , stats.applied_mutations - , stats.invalid_mutations - , stats.skipped_mutations - ); - *total += stats; - }); - }).then([total] { - return make_ready_future(*total); - }); - }); - }, impl::stats(), std::plus()).then([](impl::stats totals) { - rlogger.info("Log replay complete, {} replayed mutations ({} invalid, {} skipped)" - , totals.applied_mutations - , totals.invalid_mutations - , totals.skipped_mutations - ); - }); - }).finally([this] { - return _impl->stop(); - }); -} - -future<> db::commitlog_replayer::recover(sstring f) { - return recover(std::vector{ f }); -} - diff --git a/scylla/db/commitlog/commitlog_replayer.hh b/scylla/db/commitlog/commitlog_replayer.hh deleted file mode 100644 index d00631e..0000000 --- a/scylla/db/commitlog/commitlog_replayer.hh +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
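Putting the pieces together, a usage sketch for the replayer API declared in the header below: create the replayer, then hand it the preexisting segment paths. The helper name is hypothetical, and since the template arguments are garbled in this patch, the exact element types returned by get_segments_to_replay() are an assumption:

    future<> replay_existing(seastar::sharded<cql3::query_processor>& qp, db::commitlog& log) {
        return db::commitlog_replayer::create_replayer(qp).then([&log](db::commitlog_replayer r) {
            return do_with(std::move(r), [&log](db::commitlog_replayer& r) {
                // Segments left over from the previous run; empty on a second call.
                return r.recover(log.get_segments_to_replay());
            });
        });
    }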
- */ - -#pragma once - -#include -#include -#include - -class database; - -namespace cql3 { - class query_processor; -} - -namespace db { - -class commitlog; - -class commitlog_replayer { -public: - commitlog_replayer(commitlog_replayer&&) noexcept; - ~commitlog_replayer(); - - static future create_replayer(seastar::sharded&); - - future<> recover(std::vector files); - future<> recover(sstring file); - -private: - commitlog_replayer(seastar::sharded&); - - class impl; - std::unique_ptr _impl; -}; - -} diff --git a/scylla/db/commitlog/replay_position.hh b/scylla/db/commitlog/replay_position.hh deleted file mode 100644 index 16856ed..0000000 --- a/scylla/db/commitlog/replay_position.hh +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include "utils/UUID.hh" - - -namespace db { - -using segment_id_type = uint64_t; -using position_type = uint32_t; - -struct replay_position { - static const constexpr size_t max_cpu_bits = 10; // 1024 cpus. should be enough for anyone - static const constexpr size_t max_ts_bits = 8 * sizeof(segment_id_type) - max_cpu_bits; - static const constexpr segment_id_type ts_mask = (segment_id_type(1) << max_ts_bits) - 1; - static const constexpr segment_id_type cpu_mask = ~ts_mask; - - segment_id_type id; - position_type pos; - - replay_position(segment_id_type i = 0, position_type p = 0) - : id(i), pos(p) - {} - - replay_position(unsigned shard, segment_id_type i, position_type p = 0) - : id((segment_id_type(shard) << max_ts_bits) | i), pos(p) - { - if (i & cpu_mask) { - throw std::invalid_argument("base id overflow: " + std::to_string(i)); - } - } - - bool operator<(const replay_position & r) const { - return id < r.id ? true : (r.id < id ? 
false : pos < r.pos); - } - bool operator<=(const replay_position & r) const { - return !(r < *this); - } - bool operator==(const replay_position & r) const { - return id == r.id && pos == r.pos; - } - bool operator!=(const replay_position & r) const { - return !(*this == r); - } - - unsigned shard_id() const { - return unsigned(id >> max_ts_bits); - } - segment_id_type base_id() const { - return id & ts_mask; - } - replay_position base() const { - return replay_position(base_id(), pos); - } - - template - auto describe_type(Describer f) { return f(id, pos); } -}; - -class commitlog; -class cf_holder; - -using cf_id_type = utils::UUID; - -class rp_handle { -public: - rp_handle() noexcept; - rp_handle(rp_handle&&) noexcept; - rp_handle& operator=(rp_handle&&) noexcept; - ~rp_handle(); - - replay_position release(); - - operator bool() const { - return _h && _rp != replay_position(); - } - operator const replay_position&() const { - return _rp; - } - const replay_position& rp() const { - return _rp; - } -private: - friend class commitlog; - - rp_handle(shared_ptr, cf_id_type, replay_position) noexcept; - - ::shared_ptr _h; - cf_id_type _cf; - replay_position _rp; -}; - - -std::ostream& operator<<(std::ostream& out, const replay_position& s); - -} diff --git a/scylla/db/commitlog/rp_set.hh b/scylla/db/commitlog/rp_set.hh deleted file mode 100644 index edf8e48..0000000 --- a/scylla/db/commitlog/rp_set.hh +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
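A standalone check of the replay_position bit packing defined above: the writing shard occupies the top max_cpu_bits of the 64-bit segment id, and the monotonic base id occupies the remaining bits.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    int main() {
        constexpr std::size_t max_cpu_bits = 10;
        constexpr std::size_t max_ts_bits = 64 - max_cpu_bits;
        constexpr uint64_t ts_mask = (uint64_t(1) << max_ts_bits) - 1;

        uint64_t shard = 3, base = 0x1234;
        uint64_t id = (shard << max_ts_bits) | base; // replay_position(shard, i)
        assert(id >> max_ts_bits == shard);          // shard_id()
        assert((id & ts_mask) == base);              // base_id()
        return 0;
    }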
- */ - -#pragma once - -#include - -#include "replay_position.hh" -#include "commitlog.hh" - -namespace db { - -class rp_set { -public: - typedef std::unordered_map usage_map; - - rp_set() - {} - rp_set(const replay_position & rp) - { - put(rp); - } - rp_set(rp_set&&) = default; - - rp_set& operator=(rp_set&&) = default; - - void put(const replay_position& rp) { - _usage[rp.id]++; - } - void put(rp_handle && h) { - if (h) { - put(h.rp()); - } - h.release(); - } - - size_t size() const { - return _usage.size(); - } - bool empty() const { - return _usage.empty(); - } - - const usage_map& usage() const { - return _usage; - } -private: - usage_map _usage; -}; - -} diff --git a/scylla/db/consistency_level.cc b/scylla/db/consistency_level.cc deleted file mode 100644 index 465b440..0000000 --- a/scylla/db/consistency_level.cc +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "db/consistency_level.hh" - -#include -#include -#include -#include "exceptions/exceptions.hh" -#include "core/sstring.hh" -#include "schema.hh" -#include "database.hh" -#include "unimplemented.hh" -#include "db/read_repair_decision.hh" -#include "locator/abstract_replication_strategy.hh" -#include "locator/network_topology_strategy.hh" -#include "utils/fb_utilities.hh" -#include "heat_load_balance.hh" - -namespace db { - -logging::logger cl_logger("consistency"); - -size_t quorum_for(keyspace& ks) { - return (ks.get_replication_strategy().get_replication_factor() / 2) + 1; -} - -size_t local_quorum_for(keyspace& ks, const sstring& dc) { - using namespace locator; - - auto& rs = ks.get_replication_strategy(); - - if (rs.get_type() == replication_strategy_type::network_topology) { - network_topology_strategy* nrs = - static_cast(&rs); - - return (nrs->get_replication_factor(dc) / 2) + 1; - } - - return quorum_for(ks); -} - -size_t block_for_local_serial(keyspace& ks) { - using namespace locator; - - // - // TODO: Consider caching the final result in order to avoid all these - // useless dereferencing. 
Note however that this will introduce quite - // a lot of complications since both snitch output for a local host - // and the snitch itself (and thus its output) may change dynamically. - // - auto& snitch_ptr = i_endpoint_snitch::get_local_snitch_ptr(); - auto local_addr = utils::fb_utilities::get_broadcast_address(); - - return local_quorum_for(ks, snitch_ptr->get_datacenter(local_addr)); -} - -size_t block_for_each_quorum(keyspace& ks) { - using namespace locator; - - auto& rs = ks.get_replication_strategy(); - - if (rs.get_type() == replication_strategy_type::network_topology) { - network_topology_strategy* nrs = - static_cast(&rs); - size_t n = 0; - - for (auto& dc : nrs->get_datacenters()) { - n += local_quorum_for(ks, dc); - } - - return n; - } else { - return quorum_for(ks); - } -} - -size_t block_for(keyspace& ks, consistency_level cl) { - switch (cl) { - case consistency_level::ONE: - case consistency_level::LOCAL_ONE: - return 1; - case consistency_level::ANY: - return 1; - case consistency_level::TWO: - return 2; - case consistency_level::THREE: - return 3; - case consistency_level::QUORUM: - case consistency_level::SERIAL: - return quorum_for(ks); - case consistency_level::ALL: - return ks.get_replication_strategy().get_replication_factor(); - case consistency_level::LOCAL_QUORUM: - case consistency_level::LOCAL_SERIAL: - return block_for_local_serial(ks); - case consistency_level::EACH_QUORUM: - return block_for_each_quorum(ks); - default: - abort(); - } -} - -bool is_datacenter_local(consistency_level l) { - return l == consistency_level::LOCAL_ONE || l == consistency_level::LOCAL_QUORUM; -} - -bool is_local(gms::inet_address endpoint) { - using namespace locator; - - auto& snitch_ptr = i_endpoint_snitch::get_local_snitch_ptr(); - auto local_addr = utils::fb_utilities::get_broadcast_address(); - - return snitch_ptr->get_datacenter(local_addr) == - snitch_ptr->get_datacenter(endpoint); -} - -std::vector -filter_for_query(consistency_level cl, - keyspace& ks, - std::vector live_endpoints, - read_repair_decision read_repair, gms::inet_address* extra, column_family* cf) { - size_t local_count; - - if (read_repair == read_repair_decision::GLOBAL) { // take RRD.GLOBAL out of the way - return std::move(live_endpoints); - } - - if (read_repair == read_repair_decision::DC_LOCAL || is_datacenter_local(cl)) { - auto it = boost::range::stable_partition(live_endpoints, is_local); - local_count = std::distance(live_endpoints.begin(), it); - if (is_datacenter_local(cl)) { - live_endpoints.erase(it, live_endpoints.end()); - } - } - - size_t bf = block_for(ks, cl); - - if (read_repair == read_repair_decision::DC_LOCAL) { - bf = std::max(block_for(ks, cl), local_count); - } - - if (bf >= live_endpoints.size()) { // RRD.DC_LOCAL + CL.LOCAL or CL.ALL - return std::move(live_endpoints); - } - - if (cf) { - auto get_hit_rate = [cf] (gms::inet_address ep) -> float { - constexpr float max_hit_rate = 0.999; - auto ht = cf->get_hit_rate(ep); - if (float(ht.rate) < 0) { - return float(ht.rate); - } else if (lowres_clock::now() - ht.last_updated > std::chrono::milliseconds(1000)) { - // if a cache entry is not updates for a while try to send traffic there - // to get more up to date data, mark it updated to not send to much traffic there - cf->set_hit_rate(ep, ht.rate); - return max_hit_rate; - } else { - return std::min(float(ht.rate), max_hit_rate); // calculation below cannot work with hit rate 1 - } - }; - - float ht_max = 0; - float ht_min = 1; - bool old_node = false; - - auto epi = 
boost::copy_range>>(live_endpoints | boost::adaptors::transformed([&] (gms::inet_address ep) { - auto ht = get_hit_rate(ep); - old_node = old_node || ht < 0; - ht_max = std::max(ht_max, ht); - ht_min = std::min(ht_min, ht); - return std::make_pair(ep, ht); - })); - - if (!old_node && ht_max - ht_min > 0.01) { // if there is old node or hit rates are close skip calculations - // local node is always first if present (see storage_proxy::get_live_sorted_endpoints) - unsigned local_idx = epi[0].first == utils::fb_utilities::get_broadcast_address() ? 0 : epi.size() + 1; - live_endpoints = miss_equalizing_combination(epi, local_idx, bf, bool(extra)); - } - } - - if (extra) { - *extra = live_endpoints[bf]; // extra replica for speculation - } - - live_endpoints.erase(live_endpoints.begin() + bf, live_endpoints.end()); - - return std::move(live_endpoints); -} - -std::vector filter_for_query(consistency_level cl, keyspace& ks, std::vector& live_endpoints, column_family* cf) { - return filter_for_query(cl, ks, live_endpoints, read_repair_decision::NONE, nullptr, cf); -} - -bool -is_sufficient_live_nodes(consistency_level cl, - keyspace& ks, - const std::vector& live_endpoints) { - using namespace locator; - - switch (cl) { - case consistency_level::ANY: - // local hint is acceptable, and local node is always live - return true; - case consistency_level::LOCAL_ONE: - return count_local_endpoints(live_endpoints) >= 1; - case consistency_level::LOCAL_QUORUM: - return count_local_endpoints(live_endpoints) >= block_for(ks, cl); - case consistency_level::EACH_QUORUM: - { - auto& rs = ks.get_replication_strategy(); - - if (rs.get_type() == replication_strategy_type::network_topology) { - for (auto& entry : count_per_dc_endpoints(ks, live_endpoints)) { - if (entry.second.live < local_quorum_for(ks, entry.first)) { - return false; - } - } - - return true; - } - } - // Fallthough on purpose for SimpleStrategy - default: - return live_endpoints.size() >= block_for(ks, cl); - } -} - -void validate_for_read(const sstring& keyspace_name, consistency_level cl) { - switch (cl) { - case consistency_level::ANY: - throw exceptions::invalid_request_exception("ANY ConsistencyLevel is only supported for writes"); - case consistency_level::EACH_QUORUM: - throw exceptions::invalid_request_exception("EACH_QUORUM ConsistencyLevel is only supported for writes"); - default: - break; - } -} - -void validate_for_write(const sstring& keyspace_name, consistency_level cl) { - switch (cl) { - case consistency_level::SERIAL: - case consistency_level::LOCAL_SERIAL: - throw exceptions::invalid_request_exception("You must use conditional updates for serializable writes"); - default: - break; - } -} - -#if 0 - // This is the same than validateForWrite really, but we include a slightly different error message for SERIAL/LOCAL_SERIAL - public void validateForCasCommit(String keyspaceName) throws InvalidRequestException - { - switch (this) - { - case EACH_QUORUM: - requireNetworkTopologyStrategy(keyspaceName); - break; - case SERIAL: - case LOCAL_SERIAL: - throw new InvalidRequestException(this + " is not supported as conditional update commit consistency. Use ANY if you mean \"make sure it is accepted but I don't care how many replicas commit it for non-SERIAL reads\""); - } - } - - public void validateForCas() throws InvalidRequestException - { - if (!isSerialConsistency()) - throw new InvalidRequestException("Invalid consistency for conditional update. 
Must be one of SERIAL or LOCAL_SERIAL"); - } -#endif - -bool is_serial_consistency(consistency_level cl) { - return cl == consistency_level::SERIAL || cl == consistency_level::LOCAL_SERIAL; -} - -void validate_counter_for_write(schema_ptr s, consistency_level cl) { - if (cl == consistency_level::ANY) { - throw exceptions::invalid_request_exception(sprint("Consistency level ANY is not yet supported for counter table %s", s->cf_name())); - } - - if (is_serial_consistency(cl)) { - throw exceptions::invalid_request_exception("Counter operations are inherently non-serializable"); - } -} - -#if 0 - private void requireNetworkTopologyStrategy(String keyspaceName) throws InvalidRequestException - { - AbstractReplicationStrategy strategy = Keyspace.open(keyspaceName).getReplicationStrategy(); - if (!(strategy instanceof NetworkTopologyStrategy)) - throw new InvalidRequestException(String.format("consistency level %s not compatible with replication strategy (%s)", this, strategy.getClass().getName())); - } -#endif - -} diff --git a/scylla/db/consistency_level.hh b/scylla/db/consistency_level.hh deleted file mode 100644 index 2156b2f..0000000 --- a/scylla/db/consistency_level.hh +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "locator/network_topology_strategy.hh" -#include "db/consistency_level_type.hh" -#include "db/read_repair_decision.hh" -#include "exceptions/exceptions.hh" -#include "utils/fb_utilities.hh" -#include "gms/inet_address.hh" -#include "database.hh" - -#include -#include - -namespace db { - -extern logging::logger cl_logger; - -size_t quorum_for(keyspace& ks); - -size_t local_quorum_for(keyspace& ks, const sstring& dc); - -size_t block_for_local_serial(keyspace& ks); - -size_t block_for_each_quorum(keyspace& ks); - -size_t block_for(keyspace& ks, consistency_level cl); - -bool is_datacenter_local(consistency_level l); - -bool is_local(gms::inet_address endpoint); - -template -inline size_t count_local_endpoints(Range& live_endpoints) { - return std::count_if(live_endpoints.begin(), live_endpoints.end(), is_local); -} - -std::vector -filter_for_query(consistency_level cl, - keyspace& ks, - std::vector live_endpoints, - read_repair_decision read_repair, gms::inet_address* extra, column_family* cf); - -std::vector filter_for_query(consistency_level cl, keyspace& ks, std::vector& live_endpoints, column_family* cf); - -struct dc_node_count { - size_t live = 0; - size_t pending = 0; -}; - -template > -inline std::unordered_map count_per_dc_endpoints( - keyspace& ks, - Range& live_endpoints, - const PendingRange& pending_endpoints = std::array()) { - using namespace locator; - - auto& rs = ks.get_replication_strategy(); - auto& snitch_ptr = i_endpoint_snitch::get_local_snitch_ptr(); - - network_topology_strategy* nrs = - static_cast(&rs); - - std::unordered_map dc_endpoints; - for (auto& dc : nrs->get_datacenters()) { - dc_endpoints.emplace(dc, dc_node_count()); - } - - // - // Since live_endpoints are a subset of a get_natural_endpoints() output we - // will never get any endpoints outside the dataceters from - // nrs->get_datacenters(). - // - for (auto& endpoint : live_endpoints) { - ++(dc_endpoints[snitch_ptr->get_datacenter(endpoint)].live); - } - - for (auto& endpoint : pending_endpoints) { - ++(dc_endpoints[snitch_ptr->get_datacenter(endpoint)].pending); - } - - return dc_endpoints; -} - -bool -is_sufficient_live_nodes(consistency_level cl, - keyspace& ks, - const std::vector& live_endpoints); - -template -inline bool assure_sufficient_live_nodes_each_quorum( - consistency_level cl, - keyspace& ks, - Range& live_endpoints, - const PendingRange& pending_endpoints) { - using namespace locator; - - auto& rs = ks.get_replication_strategy(); - - if (rs.get_type() == replication_strategy_type::network_topology) { - for (auto& entry : count_per_dc_endpoints(ks, live_endpoints, pending_endpoints)) { - auto dc_block_for = local_quorum_for(ks, entry.first); - auto dc_live = entry.second.live; - auto dc_pending = entry.second.pending; - - if (dc_live < dc_block_for + dc_pending) { - throw exceptions::unavailable_exception(cl, dc_block_for, dc_live); - } - } - - return true; - } - - return false; -} - -template> -inline void assure_sufficient_live_nodes( - consistency_level cl, - keyspace& ks, - Range& live_endpoints, - const PendingRange& pending_endpoints = std::array()) { - size_t need = block_for(ks, cl); - - auto adjust_live_for_error = [] (size_t live, size_t pending) { - // DowngradingConsistencyRetryPolicy uses alive replicas count from Unavailable - // exception to adjust CL for retry. When pending node is present CL is increased - // by 1 internally, so reported number of live nodes has to be adjusted to take - // this into account - return pending <= live ? 
live - pending : 0; - }; - - switch (cl) { - case consistency_level::ANY: - // local hint is acceptable, and local node is always live - break; - case consistency_level::LOCAL_ONE: - if (count_local_endpoints(live_endpoints) < count_local_endpoints(pending_endpoints) + 1) { - throw exceptions::unavailable_exception(cl, 1, 0); - } - break; - case consistency_level::LOCAL_QUORUM: { - size_t local_live = count_local_endpoints(live_endpoints); - size_t pending = count_local_endpoints(pending_endpoints); - if (local_live < need + pending) { - cl_logger.debug("Local replicas {} are insufficient to satisfy LOCAL_QUORUM requirement of needed {} and pending {}", live_endpoints, local_live, pending); - throw exceptions::unavailable_exception(cl, need, adjust_live_for_error(local_live, pending)); - } - break; - } - case consistency_level::EACH_QUORUM: - if (assure_sufficient_live_nodes_each_quorum(cl, ks, live_endpoints, pending_endpoints)) { - break; - } - // Fallthough on purpose for SimpleStrategy - default: - size_t live = live_endpoints.size(); - size_t pending = pending_endpoints.size(); - if (live < need + pending) { - cl_logger.debug("Live nodes {} do not satisfy ConsistencyLevel ({} required, {} pending)", live, need, pending); - throw exceptions::unavailable_exception(cl, need, adjust_live_for_error(live, pending)); - } - break; - } -} - -void validate_for_read(const sstring& keyspace_name, consistency_level cl); - -void validate_for_write(const sstring& keyspace_name, consistency_level cl); - -bool is_serial_consistency(consistency_level cl); - -void validate_counter_for_write(schema_ptr s, consistency_level cl); - -} diff --git a/scylla/db/consistency_level_type.hh b/scylla/db/consistency_level_type.hh deleted file mode 100644 index 8badc87..0000000 --- a/scylla/db/consistency_level_type.hh +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include - -namespace db { - -enum class consistency_level { - ANY, - ONE, - TWO, - THREE, - QUORUM, - ALL, - LOCAL_QUORUM, - EACH_QUORUM, - SERIAL, - LOCAL_SERIAL, - LOCAL_ONE -}; - -std::ostream& operator<<(std::ostream& os, consistency_level cl); - -} diff --git a/scylla/db/cql_type_parser.cc b/scylla/db/cql_type_parser.cc deleted file mode 100644 index 46dd4ee..0000000 --- a/scylla/db/cql_type_parser.cc +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ -#include -#include -#include - -#include "database.hh" -#include "service/storage_proxy.hh" -#include "cql3/CqlParser.hpp" -#include "cql3/util.hh" -#include "cql_type_parser.hh" -#include "types.hh" - -static ::shared_ptr parse_raw(const sstring& str) { - return cql3::util::do_with_parser(str, std::mem_fn(&cql3_parser::CqlParser::comparatorType)); -} - -data_type db::cql_type_parser::parse(const sstring& keyspace, const sstring& str, lw_shared_ptr user_types) { - static const thread_local std::unordered_map> native_types = []{ - std::unordered_map> res; - for (auto& nt : cql3::cql3_type::values()) { - res.emplace(nt->to_string(), nt); - } - return res; - }(); - - auto i = native_types.find(str); - if (i != native_types.end()) { - return i->second->get_type(); - } - - if (!user_types && service::get_storage_proxy().local_is_initialized()) { - user_types = service::get_storage_proxy().local().get_db().local().find_keyspace(keyspace).metadata()->user_types(); - } - // special-case top-level UDTs - if (user_types) { - auto& map = user_types->get_all_types(); - auto i = map.find(utf8_type->decompose(str)); - if (i != map.end()) { - return i->second; - } - } - - auto raw = parse_raw(str); - auto cql = raw->prepare_internal(keyspace, user_types); - return cql->get_type(); -} - -class db::cql_type_parser::raw_builder::impl { -public: - impl(sstring ks_name) - : _ks_name(std::move(ks_name)) - {} - -// static shared_ptr get_instance(sstring keyspace, bytes name, std::vector field_names, std::vector field_types) { - - struct entry { - sstring name; - std::vector field_names; - std::vector<::shared_ptr> field_types; - - user_type prepare(const sstring& keyspace, lw_shared_ptr user_types) const { - std::vector fields; - fields.reserve(field_types.size()); - std::transform(field_types.begin(), field_types.end(), std::back_inserter(fields), [&](auto& r) { - return r->prepare_internal(keyspace, user_types)->get_type(); - }); - std::vector names; - names.reserve(field_names.size()); - std::transform(field_names.begin(), field_names.end(), std::back_inserter(names), [](const sstring& s) { - return to_bytes(s); - }); - - return user_type_impl::get_instance(keyspace, to_bytes(name), std::move(names), std::move(fields)); - } - - }; - - void add(sstring name, std::vector field_names, std::vector field_types) { - entry e{ std::move(name), std::move(field_names) }; - for (auto& t : field_types) { - e.field_types.emplace_back(parse_raw(t)); - } - _definitions.emplace_back(std::move(e)); - } - - // See cassandra Types.java - std::vector build() { - if (_definitions.empty()) { - return {}; - } - - /* - * build a DAG of UDT dependencies - */ - std::unordered_multimap adjacency; - for (auto& e1 : _definitions) { - for (auto& e2 : _definitions) { - if (&e1 != &e2 && std::any_of(e1.field_types.begin(), e1.field_types.end(), [&e2](auto& t) { return t->references_user_type(e2.name); })) { - adjacency.emplace(&e2, &e1); - } - } - } - /* - * resolve dependencies in topological order, using Kahn's algorithm - */ - std::unordered_map vertices; // map values are numbers of referenced types - for (auto&p : adjacency) { - vertices[p.second]++; - } - - std::deque resolvable_types; - for (auto& e : _definitions) { - if (!vertices.count(&e)) { - resolvable_types.emplace_back(&e); - } - } - - auto types = ::make_lw_shared(); - - while (!resolvable_types.empty()) { - auto* e = resolvable_types.front(); - auto r = adjacency.equal_range(e); - - while (r.first != r.second) { - auto* d = r.first->second; - if (--vertices[d] == 0) 
{ - resolvable_types.push_back(d); - } - ++r.first; - } - - types->add_type(e->prepare(_ks_name, types)); - resolvable_types.pop_front(); - } - - if (types->get_all_types().size() != _definitions.size()) { - throw exceptions::configuration_exception(sprint("Cannot resolve UDTs for keyspace %s: some types are missing", _ks_name)); - } - - return boost::copy_range>(types->get_all_types() | boost::adaptors::map_values); - } -private: - sstring _ks_name; - std::vector _definitions; -}; - -db::cql_type_parser::raw_builder::raw_builder(sstring ks_name) - : _impl(std::make_unique(std::move(ks_name))) -{} - -db::cql_type_parser::raw_builder::~raw_builder() -{} - -void db::cql_type_parser::raw_builder::add(sstring name, std::vector field_names, std::vector field_types) { - _impl->add(std::move(name), std::move(field_names), std::move(field_types)); -} - -std::vector db::cql_type_parser::raw_builder::build() { - return _impl->build(); -} diff --git a/scylla/db/cql_type_parser.hh b/scylla/db/cql_type_parser.hh deleted file mode 100644 index 4b4a28b..0000000 --- a/scylla/db/cql_type_parser.hh +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include -#include - -#include "types.hh" - -class user_types_metadata; - -namespace db { -namespace cql_type_parser { - -data_type parse(const sstring& keyspace, const sstring& type, lw_shared_ptr = {}); - -class raw_builder { -public: - raw_builder(sstring ks_name); - ~raw_builder(); - - void add(sstring name, std::vector field_names, std::vector field_types); - std::vector build(); -private: - class impl; - std::unique_ptr - _impl; -}; - -} -} diff --git a/scylla/db/heat_load_balance.cc b/scylla/db/heat_load_balance.cc deleted file mode 100644 index ac16821..0000000 --- a/scylla/db/heat_load_balance.cc +++ /dev/null @@ -1,475 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. 
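// A self-contained sketch of the Kahn-style topological sort that
// raw_builder::build() in cql_type_parser.cc above uses to resolve UDT
// dependencies. The graph here is over plain strings, and resolve_order /
// deps are illustrative names only - this is not the scylla code, just the
// same idea in miniature.
#include <deque>
#include <string>
#include <unordered_map>
#include <vector>

static std::vector<std::string> resolve_order(
        const std::unordered_map<std::string, std::vector<std::string>>& deps) {
    // deps maps each type to the types that reference it (its dependents),
    // mirroring the adjacency multimap built above.
    std::unordered_map<std::string, unsigned> in_degree;
    for (auto& [type, dependents] : deps) {
        in_degree.emplace(type, 0);
        for (auto& d : dependents) {
            in_degree.emplace(d, 0);
        }
    }
    for (auto& [type, dependents] : deps) {
        for (auto& d : dependents) {
            ++in_degree[d];
        }
    }
    std::deque<std::string> ready; // types with no unresolved references
    for (auto& [type, n] : in_degree) {
        if (n == 0) {
            ready.push_back(type);
        }
    }
    std::vector<std::string> order;
    while (!ready.empty()) {
        auto type = ready.front();
        ready.pop_front();
        order.push_back(type);
        auto it = deps.find(type);
        if (it != deps.end()) {
            for (auto& d : it->second) {
                if (--in_degree[d] == 0) {
                    ready.push_back(d);
                }
            }
        }
    }
    // order.size() < in_degree.size() here means a dependency cycle -
    // the analogue of the configuration_exception thrown above.
    return order;
}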
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ -#include -#include -#include -#include "heat_load_balance.hh" - -logging::logger hr_logger("heat_load_balance"); - -// Return a uniformly-distributed random number in [0,1) -// We use per-thread state for thread safety. We seed the random number generator -// once with a real random value, if available. -static thread_local std::random_device r; -static thread_local std::default_random_engine random_engine(r()); -float -rand_float() { - static thread_local std::uniform_real_distribution<float> u(0, 1); - float ret = u(random_engine); - // Gcc 5 has a bug (fixed in Gcc 6) where the above random number - // generator could return 1.0, contradicting the documentation. Let's - // replace 1.0 by the largest number below it. It's not really important - // what we replace it with... Could have also chosen any arbitrary - // constant in [0,1), or to run the random number generator again (this - // is what the fix in Gcc 6 does). - if (ret == 1.0f) { - ret = std::nextafter(ret, 0.0f); - } - return ret; -} - -// randone() takes a vector of N probabilities, and randomly returns one of - // the indexes in this vector, with the probability to choose each index - // given by the probability in the vector. - // - // The given probabilities must sum up to 1.0. This assumption is not - // verified by randone(). - // - // TODO: - // This implementation has complexity O(N). If we plan to call randone() - // many times on the same probability vector, and if N can grow large, - // we should consider a different implementation, known as "The Alias Method", - // which has an O(N) preparation stage but then only O(1) for each call. - // The alias method was first suggested by A.J. Walker in 1977 and later - // refined by Knuth and others. Here is a short overview of this method: - // The O(N) implementation of randone() divides the interval [0,1) into - // consecutive intervals of length p[i] (which sum to 1), then picks a random - // point in [0,1) and checks which of these intervals it covers. The - // observation behind the Alias Method is that the same technique will - // continue to work if we take these intervals and rearrange and/or cut them - // up, as long as we keep their total lengths. The goal would be to cut them - // up in such a way that it makes it easy (O(1)) to find which interval is - // underneath each point we pick on [0,1). - // To do that, we begin by dividing [0,1) into N intervals of equal length 1/N, - // and then packing in each of those at most two intervals belonging to - // different i’s. Now, to find which i a point belongs to, all we need - // to do is to find in which of the equal-length intervals it is (a trivial - // division and truncation), and then find out which one of the two - // possibilities that are left holds (one array indexing and comparison). - // How do we pack the equal-length 1/N intervals correctly? 
We begin by -// putting in the first one a p[i] such that p[i] <= 1/N (always possible, -// of course). If the inequality was strict, so p[i] did not completely fill -// the first 1/N-length interval, we pick another p[j] where p[j] >= 1/N -// (again, possible), take away from it what is needed to fill up the -// 1/N-length interval, reducing p[j] for the rest of the algorithm. Now, -// we continue the same algorithm with one interval less and one less value, -// so it will end in O(N) time. -// For really large N (which we'll never need here...) there are even papers -// on how to ensure that the initialization stage is really O(N) and not -// O(NlogN) - see https://web.archive.org/web/20131029203736/http://web.eecs.utk.edu/~vose/Publications/random.pdf - -static unsigned -randone(const std::vector<float>& p, float rnd = rand_float()) { - unsigned last = p.size() - 1; - for (unsigned i = 0; i < last; i++) { - rnd -= p[i]; - if (rnd < 0) { - return i; - } - } - // Note: if we're here and rnd isn't 0 (or very close to 0) then the - // p[i]s do not sum to 1... But we don't check this assumption here. - return last; -} - -// ssample() produces a random combination (i.e., unordered subset) of -// length K out of N items 0..N-1, where the different items should be -// included with different probabilities, given by a vector p of N -// probabilities, whose sum should be 1.0. -// It returns a vector with size K whose items are different integers -// between 0 and N-1. -// -// The meaning of a probability p[i] is that if we count the individual -// items appearing in returned combinations, the count of item i will be a -// fraction p[i] of the overall count. Note that p[i] must not be higher -// than 1/K: even if we return item i in *every* K-combination, item i will -// still be only 1/K of the produced items. To reach p[i] > 1/K would mean -// some combinations would need to contain more than one copy of i - which -// contradicts the definition of a "combination". -// -// Though ssample() is required to fulfill the first-order inclusion -// probabilities p (the probability of each item appearing in the returned -// combination), it is NOT required to make any guarantees on the high-order -// inclusion probabilities, i.e., the probabilities for pairs of items to -// be returned together in the same combination. This greatly simplifies -// the implementation, and means we can use the "Systematic Sampling" -// technique (explained below) which only makes guarantees on the first-order -// inclusion probabilities. In our use case, fulfilling *only* the 1st order -// inclusion probabilities is indeed enough: We want each node to get a -// given amount of work, but don't care if the different K nodes we choose -// in one request are correlated. -// -// Not making any guarantees on high-order inclusion probabilities basically -// means that the items are not independent. To understand what this means, -// consider a simple example: say we have N=4 items with equal probability -// and want to draw random pairs (K=2). Our implementation will return {0,1} -// half of the time, and {2,3} the other half of the time. That distribution -// achieves the desired probabilities (each item will be given 1/4 -// of the work), but the pair {1,2}, for example, will never appear in any -// individual draw. -// -// "Systematic Sampling" is a very simple method of reproducing a set of -// desired 1st-order inclusion probabilities.
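// A worked example of that scheme (numbers chosen here for illustration,
// not taken from the code): take N=3, K=2 and p = {0.5, 0.3, 0.2}, so the
// items occupy [0,0.5), [0.5,0.8) and [0.8,1.0). Draw x uniformly in
// [0,0.5) and evaluate randone() at x and at x+0.5. The first point always
// falls in item 0; the second falls in item 1 when x < 0.3 and in item 2
// otherwise. So the returned combination is {0,1} with probability 0.6 and
// {0,2} with probability 0.4, giving inclusion frequencies 1.0, 0.6 and
// 0.4 - exactly K*p[i] for each item.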
A good overview can be found in -// http://stats.stackexchange.com/questions/139279/systematic-sampling-with-unequal-probabilities -// Basically, Systematic Sampling is a simple extension of the randone() -// algorithm above. Both start by putting the given probabilities one after -// another on the segment [0,1). randone() then drew one random number in -// [0,1) and checked on which of the segments this point falls. Here, we draw -// a random number x in [0, 1/K), look under it, but then look under x+1/K, -// x+2/K, ..., x + (K-1)/K, and these produce K different items with -// appropriate probabilities: -// 1. The items are necessarily different because of our assumption that -// none of the p[i] are larger than 1/K, -// 2. The probability to choose each item is exactly p_i*K. -// -// ssample() only calls for one random number generation (this is important -// for performance) but calls randone() on the same probability vector K times, -// which makes it even more interesting to implement the Alias Method -// described above. However, for very small N like 3, the difference is not -// likely to be noticeable. -// -// TODO: For the special case of K == N-1, we can have a slightly more -// efficient implementation, which calculates the probability for each of -// the N combinations (the combination lacking item i can be proven to have -// probability 1 - K*p[i]) and then uses one randone() call with these -// modified probabilities. -// TODO: Consider making this a template of K, N and have specialized -// implementations for low N (e.g., 3), K=N-1, etc. -// TODO: write to a pre-allocated return vector to avoid extra allocation. - -std::vector<unsigned> -ssample(unsigned k, const std::vector<float>& p) { - const float interval = 1.0 / k; - const float rnd = rand_float() * interval; // random number in [0, 1/k) - std::vector<unsigned> ret; - ret.reserve(k); - float offset = 0; - for (unsigned i = 0; i < k; i++) { - ret.emplace_back(randone(p, rnd + offset)); - offset += interval; - } - hr_logger.trace("ssample returning {}", ret); - return ret; -} - -// Given the cache hit rate (cache hits / request) of N different nodes, -// calculate the fraction of requests that we'd like to send to each of -// these nodes to achieve the same number of misses per second on all nodes -std::vector<float> -miss_equalizing_probablities(const std::vector<float>& hit_rates) { - std::vector<float> ret; - ret.reserve(hit_rates.size()); - // R[i] is the reciprocal miss rate 1/(1-H[i]). - float r_sum = 0; - for (float h : hit_rates) { - float r = 1 / (1 - h); - ret.emplace_back(r); - r_sum += r; - } - for (float& r : ret) { - r /= r_sum; - } - return ret; -} - -// Given a set of desired probabilities with sum 1, clip the probabilities -// to be not higher than the given limit. The rest of the probabilities are -// increased, in an attempt to preserve the ratios between probabilities, -// if possible - but keep all the probabilities below the limit. -void -clip_probabilities(std::vector<float>& p, float limit) { - // TODO: We have iterations here because it's possible that increasing - // one probability will bring it over the limit as well. Can we find a - // single-step algorithm to do this? 
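// A small worked example of this clipping (values invented for
// illustration): p = {0.6, 0.3, 0.1} with limit = 0.5. First pass: 0.6 is
// clipped to 0.5, leaving 0.1 to spread over an unclipped sum of 0.4, so
// ratio becomes 1.25. Second pass: nothing new crosses the limit and the
// unclipped entries scale to 0.375 and 0.125, giving {0.5, 0.375, 0.125} -
// still summing to 1, with every entry at or below the limit.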
- float ratio = 1.0; - for (;;) { - float clipped = 0; - float sum_unclipped = 0; - for (float& x : p) { - if (x >= limit) { - clipped += x - limit; - x = limit; - } else { - x *= ratio; - sum_unclipped += x; - } - } - // "ratio" is how much we need to increase the unclipped - // probabilities - if (clipped == 0) { - return; // done - } - ratio = (sum_unclipped + clipped) / sum_unclipped; - } -} - -// Run the "probability redistribution" algorithm, which aims for the -// desired probability distribution of the nodes, but does as much work -// as we can (i.e., 1/k) locally, redistributing the rest. -// Returns the vector of probabilities that node "me" should use to send -// requests. -std::vector<float> -redistribute(const std::vector<float>& p, unsigned me, unsigned k) { - unsigned rf = p.size(); - std::vector<float> pp(rf); - - // "Keep for node i" - // A surplus node keeps its entire desired amount of requests, N*p, - // for itself. A mixed node is cut off at 1/k. - pp[me] = std::min(rf * p[me], 1.0f / k); - hr_logger.trace("pp[me({})] = {}", me, pp[me]); - - std::vector<float> deficit(rf); - float total_deficit = 0; - int mixed_count = 0; - for (unsigned j = 0; j < rf; j++) { - float NPj = rf * p[j]; - float deficit_j = NPj - 1.0f / k; - if (deficit_j >= 0) { - // mixed node - mixed_count++; - deficit[j] = deficit_j; - total_deficit += deficit_j; - } - } - // Each of the mixed nodes has the same surplus: - float mixed_surplus = 1 - 1.0f / k; - - hr_logger.trace("starting distribution of mixed-node surplus to other mixed nodes:" - " mixed_count={}, deficit={}, mixed_surplus={}", mixed_count, deficit, mixed_surplus); - - float my_surplus; - if (deficit[me] == 0) { - // surplus node - my_surplus = 1 - rf * p[me]; - } else { - // mixed node, which will be converted below to either a deficit - // node or a surplus node. We can easily calculate now how much - // surplus will be left. It will be useful to know below if "me" - // will be a surplus node, because we only need to know how much - // work "me" *sends*, so if me is not a surplus node, we won't need - // to do the second step (of distributing surplus to the deficit - // nodes), and won't even need to update deficit[]. - if (deficit[me] <= mixed_surplus) { - // Node will be converted to a surplus node - my_surplus = mixed_surplus - deficit[me]; - } else { - // Node will be converted to a deficit node, and will not be - // left with any surplus - my_surplus = 0; - } - } - hr_logger.trace("my_surplus={}", my_surplus); - - // Mixed node redistribution algorithm, to "convert" mixed nodes into - // pure surplus or pure deficit nodes, while flowing probability between - // the mixed nodes (we only need to track this flow here if "me" is the - // node doing the sending - in pp[]). - if (deficit[me]) { - // "me" is a mixed node. - hr_logger.trace("CASE1"); - // We need a list of the mixed nodes sorted in increasing deficit order. - // Actually, we only need to sort those nodes with deficit <= - // min(deficit[me], mixed_surplus). - // TODO: use NlgN sort instead of this ridiculous N^2 implementation. - // TODO: can we do this without a NlgN (although very small N, not even - // the full rf)? Note also the distribution code below is N^2 anyway - // (two nested for loops). 
- std::list> sorted_deficits; - for (unsigned i = 0; i < rf; i++) { - if (deficit[i] && deficit[i] <= deficit[me] && - deficit[i] < mixed_surplus) { - auto it = sorted_deficits.begin(); - while (it != sorted_deficits.end() && it->second < deficit[i]) - ++it; - sorted_deficits.insert(it, std::make_pair(i, deficit[i])); - } - } - hr_logger.trace("sorted_deficits={}{}", sorted_deficits | boost::adaptors::map_keys, sorted_deficits | boost::adaptors::map_values); - float s = 0; - int count = mixed_count; - for (auto& d : sorted_deficits) { - hr_logger.trace("next sorted deficit={{}, {}}", d.first, d.second); - // What "diff" to distribute - auto diff = d.second - s; - s = d.second; - hr_logger.trace("diff={}, pp before={}, count={}", diff, pp, count); - --count; - // Distribute diff among all the mixed nodes with higher deficit. - // There should be exactly "count" of those excluding me. - if (!count) { - break; - } - for (unsigned i = 0; i < rf; i++) { - hr_logger.trace("{} {} {} {}", i, d.first, deficit[i], d.second); - // The ">=" here is ok: If several deficits are tied, the first one - // contributes the diff to all those nodes (all are equal, so >=), - // while when we get to the following nodes, they have diff==0 - // (because of the tied deficit) so we don't care that this loop - // doesn't quite match count nodes. - if (i != me && deficit[i] >= d.second) { - pp[i] += diff / count; - hr_logger.trace("pp[{}]={} (case a)", i, pp[i]); - } - } - - hr_logger.trace(" pp after1=", pp); - if (d.first == me) { - // We only care what "me" sends, and only the elements in - // the sorted list earlier than me could have forced it to - // send, so the rest of the algorithm isn't interesting. - break; - } - } - // additionally, if me is converted to a deficit node, we need to - // take the remaining surplus (mixed_surplus minus the last deficit - // in sorted_deficits) and distribute it to the other count-1 - // converted-to-surplus nodes. Of course we can only do this if - // count > 1 - if count==1, we remain with just one mixed node - // and cannot eliminate its surplus without "fixing" some of the - // decisions made earlier - if (deficit[me] > mixed_surplus) { - auto last_deficit = sorted_deficits.back().second; - auto diff = mixed_surplus - last_deficit; - if (count > 1) { - hr_logger.trace("CASE4. surplus {} count {}", diff, count); - for (unsigned i = 0; i < rf; i++) { - if (i != me && deficit[i] > last_deficit) { - hr_logger.trace("adding {} to pp[{}]={}", (diff / (count-1)), i, pp[i]); - pp[i] += diff / (count - 1); - } - } - // TODO: confirm that this loop worked exactly count - 1 times. - } else { - hr_logger.trace("CASE3a. surplus={}", diff); - // CASE3: count == 1 is possible. example for p = 0.2, 0.3, 0.5: - // surplus 0.5 0.5 0.5 - // deficit 0.1 0.4 1.0 - // after first step redistributing 0.1 to 3 nodes: - // surplus 0.4 0.4 0.4 - // deficit 0.0 0.3 0.9 - // after first step redistributing 0.3 to 2 nodes: - // surplus 0.4 0.1 0.1 - // deficit 0.0 0.0 0.6 - // So we're left with 1 mixed node (count=1), and can't - // redistribute its surplus to itself! - // This happens because the original distribution step was - // already a mistake: In this case the *only* solution is for node - // 0 and 1 is to send all their surplus (total of 1.0) to fill - // node 2's entire deficit (1.0). Node 0 can't afford to send - // any of its surplus to node 1 - and if it does (like we did in - // the first step redistributing 0.1), we end up with - // deficit remaining on node 2! 
- // - // Special case of one remaining mixed node. Tell the other - // nodes not to give each other as much (we don't have to - // do this here, as we only care about "me") and instead - // "me" will give them their surplus - for (unsigned i = 0; i < rf; i++) { - if (i != me) { - pp[i] += diff / (mixed_count - 1); - hr_logger.trace("pp[{}]={} (case b)", i, pp[i]); - } - } - } - hr_logger.trace(" pp after2={}", pp); - } else { - // Additionally, if the algorithm ends with a single mixed node - // we need to apply a fix. Above we already handled the case that - // this single mixed node is "me", so it needs to send more to the - // other nodes. Here we need to handle the opposite side - me is - // one of the nodes which sent too much to other nodes and needs - // to send to the mixed node instead. - // TODO: find a more efficient way to check if the alorithm will - // end with just one mixed node and its surplus :-( - unsigned n_converted_to_deficit = 0; - unsigned mix_i = 0; // only used if n_converted_to_deficit==1 - float last_deficit = 0; - for (unsigned i = 0; i < rf; i++) { - if (deficit[i] > mixed_surplus) { - n_converted_to_deficit++; - mix_i = i; - } else { - last_deficit = std::max(last_deficit, deficit[i]); - } - } - if (n_converted_to_deficit == 1) { - auto diff = mixed_surplus - last_deficit; - hr_logger.trace("CASE3b. surplus={}", diff); - pp[mix_i] += diff / (mixed_count - 1); - hr_logger.trace("pp[{}]={} (case c)", mix_i, pp[mix_i]); - for (unsigned i = 0; i < rf; i++) { - if (deficit[i] > 0) { // mixed node - if (i != mix_i && i != me) { - pp[i] -= diff / (mixed_count - 1) / (mixed_count - 2); - hr_logger.trace("pp[{}]={} (case d)", i, pp[i]); - } - } - } - } - } - } - - if (my_surplus) { - // "me" is a surplus node, or became one during the mixed node - // redistribution algorithm. We need to know the new deficit nodes - // produced by that algorithm. i.e., we need to update deficit[]. - float new_total_deficit = 0; - for (unsigned i = 0; i < rf; i++) { - if (deficit[i] > 0) { - // Mixed node. - if (deficit[i] > mixed_surplus) { - // The mixed-node redistribution algorithm converted it - // to a deficit node, with this deficit: - deficit[i] -= mixed_surplus; - new_total_deficit += deficit[i]; - } else { - // The mixed-node redistribution algorithm converted it - // to a surplus node, with no deficit: - deficit[i] = 0; - } - } - } - // Split "me"'s surplus to the other nodes' remaining deficit, - // according to their share in the total remaining deficit. - for (unsigned j = 0; j < rf ; j++) { - if (deficit[j] > 0) { - // note j!= me because surplus node has deficit==0. - // Note pp[j] +=, not =, because this node might have - // already flowed some work to other nodes in the - // mixed node redistribution algorithm above. - pp[j] += deficit[j] / new_total_deficit * my_surplus; - hr_logger.trace("pp[{}]={} (case e)", j, pp[j]); - } - } - } - return pp; -} diff --git a/scylla/db/heat_load_balance.hh b/scylla/db/heat_load_balance.hh deleted file mode 100644 index cc13867..0000000 --- a/scylla/db/heat_load_balance.hh +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -/* - * Given a vector of cache hit ratio (hits per request) for each of N nodes, - * and knowing which of them is the current node, our goal is to return a - * random vector of K different nodes (i.e., a combination of K out of N), - * where the goals of the random distribution are: - * - * 1. If we send each request to the K returned nodes, the *misses per - * second* of all nodes will be the same. In other words, nodes with - * low hit ratios will be sent less work. - * - * 2. We know that this node is one of the N nodes. As much as possible, - * without breaking goal 1, we should return this node as one of the - * results. - * - * 3. We assume that *this node* got chosen uniformly randomly among the - * N nodes (in other words, the client chose a coordinator node, us, - * uniformly, and we need to choose K nodes and forward the request - * to them). - */ -#include -#include -#include -#include "log.hh" - -extern logging::logger hr_logger; - -class rand_exception {}; - -float rand_float(); -std::vector ssample(unsigned k, const std::vector& p); -std::vector miss_equalizing_probablities(const std::vector& hit_rates); -void clip_probabilities(std::vector& p, float limit); -std::vector redistribute(const std::vector& p, unsigned me, unsigned k); - - -template -class combination_generator { -private: - std::vector _pp; - std::vector _nodes; - unsigned _k; - // If "extra" is true, in addition to the regular k nodes returned by - // get(), it returns one extra node which the caller should use if one - // of the nodes returned does not answer. - // The "extra" is guaranteed to be different from any of the regular nodes, - // but does not participate in the probability calculation and we do - // not make a guarantee how it will be distributed (it will in fact - // be uniformly distributed over the remaining nodes). - // In particular, the caller should only use the extra node in - // exceptional situations. If the caller always plans to send a request - // to one additional node up-front, it should use a combination_generator - // of k+1 - and extra=false. - bool _extra; -public: - combination_generator(std::vector&& pp, std::vector&& nodes, unsigned k, bool extra) - : _pp(std::move(pp)), _nodes(std::move(nodes)), _k(k), _extra(extra) { - // TODO: throw if _pp.size() != _nodes.size() or not 1 <= k < _pp.size() - } - std::vector get() { - auto n = _pp.size(); - auto ke = _k + (_extra ? 1 : 0); - assert(ke <= n); - std::vector ret; - ret.reserve(ke); - std::vector r = ssample(_k, _pp); - for (int i : r) { - ret.push_back(_nodes[i]); - } - if (_extra) { - // Choose one of the remaining n-k nodes as the extra (k+1)th - // returned node. Currently, we choose the nodes with equal - // probablities. We could have also used _pp or the original p - // for this - I don't know which is better, if it even matters. 
- std::vector used(n); - for (int i : r) { - used[i] = true; - } - int m = ::rand_float() * (n - _k); - for (unsigned i = 0; i < n; i++) { - if (!used[i]) { - if (!m) { - ret.push_back(_nodes[i]); - break; - } - --m; - } - } - } - assert(ret.size() == ke); - return ret; - } -}; - - -template -std::vector -miss_equalizing_combination( - const std::vector>& node_hit_rate, unsigned me, int bf, bool extra=false) -{ - auto rf = node_hit_rate.size(); - - // FIXME: don't take std::pair but separate vectors - std::vector hit_rates; - hit_rates.reserve(rf); - for (auto& nh : node_hit_rate) { - hit_rates.emplace_back(nh.second); - } - auto p = miss_equalizing_probablities(hit_rates); - // When we'll ask for combinations of "bf" different nodes, probabilities - // higher than 1/bf cannot be achieved (1/bf itsef can be achieved by - // returning this node in every returned combination). So no matter what - // we do, we can't actually achieve the desired probabilities. Let's - // try for the best we can - clip_probabilities(p, 1.0f / bf); - - - hr_logger.trace("desired probabilities: {}, {}", node_hit_rate | boost::adaptors::map_keys, p); - - // If me >= rf, this node is NOT one of the replicas, and we just need - // to use the probabilties for these replicas, without doing the - // redistribution to prefer the local replica. - if (me < rf) { - p = redistribute(p, me, bf); - } - - hr_logger.trace("returned _pp={}", p); - std::vector nodes(rf); - for (unsigned i = 0; i < rf; i++) { - nodes[i] = node_hit_rate[i].first; - } - return combination_generator(std::move(p), std::move(nodes), bf, extra).get(); -} - diff --git a/scylla/db/index/secondary_index.cc b/scylla/db/index/secondary_index.cc deleted file mode 100644 index 0435633..0000000 --- a/scylla/db/index/secondary_index.cc +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "secondary_index.hh" - -const sstring db::index::secondary_index::custom_index_option_name = "class_name"; -const sstring db::index::secondary_index::index_keys_option_name = "index_keys"; -const sstring db::index::secondary_index::index_values_option_name = "index_values"; -const sstring db::index::secondary_index::index_entries_option_name = "index_keys_and_values"; - diff --git a/scylla/db/index/secondary_index.hh b/scylla/db/index/secondary_index.hh deleted file mode 100644 index 43fdcb2..0000000 --- a/scylla/db/index/secondary_index.hh +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "core/sstring.hh" -#include "seastarx.hh" - -namespace db { -namespace index { - -/** - * Abstract base class for different types of secondary indexes. - * - * Do not extend this directly, please pick from PerColumnSecondaryIndex or PerRowSecondaryIndex - */ -class secondary_index { -public: - static const sstring custom_index_option_name; - - /** - * The name of the option used to specify that the index is on the collection keys. - */ - static const sstring index_keys_option_name; - - /** - * The name of the option used to specify that the index is on the collection values. - */ - static const sstring index_values_option_name; - - /** - * The name of the option used to specify that the index is on the collection (map) entries. - */ - static const sstring index_entries_option_name; - -#if 0 // TODO: - - public static final AbstractType keyComparator = StorageService.getPartitioner().preservesOrder() - ? 
BytesType.instance - : new LocalByPartionerType(StorageService.getPartitioner()); - - /** - * Base CF that has many indexes - */ - protected ColumnFamilyStore baseCfs; - - - /** - * The column definitions which this index is responsible for - */ - protected final Set columnDefs = Collections.newSetFromMap(new ConcurrentHashMap()); - - /** - * Perform any initialization work - */ - public abstract void init(); - - /** - * Reload an existing index following a change to its configuration, - * or that of the indexed column(s). Differs from init() in that we expect - * expect new resources (such as CFS for a KEYS index) to be created by - * init() but not here - */ - public abstract void reload(); - - /** - * Validates the index_options passed in the ColumnDef - * @throws ConfigurationException - */ - public abstract void validateOptions() throws ConfigurationException; - - /** - * @return The name of the index - */ - abstract public String getIndexName(); - - /** - * All internal 2ndary indexes will return "_internal_" for this. Custom - * 2ndary indexes will return their class name. This only matter for - * SecondaryIndexManager.groupByIndexType. - */ - String indexTypeForGrouping() - { - // Our internal indexes overwrite this - return getClass().getCanonicalName(); - } - - /** - * Return the unique name for this index and column - * to be stored in the SystemKeyspace that tracks if each column is built - * - * @param columnName the name of the column - * @return the unique name - */ - abstract public String getNameForSystemKeyspace(ByteBuffer columnName); - - /** - * Checks if the index for specified column is fully built - * - * @param columnName the column - * @return true if the index is fully built - */ - public boolean isIndexBuilt(ByteBuffer columnName) - { - return SystemKeyspace.isIndexBuilt(baseCfs.keyspace.getName(), getNameForSystemKeyspace(columnName)); - } - - public void setIndexBuilt() - { - for (ColumnDefinition columnDef : columnDefs) - SystemKeyspace.setIndexBuilt(baseCfs.keyspace.getName(), getNameForSystemKeyspace(columnDef.name.bytes)); - } - - public void setIndexRemoved() - { - for (ColumnDefinition columnDef : columnDefs) - SystemKeyspace.setIndexRemoved(baseCfs.keyspace.getName(), getNameForSystemKeyspace(columnDef.name.bytes)); - } - - /** - * Called at query time - * Creates a implementation specific searcher instance for this index type - * @param columns the list of columns which belong to this index type - * @return the secondary index search impl - */ - protected abstract SecondaryIndexSearcher createSecondaryIndexSearcher(Set columns); - - /** - * Forces this indexes' in memory data to disk - */ - public abstract void forceBlockingFlush(); - - /** - * Allow access to the underlying column family store if there is one - * @return the underlying column family store or null - */ - public abstract ColumnFamilyStore getIndexCfs(); - - - /** - * Delete all files and references to this index - * @param columnName the indexed column to remove - */ - public abstract void removeIndex(ByteBuffer columnName); - - /** - * Remove the index and unregisters this index's mbean if one exists - */ - public abstract void invalidate(); - - /** - * Truncate all the data from the current index - * - * @param truncatedAt The truncation timestamp, all data before that timestamp should be rejected. 
- */ - public abstract void truncateBlocking(long truncatedAt); - - /** - * Builds the index using the data in the underlying CFS - * Blocks till it's complete - */ - protected void buildIndexBlocking() - { - logger.info(String.format("Submitting index build of %s for data in %s", - getIndexName(), StringUtils.join(baseCfs.getSSTables(), ", "))); - - try (Refs sstables = baseCfs.selectAndReference(ColumnFamilyStore.CANONICAL_SSTABLES).refs) - { - SecondaryIndexBuilder builder = new SecondaryIndexBuilder(baseCfs, - Collections.singleton(getIndexName()), - new ReducingKeyIterator(sstables)); - Future future = CompactionManager.instance.submitIndexBuild(builder); - FBUtilities.waitOnFuture(future); - forceBlockingFlush(); - setIndexBuilt(); - } - logger.info("Index build of {} complete", getIndexName()); - } - - - /** - * Builds the index using the data in the underlying CF, non blocking - * - * - * @return A future object which the caller can block on (optional) - */ - public Future buildIndexAsync() - { - // if we're just linking in the index to indexedColumns on an already-built index post-restart, we're done - boolean allAreBuilt = true; - for (ColumnDefinition cdef : columnDefs) - { - if (!SystemKeyspace.isIndexBuilt(baseCfs.keyspace.getName(), getNameForSystemKeyspace(cdef.name.bytes))) - { - allAreBuilt = false; - break; - } - } - - if (allAreBuilt) - return null; - - // build it asynchronously; addIndex gets called by CFS open and schema update, neither of which - // we want to block for a long period. (actual build is serialized on CompactionManager.) - Runnable runnable = new Runnable() - { - public void run() - { - baseCfs.forceBlockingFlush(); - buildIndexBlocking(); - } - }; - FutureTask f = new FutureTask(runnable, null); - - new Thread(f, "Creating index: " + getIndexName()).start(); - return f; - } - - public ColumnFamilyStore getBaseCfs() - { - return baseCfs; - } - - private void setBaseCfs(ColumnFamilyStore baseCfs) - { - this.baseCfs = baseCfs; - } - - public Set getColumnDefs() - { - return columnDefs; - } - - void addColumnDef(ColumnDefinition columnDef) - { - columnDefs.add(columnDef); - } - - void removeColumnDef(ByteBuffer name) - { - Iterator it = columnDefs.iterator(); - while (it.hasNext()) - { - if (it.next().name.bytes.equals(name)) - it.remove(); - } - } - - /** Returns true if the index supports lookups for the given operator, false otherwise. */ - public boolean supportsOperator(Operator operator) - { - return operator == Operator.EQ; - } - - /** - * Returns the decoratedKey for a column value. Assumes an index CFS is present. - * @param value column value - * @return decorated key - */ - public DecoratedKey getIndexKeyFor(ByteBuffer value) - { - return getIndexCfs().partitioner.decorateKey(value); - } - - /** - * Returns true if the provided cell name is indexed by this secondary index. - */ - public abstract boolean indexes(CellName name); - - /** - * This is the primary way to create a secondary index instance for a CF column. - * It will validate the index_options before initializing. - * - * @param baseCfs the source of data for the Index - * @param cdef the meta information about this column (index_type, index_options, name, etc...) 
- * - * @return The secondary index instance for this column - * @throws ConfigurationException - */ - public static SecondaryIndex createInstance(ColumnFamilyStore baseCfs, ColumnDefinition cdef) throws ConfigurationException - { - SecondaryIndex index; - - switch (cdef.getIndexType()) - { - case KEYS: - index = new KeysIndex(); - break; - case COMPOSITES: - index = CompositesIndex.create(cdef); - break; - case CUSTOM: - assert cdef.getIndexOptions() != null; - String class_name = cdef.getIndexOptions().get(CUSTOM_INDEX_OPTION_NAME); - assert class_name != null; - try - { - index = (SecondaryIndex) Class.forName(class_name).newInstance(); - } - catch (Exception e) - { - throw new RuntimeException(e); - } - break; - default: - throw new RuntimeException("Unknown index type: " + cdef.getIndexName()); - } - - index.addColumnDef(cdef); - index.validateOptions(); - index.setBaseCfs(baseCfs); - - return index; - } - - public abstract boolean validate(ByteBuffer rowKey, Cell cell); - - public abstract long estimateResultRows(); - - /** - * Returns the index comparator for index backed by CFS, or null. - * - * Note: it would be cleaner to have this be a member method. However we need this when opening indexes - * sstables, but by then the CFS won't be fully initiated, so the SecondaryIndex object won't be accessible. - */ - public static CellNameType getIndexComparator(CFMetaData baseMetadata, ColumnDefinition cdef) - { - switch (cdef.getIndexType()) - { - case KEYS: - return new SimpleDenseCellNameType(keyComparator); - case COMPOSITES: - return CompositesIndex.getIndexComparator(baseMetadata, cdef); - case CUSTOM: - return null; - } - throw new AssertionError(); - } - - @Override - public String toString() - { - return Objects.toStringHelper(this).add("columnDefs", columnDefs).toString(); - } - -#endif - -}; - -} -} diff --git a/scylla/db/legacy_schema_migrator.cc b/scylla/db/legacy_schema_migrator.cc deleted file mode 100644 index 584658c..0000000 --- a/scylla/db/legacy_schema_migrator.cc +++ /dev/null @@ -1,644 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "database.hh"
-#include "legacy_schema_migrator.hh"
-#include "system_keyspace.hh"
-#include "schema_tables.hh"
-#include "schema_builder.hh"
-#include "json.hh"
-#include "cql3/query_processor.hh"
-#include "cql3/util.hh"
-#include "utils/joinpoint.hh"
-
-static seastar::logger mlogger("legacy_schema_migrator");
-
-namespace db {
-namespace legacy_schema_migrator {
-
-// local data carriers
-
-class migrator {
-public:
-    static const std::unordered_set<sstring> legacy_schema_tables;
-
-    migrator(sharded<service::storage_proxy>& sp, cql3::query_processor& qp)
-        : _sp(sp), _qp(qp) {
-    }
-    migrator(migrator&&) = default;
-
-    typedef db_clock::time_point time_point;
-
-    // TODO: we don't support triggers.
-    // this is a placeholder.
-    struct trigger {
-        time_point timestamp;
-        sstring name;
-        std::unordered_map<sstring, sstring> options;
-    };
-
-    struct table {
-        time_point timestamp;
-        schema_ptr metadata;
-        std::vector<trigger> triggers;
-    };
-
-    struct type {
-        time_point timestamp;
-        user_type metadata;
-    };
-
-    struct function {
-        time_point timestamp;
-        sstring ks_name;
-        sstring fn_name;
-        std::vector<sstring> arg_names;
-        std::vector<sstring> arg_types;
-        sstring return_type;
-        bool called_on_null_input;
-        sstring language;
-        sstring body;
-    };
-
-    struct aggregate {
-        time_point timestamp;
-        sstring ks_name;
-        sstring fn_name;
-        std::vector<sstring> arg_names;
-        std::vector<sstring> arg_types;
-        sstring return_type;
-        sstring final_func;
-        sstring initcond;
-        sstring state_func;
-        sstring state_type;
-    };
-
-    struct keyspace {
-        time_point timestamp;
-        sstring name;
-        bool durable_writes;
-        std::map<sstring, sstring> replication_params;
-
-        std::vector<table>
tables; - std::vector types; - std::vector functions; - std::vector aggregates; - }; - - class unsupported_feature : public std::runtime_error { - public: - using runtime_error::runtime_error; - }; - - static sstring fmt_query(const char* fmt, const char* table) { - return sprint(fmt, db::system_keyspace::NAME, table); - } - - typedef ::shared_ptr result_set_type; - typedef const cql3::untyped_result_set::row row_type; - - future<> read_table(keyspace& dst, sstring cf_name, time_point timestamp) { - auto fmt = "SELECT * FROM %s.%s WHERE keyspace_name = ? AND columnfamily_name = ?"; - auto tq = fmt_query(fmt, db::system_keyspace::legacy::COLUMNFAMILIES); - auto cq = fmt_query(fmt, db::system_keyspace::legacy::COLUMNS); - auto zq = fmt_query(fmt, db::system_keyspace::legacy::TRIGGERS); - - typedef std::tuple, future, future, future> result_tuple; - - return when_all(_qp.execute_internal(tq, { dst.name, cf_name }), - _qp.execute_internal(cq, { dst.name, cf_name }), - _qp.execute_internal(zq, { dst.name, cf_name }), - db::schema_tables::legacy::read_table_mutations(_sp, dst.name, cf_name, db::system_keyspace::legacy::column_families())) - .then([this, &dst, cf_name, timestamp](result_tuple&& t) { - - result_set_type tables = std::get<0>(t).get0(); - result_set_type columns = std::get<1>(t).get0(); - result_set_type triggers = std::get<2>(t).get0(); - db::schema_tables::legacy::schema_mutations sm = std::get<3>(t).get0(); - - row_type& td = tables->one(); - - auto ks_name = td.get_as("keyspace_name"); - auto cf_name = td.get_as("columnfamily_name"); - auto id = td.get_or("cf_id", generate_legacy_id(ks_name, cf_name)); - - schema_builder builder(dst.name, cf_name, id); - - builder.with_version(sm.digest()); - - cf_type cf = sstring_to_cf_type(td.get_or("type", sstring("standard"))); - if (cf == cf_type::super) { - fail(unimplemented::cause::SUPER); - } - - auto comparator = td.get_as("comparator"); - bool is_compound = cell_comparator::check_compound(comparator); - builder.set_is_compound(is_compound); - cell_comparator::read_collections(builder, comparator); - - bool filter_sparse = false; - - data_type default_validator = {}; - if (td.has("default_validator")) { - default_validator = db::schema_tables::parse_type(td.get_as("default_validator")); - if (default_validator->is_counter()) { - builder.set_is_counter(true); - } - builder.set_default_validation_class(default_validator); - } - - /* - * Determine whether or not the table is *really* dense - * We cannot trust is_dense value of true (see CASSANDRA-11502, that fixed the issue for 2.2 only, and not retroactively), - * but we can trust is_dense value of false. - */ - auto is_dense = td.get_opt("is_dense"); - if (!is_dense || *is_dense) { - is_dense = [&] { - /* - * As said above, this method is only here because we need to deal with thrift upgrades. - * Once a CF has been "upgraded", i.e. we've rebuilt and save its CQL3 metadata at least once, - * then we'll have saved the "is_dense" value and will be good to go. - * - * But non-upgraded thrift CF (and pre-7744 CF) will have no value for "is_dense", so we need - * to infer that information without relying on it in that case. And for the most part this is - * easy, a CF that has at least one REGULAR definition is not dense. But the subtlety is that not - * having a REGULAR definition may not mean dense because of CQL3 definitions that have only the - * PRIMARY KEY defined. - * - * So we need to recognize those special case CQL3 table with only a primary key. 
-             * If we have some
-             * clustering columns, we're fine as said above. So the only problem is that we cannot decide for
-             * sure if a CF without REGULAR columns nor CLUSTERING_COLUMN definitions is meant to be dense, or if it
-             * has been created in CQL3 by say:
-             *    CREATE TABLE test (k int PRIMARY KEY)
-             * in which case it should not be dense. However, we can limit our margin of error by assuming we are
-             * in the latter case only if the comparator is exactly CompositeType(UTF8Type).
-             */
-            stdx::optional<column_id> max_cl_idx;
-            const cql3::untyped_result_set::row* regular = nullptr;
-            for (auto& row : *columns) {
-                auto kind_str = row.get_as<sstring>("type");
-                if (kind_str == "compact_value") {
-                    continue;
-                }
-
-                auto kind = db::schema_tables::deserialize_kind(kind_str);
-
-                if (kind == column_kind::regular_column) {
-                    if (regular != nullptr) {
-                        return false;
-                    }
-                    regular = &row;
-                    continue;
-                }
-                if (kind == column_kind::clustering_key) {
-                    max_cl_idx = std::max(column_id(row.get_or("component_index", 0)), max_cl_idx.value_or(column_id()));
-                }
-            }
-
-            auto is_cql3_only_pk_comparator = [](const sstring& comparator) {
-                if (!cell_comparator::check_compound(comparator)) {
-                    return false;
-                }
-                // CMH. We don't have composites, nor a parser for them. This is a simple way of
-                // checking the same.
-                auto comma = comparator.find(',');
-                if (comma != sstring::npos) {
-                    return false;
-                }
-                auto off = comparator.find('(');
-                auto end = comparator.find(')');
-
-                return comparator.compare(off, end - off, utf8_type->name()) == 0;
-            };
-
-            if (max_cl_idx) {
-                auto n = std::count(comparator.begin(), comparator.end(), ','); // num components - 1
-                return *max_cl_idx == n;
-            }
-
-            if (regular) {
-                return false;
-            }
-
-            return !is_cql3_only_pk_comparator(comparator);
-
-        }();
-
-        // now, if switched to sparse, remove the redundant compact_value column and the last clustering column,
-        // directly copying CASSANDRA-11502 logic. See CASSANDRA-11315.
-
-        filter_sparse = !*is_dense;
-    }
-    builder.set_is_dense(*is_dense);
-
-    auto is_cql = !*is_dense && is_compound;
-    auto is_static_compact = !*is_dense && !is_compound;
-
-    // org.apache.cassandra.schema.LegacySchemaMigrator#isEmptyCompactValueColumn
-    auto is_empty_compact_value = [](const cql3::untyped_result_set::row& column_row) {
-        auto kind_str = column_row.get_as<sstring>("type");
-        // Cassandra only checks for "compact_value", but Scylla generates "regular" instead (#2586)
-        return (kind_str == "compact_value" || kind_str == "regular")
-            && column_row.get_as<sstring>("column_name").empty();
-    };
-
-    for (auto& row : *columns) {
-        auto kind_str = row.get_as<sstring>("type");
-        auto kind = db::schema_tables::deserialize_kind(kind_str);
-        auto component_index = kind > column_kind::clustering_key ?
0 : column_id(row.get_or("component_index", 0)); - auto name = row.get_or("column_name", sstring()); - auto validator = db::schema_tables::parse_type(row.get_as("validator")); - - if (is_empty_compact_value(row)) { - continue; - } - - if (filter_sparse) { - if (kind_str == "compact_value") { - continue; - } - if (kind == column_kind::clustering_key) { - if (cf == cf_type::super && component_index != 0) { - continue; - } - if (cf != cf_type::super && !is_compound) { - continue; - } - } - } - - stdx::optional index_kind; - sstring index_name; - index_options_map options; - if (row.has("index_type")) { - index_kind = schema_tables::deserialize_index_kind(row.get_as("index_type")); - } - if (row.has("index_name")) { - index_name = row.get_as("index_name"); - } - if (row.has("index_options")) { - options = json::to_map(row.get_as("index_options"), index_options_map()); - sstring type; - auto i = options.find("index_keys"); - if (i != options.end()) { - options.erase(i); - type = "KEYS"; - } - i = options.find("index_keys_and_values"); - if (i != options.end()) { - options.erase(i); - type = "KEYS_AND_VALUES"; - } - if (type.empty()) { - if (validator->is_collection() && validator->is_multi_cell()) { - type = "FULL"; - } else { - type = "VALUES"; - } - } - auto column = cql3::util::maybe_quote(name); - options["target"] = validator->is_collection() - ? type + "(" + column + ")" - : column; - } - if (index_kind) { - // Origin assumes index_name is always set, so let's do the same - builder.with_index(index_metadata(index_name, options, *index_kind)); - } - - data_type column_name_type = [&] { - if (is_static_compact && kind == column_kind::regular_column) { - return db::schema_tables::parse_type(comparator); - } - return utf8_type; - }(); - auto column_name = [&] { - try { - return column_name_type->from_string(name); - } catch (marshal_exception) { - // #2597: Scylla < 2.0 writes names in serialized form, try to recover - column_name_type->validate(to_bytes_view(name)); - return to_bytes(name); - } - }(); - builder.with_column(std::move(column_name), std::move(validator), kind, component_index); - } - - if (is_static_compact) { - builder.set_regular_column_name_type(db::schema_tables::parse_type(comparator)); - } - - if (td.has("read_repair_chance")) { - builder.set_read_repair_chance(td.get_as("read_repair_chance")); - } - if (td.has("local_read_repair_chance")) { - builder.set_dc_local_read_repair_chance(td.get_as("local_read_repair_chance")); - } - if (td.has("gc_grace_seconds")) { - builder.set_gc_grace_seconds(td.get_as("gc_grace_seconds")); - } - if (td.has("min_compaction_threshold")) { - builder.set_min_compaction_threshold(td.get_as("min_compaction_threshold")); - } - if (td.has("max_compaction_threshold")) { - builder.set_max_compaction_threshold(td.get_as("max_compaction_threshold")); - } - if (td.has("comment")) { - builder.set_comment(td.get_as("comment")); - } - if (td.has("memtable_flush_period_in_ms")) { - builder.set_memtable_flush_period(td.get_as("memtable_flush_period_in_ms")); - } - if (td.has("caching")) { - builder.set_caching_options(caching_options::from_sstring(td.get_as("caching"))); - } - if (td.has("default_time_to_live")) { - builder.set_default_time_to_live(gc_clock::duration(td.get_as("default_time_to_live"))); - } - if (td.has("speculative_retry")) { - builder.set_speculative_retry(td.get_as("speculative_retry")); - } - if (td.has("compaction_strategy_class")) { - auto strategy = td.get_as("compaction_strategy_class"); - try { - 
-                builder.set_compaction_strategy(sstables::compaction_strategy::type(strategy));
-            } catch (const exceptions::configuration_exception& e) {
-                // If the compaction strategy class isn't supported, fall back to size-tiered.
-                mlogger.warn("Falling back to size-tiered compaction strategy after the problem: {}", e.what());
-                builder.set_compaction_strategy(sstables::compaction_strategy_type::size_tiered);
-            }
-        }
-        if (td.has("compaction_strategy_options")) {
-            builder.set_compaction_strategy_options(json::to_map(td.get_as<sstring>("compaction_strategy_options")));
-        }
-        auto comp_param = td.get_as<sstring>("compression_parameters");
-        compression_parameters cp(json::to_map(comp_param));
-        builder.set_compressor_params(cp);
-
-        if (td.has("min_index_interval")) {
-            builder.set_min_index_interval(td.get_as<int32_t>("min_index_interval"));
-        } else if (td.has("index_interval")) { // compatibility
-            builder.set_min_index_interval(td.get_as<int32_t>("index_interval"));
-        }
-        if (td.has("max_index_interval")) {
-            builder.set_max_index_interval(td.get_as<int32_t>("max_index_interval"));
-        }
-        if (td.has("bloom_filter_fp_chance")) {
-            builder.set_bloom_filter_fp_chance(td.get_as<double>("bloom_filter_fp_chance"));
-        } else {
-            builder.set_bloom_filter_fp_chance(builder.get_bloom_filter_fp_chance());
-        }
-        if (td.has("dropped_columns")) {
-            auto map = td.get_map("dropped_columns");
-            for (auto&& e : map) {
-                builder.without_column(e.first, api::timestamp_type(e.second));
-            };
-        }
-
-        // ignore version. we're transient
-        if (!triggers->empty()) {
-            throw unsupported_feature("triggers");
-        }
-
-        dst.tables.emplace_back(table{timestamp, builder.build() });
-    });
-}
-
-future<> read_tables(keyspace& dst) {
-    auto query = fmt_query("SELECT columnfamily_name, writeTime(type) AS timestamp FROM %s.%s WHERE keyspace_name = ?",
-                    db::system_keyspace::legacy::COLUMNFAMILIES);
-    return _qp.execute_internal(query, {dst.name}).then([this, &dst](result_set_type result) {
-        return parallel_for_each(*result, [this, &dst](row_type& row) {
-            return read_table(dst, row.get_as<sstring>("columnfamily_name"), row.get_as<time_point>("timestamp"));
-        }).finally([result] {});
-    });
-}
-
-future<time_point> read_type_timestamp(keyspace& dst, sstring type_name) {
-    // TODO: Unfortunately there is not a single REGULAR column in system.schema_usertypes, so annoyingly we cannot
-    // use the writeTime() CQL function, and must resort to a lower level.
-    // Origin digs up the actual cells of the target partition and gets the timestamp from there.
-    // We should do the same, but that's messy. Let's give back a dummy value for now.
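For contrast with the TODO above: read_tables() can recover per-table timestamps because system.schema_columnfamilies gives writeTime() a regular column ("type") to probe. With fmt_query's %s placeholders filled in, its query reads:

    SELECT columnfamily_name, writeTime(type) AS timestamp
      FROM system.schema_columnfamilies WHERE keyspace_name = ?

system.schema_usertypes offers writeTime() no such column, so this function settles for the keyspace's own timestamp below.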
- return make_ready_future(dst.timestamp); - } - - future<> read_types(keyspace& dst) { - auto query = fmt_query("SELECT * FROM %s.%s WHERE keyspace_name = ?", db::system_keyspace::legacy::USERTYPES); - return _qp.execute_internal(query, {dst.name}).then([this, &dst](result_set_type result) { - return parallel_for_each(*result, [this, &dst](row_type& row) { - auto name = row.get_blob("type_name"); - auto columns = row.get_list("field_names"); - auto types = row.get_list("field_types"); - std::vector field_types; - for (auto&& value : types) { - field_types.emplace_back(db::schema_tables::parse_type(value)); - } - auto ut = user_type_impl::get_instance(dst.name, name, columns, field_types); - return read_type_timestamp(dst, value_cast(utf8_type->deserialize(name))).then([ut = std::move(ut), &dst](time_point timestamp) { - dst.types.emplace_back(type{timestamp, ut}); - }); - }).finally([result] {}); - }); - } - - future<> read_functions(keyspace& dst) { - auto query = fmt_query("SELECT * FROM %s.%s WHERE keyspace_name = ?", db::system_keyspace::legacy::FUNCTIONS); - return _qp.execute_internal(query, {dst.name}).then([this, &dst](result_set_type result) { - if (!result->empty()) { - throw unsupported_feature("functions"); - } - }); - } - - future<> read_aggregates(keyspace& dst) { - auto query = fmt_query("SELECT * FROM %s.%s WHERE keyspace_name = ?", db::system_keyspace::legacy::AGGREGATES); - return _qp.execute_internal(query, {dst.name}).then([this, &dst](result_set_type result) { - if (!result->empty()) { - throw unsupported_feature("aggregates"); - } - }); - } - - future read_keyspace(sstring ks_name, bool durable_writes, sstring strategy_class, sstring strategy_options, time_point timestamp) { - auto map = json::to_map(strategy_options); - map.emplace("class", std::move(strategy_class)); - auto ks = ::make_lw_shared(keyspace{timestamp, std::move(ks_name), durable_writes, std::move(map) }); - - return read_tables(*ks).then([this, ks] { - //Collection types = readTypes(keyspaceName); - return read_types(*ks); - }).then([this, ks] { - return read_functions(*ks); - }).then([this, ks] { - return read_aggregates(*ks); - }).then([this, ks] { - return make_ready_future(std::move(*ks)); - }); - } - - future<> read_all_keyspaces() { - static auto ks_filter = [](row_type& row) { - auto ks_name = row.get_as("keyspace_name"); - return ks_name != db::system_keyspace::NAME && ks_name != db::schema_tables::v3::NAME; - }; - - auto query = fmt_query("SELECT keyspace_name, durable_writes, strategy_options, strategy_class, writeTime(durable_writes) AS timestamp FROM %s.%s", - db::system_keyspace::legacy::KEYSPACES); - - return _qp.execute_internal(query).then([this](result_set_type result) { - auto i = boost::make_filter_iterator(ks_filter, result->begin(), result->end()); - auto e = boost::make_filter_iterator(ks_filter, result->end(), result->end()); - return parallel_for_each(i, e, [this](row_type& row) { - return read_keyspace(row.get_as("keyspace_name") - , row.get_as("durable_writes") - , row.get_as("strategy_class") - , row.get_as("strategy_options") - , row.get_as("timestamp") - ).then([this](keyspace ks) { - _keyspaces.emplace_back(std::move(ks)); - }); - }).finally([result] {}); - }); - } - - future<> drop_legacy_tables() { - mlogger.info("Dropping legacy schema tables"); - return parallel_for_each(legacy_schema_tables, [this](const sstring& cfname) { - return do_with(utils::make_joinpoint([] { return db_clock::now();}),[this, cfname](auto& tsf) { - auto with_snapshot = !_keyspaces.empty(); - 
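The do_with/make_joinpoint pairing above is what keeps the drop consistent across shards: the db_clock::now() callable is evaluated once at the join point, and each shard's tsf.value() call, forwarded into drop_column_family below, observes that same time_point, so every shard tombstones the legacy tables as of one agreed timestamp. (This reading is inferred from the usage here; utils/joinpoint.hh itself is outside this hunk.)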
return _qp.db().invoke_on_all([&tsf, cfname, with_snapshot](database& db) { - return db.drop_column_family(db::system_keyspace::NAME, cfname, [&tsf] { return tsf.value(); }, with_snapshot); - }); - }); - }); - } - - future<> store_keyspaces_in_new_schema_tables() { - mlogger.info("Moving {} keyspaces from legacy schema tables to the new schema keyspace ({})", - _keyspaces.size(), db::schema_tables::v3::NAME); - - std::vector mutations; - - for (auto& ks : _keyspaces) { - auto ksm = ::make_lw_shared(ks.name - , ks.replication_params["class"] // TODO, make ksm like c3? - , ks.replication_params - , ks.durable_writes); - - // we want separate time stamps for tables/types, so cannot bulk them into the ksm. - for (auto&& m : db::schema_tables::make_create_keyspace_mutations(ksm, ks.timestamp.time_since_epoch().count(), false)) { - mutations.emplace_back(std::move(m)); - } - for (auto& t : ks.tables) { - db::schema_tables::add_table_or_view_to_schema_mutation(t.metadata, t.timestamp.time_since_epoch().count(), true, mutations); - } - for (auto& t : ks.types) { - db::schema_tables::add_type_to_schema_mutation(t.metadata, t.timestamp.time_since_epoch().count(), mutations); - } - } - return _qp.proxy().local().mutate_locally(std::move(mutations)); - } - - future<> migrate_indexes() { - return parallel_for_each(_keyspaces, [](const keyspace& ks) { - return parallel_for_each(ks.tables, [&](const table& t) { - return parallel_for_each(t.metadata->indices(), [&](const index_metadata& index) { - return system_keyspace::is_index_built(ks.name, t.metadata->cf_name() + "." + index.name()).then([&](bool built) { - if (!built) { - return make_ready_future(); - } - return system_keyspace::set_index_built(ks.name, index.name()).then([&] { - return system_keyspace::set_index_removed(ks.name, t.metadata->cf_name() + "." + index.name()); - }); - }); - }); - }); - }); - } - future<> flush_schemas() { - return _qp.proxy().local().get_db().invoke_on_all([this] (database& db) { - return parallel_for_each(db::schema_tables::ALL, [this, &db](const sstring& cf_name) { - auto& cf = db.find_column_family(db::schema_tables::NAME, cf_name); - return cf.flush(); - }); - }); - } - - future<> migrate() { - return read_all_keyspaces().then([this]() { - // write metadata to the new schema tables - return store_keyspaces_in_new_schema_tables().then(std::bind(&migrator::migrate_indexes, this)) - .then(std::bind(&migrator::flush_schemas, this)) - .then(std::bind(&migrator::drop_legacy_tables, this)) - .then([] { mlogger.info("Completed migration of legacy schema tables"); }); - }); - } - - sharded& _sp; - cql3::query_processor& _qp; - std::vector _keyspaces; -}; - -const std::unordered_set migrator::legacy_schema_tables = { - db::system_keyspace::legacy::KEYSPACES, - db::system_keyspace::legacy::COLUMNFAMILIES, - db::system_keyspace::legacy::COLUMNS, - db::system_keyspace::legacy::TRIGGERS, - db::system_keyspace::legacy::USERTYPES, - db::system_keyspace::legacy::FUNCTIONS, - db::system_keyspace::legacy::AGGREGATES, -}; - -} -} - -future<> -db::legacy_schema_migrator::migrate(sharded& sp, cql3::query_processor& qp) { - return do_with(migrator(sp, qp), std::bind(&migrator::migrate, std::placeholders::_1)); -} - diff --git a/scylla/db/legacy_schema_migrator.hh b/scylla/db/legacy_schema_migrator.hh deleted file mode 100644 index 12ff8e6..0000000 --- a/scylla/db/legacy_schema_migrator.hh +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -class database; - -namespace cql3 { -class query_processor; -} - -namespace service { -class storage_proxy; -} - -namespace db { -namespace legacy_schema_migrator { - -future<> migrate(sharded&, cql3::query_processor&); - -} -} diff --git a/scylla/db/marshal/type_parser.cc b/scylla/db/marshal/type_parser.cc deleted file mode 100644 index f01deca..0000000 --- a/scylla/db/marshal/type_parser.cc +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "db/marshal/type_parser.hh" - -#include "exceptions/exceptions.hh" - -#include -#include - -namespace db { - -namespace marshal { - -type_parser::type_parser(sstring_view str, size_t idx) - : _str{str.begin(), str.end()} - , _idx{idx} -{ } - -type_parser::type_parser(sstring_view str) - : type_parser{str, 0} -{ } - -data_type type_parser::parse(const sstring& str) { - return type_parser(sstring_view(str)).parse(); -} - -data_type type_parser::parse(sstring_view str) { - return type_parser(str).parse(); -} - -data_type type_parser::parse() { - return do_parse(true); -} - -data_type type_parser::do_parse(bool multicell) -{ - skip_blank(); - - sstring name = read_next_identifier(); - if (name.empty()) { - if (!is_eos()) { - throw std::runtime_error("unknown type: " + _str); - } - return bytes_type; - } - - if (_str[_idx] == ':') { - _idx++; - try { - size_t pos; - std::stoul(name, &pos, 0x10); - if (pos != name.size()) { - throw exceptions::syntax_exception(sprint("expected 8-byte hex number, found %s", name)); - } - } catch (const std::invalid_argument & e) { - throw exceptions::syntax_exception(sprint("expected 8-byte hex number, found %s", name)); - } catch (const std::out_of_range& e) { - throw exceptions::syntax_exception(sprint("expected 8-byte hex number, found %s", name)); - } - name = read_next_identifier(); - } - - skip_blank(); - if (!is_eos() && _str[_idx] == '(') - return get_abstract_type(name, *this, multicell); - else - return get_abstract_type(name); -} - -std::vector type_parser::get_type_parameters(bool multicell) -{ - std::vector list; - - if (is_eos()) { - return list; - } - - if (_str[_idx] != '(') { - throw std::logic_error("internal error"); - } - - ++_idx; // skipping '(' - - while (skip_blank_and_comma()) - { - if (_str[_idx] == ')') { - ++_idx; - return list; - } - - try { - list.emplace_back(do_parse(multicell)); - } catch (exceptions::syntax_exception& e) { - // FIXME -#if 0 - SyntaxException ex = new SyntaxException(String.format("Exception while parsing '%s' around char %d", str, idx)); - ex.initCause(e); -#endif - throw e; - } - } - throw exceptions::syntax_exception(sprint("Syntax error parsing '%s' at char %d: unexpected end of string", _str, _idx)); -} - -std::tuple, std::vector> type_parser::get_user_type_parameters() -{ - if (is_eos() || _str[_idx] != '(') { - throw std::logic_error("internal error"); - } - - ++_idx; // skipping '(' - - skip_blank_and_comma(); - sstring keyspace = read_next_identifier(); - skip_blank_and_comma(); - bytes name = from_hex(read_next_identifier()); - - std::vector field_names; - std::vector field_types; - - while (skip_blank_and_comma()) - { - if (_str[_idx] == ')') { - ++_idx; - return std::make_tuple(std::move(keyspace), std::move(name), std::move(field_names), std::move(field_types)); - } - - field_names.emplace_back(from_hex(read_next_identifier())); - - if (_str[_idx] != ':') { - throw exceptions::syntax_exception(sprint("expecting ':' token")); - } - ++_idx; - - try { - field_types.emplace_back(do_parse(true)); - } catch (exceptions::syntax_exception& e) { - // FIXME -#if 0 - SyntaxException ex = new SyntaxException(String.format("Exception while parsing '%s' around char %d", str, idx)); - ex.initCause(e); -#endif - throw e; - } - } - throw exceptions::syntax_exception(sprint("Syntax error parsing '%s' at char %d: unexpected end of string", _str, _idx)); -} - -data_type type_parser::get_abstract_type(const sstring& compare_with) -{ - sstring class_name; - if (compare_with.find('.') != sstring::npos) { - 
class_name = compare_with; - } else { - class_name = "org.apache.cassandra.db.marshal." + compare_with; - } - return abstract_type::parse_type(class_name); -} - -data_type type_parser::get_abstract_type(const sstring& compare_with, type_parser& parser, bool multicell) -{ - sstring class_name; - if (compare_with.find('.') != sstring::npos) { - class_name = compare_with; - } else { - class_name = "org.apache.cassandra.db.marshal." + compare_with; - } - if (class_name == "org.apache.cassandra.db.marshal.ReversedType") { - auto l = parser.get_type_parameters(false); - if (l.size() != 1) { - throw exceptions::configuration_exception("ReversedType takes exactly 1 type parameter"); - } - return reversed_type_impl::get_instance(l[0]); - } else if (class_name == "org.apache.cassandra.db.marshal.FrozenType") { - auto l = parser.get_type_parameters(false); - if (l.size() != 1) { - throw exceptions::configuration_exception("FrozenType takes exactly 1 type parameter"); - } - return l[0]; - } else if (class_name == "org.apache.cassandra.db.marshal.ListType") { - auto l = parser.get_type_parameters(); - if (l.size() != 1) { - throw exceptions::configuration_exception("ListType takes exactly 1 type parameter"); - } - return list_type_impl::get_instance(l[0], multicell); - } else if (class_name == "org.apache.cassandra.db.marshal.SetType") { - auto l = parser.get_type_parameters(); - if (l.size() != 1) { - throw exceptions::configuration_exception("SetType takes exactly 1 type parameter"); - } - return set_type_impl::get_instance(l[0], multicell); - } else if (class_name == "org.apache.cassandra.db.marshal.MapType") { - auto l = parser.get_type_parameters(); - if (l.size() != 2) { - throw exceptions::configuration_exception("MapType takes exactly 2 type parameters"); - } - return map_type_impl::get_instance(l[0], l[1], multicell); - } else if (class_name == "org.apache.cassandra.db.marshal.TupleType") { - auto l = parser.get_type_parameters(); - if (l.size() == 0) { - throw exceptions::configuration_exception("TupleType takes at least 1 type parameter"); - } - return tuple_type_impl::get_instance(l); - } else if (class_name == "org.apache.cassandra.db.marshal.UserType") { - sstring keyspace; - bytes name; - std::vector field_names; - std::vector field_types; - std::tie(keyspace, name, field_names, field_types) = parser.get_user_type_parameters(); - return user_type_impl::get_instance(std::move(keyspace), std::move(name), std::move(field_names), std::move(field_types)); - } else { - throw std::runtime_error("unknown type: " + class_name); - } -} - -bool type_parser::is_eos() const -{ - return is_eos(_str, _idx); -} - -bool type_parser::is_eos(const sstring& str, size_t i) -{ - return i >= str.size(); -} - -bool type_parser::is_blank(char c) -{ - return c == ' ' || c == '\t' || c == '\n'; -} - -void type_parser::skip_blank() -{ - _idx = skip_blank(_str, _idx); -} - -size_t type_parser::skip_blank(const sstring& str, size_t i) -{ - while (!is_eos(str, i) && is_blank(str[i])) { - ++i; - } - - return i; -} - -bool type_parser::skip_blank_and_comma() -{ - bool comma_found = false; - while (!is_eos()) { - int c = _str[_idx]; - if (c == ',') { - if (comma_found) - return true; - else - comma_found = true; - } else if (!is_blank(c)) { - return true; - } - ++_idx; - } - return false; -} - -/* - * [0..9a..bA..B-+._&] - */ -bool type_parser::is_identifier_char(char c) -{ - return (c >= '0' && c <= '9') - || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') - || c == '-' || c == '+' || c == '.' 
|| c == '_' || c == '&'; -} - -// left idx positioned on the character stopping the read -sstring type_parser::read_next_identifier() -{ - size_t i = _idx; - while (!is_eos() && is_identifier_char(_str[_idx])) { - ++_idx; - } - return _str.substr(i, _idx-i); -} - -} - -} diff --git a/scylla/db/marshal/type_parser.hh b/scylla/db/marshal/type_parser.hh deleted file mode 100644 index b7ceb82..0000000 --- a/scylla/db/marshal/type_parser.hh +++ /dev/null @@ -1,447 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "types.hh" - -#include "core/sstring.hh" - -namespace db { - -namespace marshal { - -/** - * Parse a string containing an Type definition. - */ -class type_parser { - sstring _str; - size_t _idx; - -#if 0 - // A cache of parsed string, specially useful for DynamicCompositeType - private static final Map> cache = new HashMap>(); - - public static final TypeParser EMPTY_PARSER = new TypeParser("", 0); -#endif - type_parser(sstring_view str, size_t idx); -public: - explicit type_parser(sstring_view str); - - /** - * Parse a string containing an type definition. - */ - static data_type parse(const sstring& str); - static data_type parse(sstring_view str); - -#if 0 - public static AbstractType parse(CharSequence compareWith) throws SyntaxException, ConfigurationException - { - return parse(compareWith == null ? null : compareWith.toString()); - } - - public static String getShortName(AbstractType type) - { - return type.getClass().getSimpleName(); - } -#endif - - /** - * Parse an AbstractType from current position of this parser. 
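A sketch of typical use, assuming a type string of the form the schema tables store (the concrete data_type comes back from abstract_type::parse_type):

    auto t = db::marshal::type_parser::parse(
        "org.apache.cassandra.db.marshal.ListType(org.apache.cassandra.db.marshal.UTF8Type)");
    // per the ListType branch of get_abstract_type() in type_parser.cc above,
    // this yields list_type_impl::get_instance(utf8_type, true)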
- */ - data_type parse(); - -#if 0 - public Map getKeyValueParameters() throws SyntaxException - { - if (isEOS()) - return Collections.emptyMap(); - - if (str.charAt(idx) != '(') - throw new IllegalStateException(); - - Map map = new HashMap(); - ++idx; // skipping '(' - - while (skipBlankAndComma()) - { - if (str.charAt(idx) == ')') - { - ++idx; - return map; - } - - String k = readNextIdentifier(); - String v = ""; - skipBlank(); - if (str.charAt(idx) == '=') - { - ++idx; - skipBlank(); - v = readNextIdentifier(); - } - else if (str.charAt(idx) != ',' && str.charAt(idx) != ')') - { - throwSyntaxError("unexpected character '" + str.charAt(idx) + "'"); - } - map.put(k, v); - } - throw new SyntaxException(String.format("Syntax error parsing '%s' at char %d: unexpected end of string", str, idx)); - } -#endif - std::vector get_type_parameters(bool multicell=true); - std::tuple, std::vector> get_user_type_parameters(); - data_type do_parse(bool multicell = true); - -#if 0 - public Map> getAliasParameters() throws SyntaxException, ConfigurationException - { - Map> map = new HashMap>(); - - if (isEOS()) - return map; - - if (str.charAt(idx) != '(') - throw new IllegalStateException(); - - ++idx; // skipping '(' - - - while (skipBlankAndComma()) - { - if (str.charAt(idx) == ')') - { - ++idx; - return map; - } - - String alias = readNextIdentifier(); - if (alias.length() != 1) - throwSyntaxError("An alias should be a single character"); - char aliasChar = alias.charAt(0); - if (aliasChar < 33 || aliasChar > 127) - throwSyntaxError("An alias should be a single character in [0..9a..bA..B-+._&]"); - - skipBlank(); - if (!(str.charAt(idx) == '=' && str.charAt(idx+1) == '>')) - throwSyntaxError("expecting '=>' token"); - - idx += 2; - skipBlank(); - try - { - map.put((byte)aliasChar, parse()); - } - catch (SyntaxException e) - { - SyntaxException ex = new SyntaxException(String.format("Exception while parsing '%s' around char %d", str, idx)); - ex.initCause(e); - throw ex; - } - } - throw new SyntaxException(String.format("Syntax error parsing '%s' at char %d: unexpected end of string", str, idx)); - } - - public Map getCollectionsParameters() throws SyntaxException, ConfigurationException - { - Map map = new HashMap<>(); - - if (isEOS()) - return map; - - if (str.charAt(idx) != '(') - throw new IllegalStateException(); - - ++idx; // skipping '(' - - while (skipBlankAndComma()) - { - if (str.charAt(idx) == ')') - { - ++idx; - return map; - } - - ByteBuffer bb = fromHex(readNextIdentifier()); - - skipBlank(); - if (str.charAt(idx) != ':') - throwSyntaxError("expecting ':' token"); - - ++idx; - skipBlank(); - try - { - AbstractType type = parse(); - if (!(type instanceof CollectionType)) - throw new SyntaxException(type + " is not a collection type"); - map.put(bb, (CollectionType)type); - } - catch (SyntaxException e) - { - SyntaxException ex = new SyntaxException(String.format("Exception while parsing '%s' around char %d", str, idx)); - ex.initCause(e); - throw ex; - } - } - throw new SyntaxException(String.format("Syntax error parsing '%s' at char %d: unexpected end of string", str, idx)); - } - - private ByteBuffer fromHex(String hex) throws SyntaxException - { - try - { - return ByteBufferUtil.hexToBytes(hex); - } - catch (NumberFormatException e) - { - throwSyntaxError(e.getMessage()); - return null; - } - } - - public Pair, List>> getUserTypeParameters() throws SyntaxException, ConfigurationException - { - - if (isEOS() || str.charAt(idx) != '(') - throw new IllegalStateException(); - - ++idx; // 
skipping '(' - - skipBlankAndComma(); - String keyspace = readNextIdentifier(); - skipBlankAndComma(); - ByteBuffer typeName = fromHex(readNextIdentifier()); - List> defs = new ArrayList<>(); - - while (skipBlankAndComma()) - { - if (str.charAt(idx) == ')') - { - ++idx; - return Pair.create(Pair.create(keyspace, typeName), defs); - } - - ByteBuffer name = fromHex(readNextIdentifier()); - skipBlank(); - if (str.charAt(idx) != ':') - throwSyntaxError("expecting ':' token"); - ++idx; - skipBlank(); - try - { - AbstractType type = parse(); - defs.add(Pair.create(name, type)); - } - catch (SyntaxException e) - { - SyntaxException ex = new SyntaxException(String.format("Exception while parsing '%s' around char %d", str, idx)); - ex.initCause(e); - throw ex; - } - } - throw new SyntaxException(String.format("Syntax error parsing '%s' at char %d: unexpected end of string", str, idx)); - } -#endif - - static data_type get_abstract_type(const sstring& compare_with); - - static data_type get_abstract_type(const sstring& compare_with, type_parser& parser, bool multicell = true); - -#if 0 - private static AbstractType getRawAbstractType(Class> typeClass) throws ConfigurationException - { - try - { - Field field = typeClass.getDeclaredField("instance"); - return (AbstractType) field.get(null); - } - catch (NoSuchFieldException e) - { - throw new ConfigurationException("Invalid comparator class " + typeClass.getName() + ": must define a public static instance field or a public static method getInstance(TypeParser)."); - } - catch (IllegalAccessException e) - { - throw new ConfigurationException("Invalid comparator class " + typeClass.getName() + ": must define a public static instance field or a public static method getInstance(TypeParser)."); - } - } - - private static AbstractType getRawAbstractType(Class> typeClass, TypeParser parser) throws ConfigurationException - { - try - { - Method method = typeClass.getDeclaredMethod("getInstance", TypeParser.class); - return (AbstractType) method.invoke(null, parser); - } - catch (NoSuchMethodException e) - { - throw new ConfigurationException("Invalid comparator class " + typeClass.getName() + ": must define a public static instance field or a public static method getInstance(TypeParser)."); - } - catch (IllegalAccessException e) - { - throw new ConfigurationException("Invalid comparator class " + typeClass.getName() + ": must define a public static instance field or a public static method getInstance(TypeParser)."); - } - catch (InvocationTargetException e) - { - ConfigurationException ex = new ConfigurationException("Invalid definition for comparator " + typeClass.getName() + "."); - ex.initCause(e.getTargetException()); - throw ex; - } - } - - private void throwSyntaxError(String msg) throws SyntaxException - { - throw new SyntaxException(String.format("Syntax error parsing '%s' at char %d: %s", str, idx, msg)); - } -#endif - - bool is_eos() const; - - static bool is_eos(const sstring& str, size_t i); - - static bool is_blank(char c); - - void skip_blank(); - - static size_t skip_blank(const sstring& str, size_t i); - - // skip all blank and at best one comma, return true if there not EOS - bool skip_blank_and_comma(); - - /* - * [0..9a..bA..B-+._&] - */ - static bool is_identifier_char(char c); - - // left idx positioned on the character stopping the read - sstring read_next_identifier(); - -#if 0 - public char readNextChar() - { - skipBlank(); - return str.charAt(idx++); - } - - /** - * Helper function to ease the writing of AbstractType.toString() 
methods. - */ - public static String stringifyAliasesParameters(Map> aliases) - { - StringBuilder sb = new StringBuilder(); - sb.append('('); - Iterator>> iter = aliases.entrySet().iterator(); - if (iter.hasNext()) - { - Map.Entry> entry = iter.next(); - sb.append((char)(byte)entry.getKey()).append("=>").append(entry.getValue()); - } - while (iter.hasNext()) - { - Map.Entry> entry = iter.next(); - sb.append(',').append((char)(byte)entry.getKey()).append("=>").append(entry.getValue()); - } - sb.append(')'); - return sb.toString(); - } - - /** - * Helper function to ease the writing of AbstractType.toString() methods. - */ - public static String stringifyTypeParameters(List> types) - { - return stringifyTypeParameters(types, false); - } - - /** - * Helper function to ease the writing of AbstractType.toString() methods. - */ - public static String stringifyTypeParameters(List> types, boolean ignoreFreezing) - { - StringBuilder sb = new StringBuilder("("); - for (int i = 0; i < types.size(); i++) - { - if (i > 0) - sb.append(","); - sb.append(types.get(i).toString(ignoreFreezing)); - } - return sb.append(')').toString(); - } - - public static String stringifyCollectionsParameters(Map collections) - { - StringBuilder sb = new StringBuilder(); - sb.append('('); - boolean first = true; - for (Map.Entry entry : collections.entrySet()) - { - if (!first) - sb.append(','); - - first = false; - sb.append(ByteBufferUtil.bytesToHex(entry.getKey())).append(":"); - sb.append(entry.getValue()); - } - sb.append(')'); - return sb.toString(); - } - - public static String stringifyUserTypeParameters(String keysace, ByteBuffer typeName, List columnNames, List> columnTypes) - { - StringBuilder sb = new StringBuilder(); - sb.append('(').append(keysace).append(",").append(ByteBufferUtil.bytesToHex(typeName)); - - for (int i = 0; i < columnNames.size(); i++) - { - sb.append(','); - sb.append(ByteBufferUtil.bytesToHex(columnNames.get(i))).append(":"); - // omit FrozenType(...) from fields because it is currently implicit - sb.append(columnTypes.get(i).toString(true)); - } - sb.append(')'); - return sb.toString(); - } -#endif -}; - -} - -} diff --git a/scylla/db/query_context.hh b/scylla/db/query_context.hh deleted file mode 100644 index 9e02af7..0000000 --- a/scylla/db/query_context.hh +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ -#pragma once - -#include -#include "core/sharded.hh" -#include "core/future.hh" -#include "cql3/query_processor.hh" - -class database; - -namespace service { -class storage_proxy; -} - - -namespace db { -struct query_context { - distributed& _db; - distributed& _qp; - query_context(distributed& db, distributed& qp) : _db(db), _qp(qp) {} - - template - future<::shared_ptr> execute_cql(sstring req, Args&&... args) { - return this->_qp.local().execute_internal(req, { data_value(std::forward(args))... 
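A usage sketch for this helper; the query string is illustrative only, the shared context is reached through the global qctx declared below, and the result type is assumed to be ::shared_ptr<cql3::untyped_result_set>, as the migrator's result_set_type suggests:

    qctx->execute_cql("SELECT * FROM system.local WHERE key = ?", sstring("local"))
        .then([](::shared_ptr<cql3::untyped_result_set> rs) {
            // inspect rs->one(), rs->empty(), ... as the migrator code above does
        });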
}); - } - database& db() { - return _db.local(); - } - - service::storage_proxy& proxy() { - return _qp.local().proxy().local(); - } - - api::timestamp_type next_timestamp() { - return _qp.local().next_timestamp(); - } - cql3::query_processor& qp() { - return _qp.local(); - } -}; - -// This does not have to be thread local, because all cores will share the same context. -extern std::unique_ptr qctx; - -// Sometimes we are not concerned about system tables at all - for instance, when we are testing. In those cases, just pretend -// we executed the query, and return an empty result -template -static future<::shared_ptr> execute_cql(sstring text, Args&&... args) { - assert(qctx); - return qctx->execute_cql(text, std::forward(args)...); -} - -} diff --git a/scylla/db/read_repair_decision.hh b/scylla/db/read_repair_decision.hh deleted file mode 100644 index c506a5f..0000000 --- a/scylla/db/read_repair_decision.hh +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -namespace db { - -enum class read_repair_decision { - NONE, - GLOBAL, - DC_LOCAL -}; - -inline std::ostream& operator<<(std::ostream& out, db::read_repair_decision d) { - switch (d) { - case db::read_repair_decision::NONE: out << "NONE"; break; - case db::read_repair_decision::GLOBAL: out << "GLOBAL"; break; - case db::read_repair_decision::DC_LOCAL: out << "DC_LOCAL"; break; - default: out << "ERR"; break; - } - return out; -} - -} diff --git a/scylla/db/schema_tables.cc b/scylla/db/schema_tables.cc deleted file mode 100644 index bc556cb..0000000 --- a/scylla/db/schema_tables.cc +++ /dev/null @@ -1,2568 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "db/schema_tables.hh" - -#include "service/migration_manager.hh" -#include "partition_slice_builder.hh" -#include "dht/i_partitioner.hh" -#include "system_keyspace.hh" -#include "query_context.hh" -#include "query-result-set.hh" -#include "query-result-writer.hh" -#include "schema_builder.hh" -#include "map_difference.hh" -#include "utils/UUID_gen.hh" -#include "core/do_with.hh" -#include "core/thread.hh" -#include "json.hh" -#include "log.hh" -#include "frozen_schema.hh" -#include "schema_registry.hh" -#include "mutation_query.hh" -#include "system_keyspace.hh" -#include "cql3/cql3_type.hh" - -#include "db/marshal/type_parser.hh" -#include "db/config.hh" -#include "md5_hasher.hh" - -#include -#include -#include -#include - -#include "compaction_strategy.hh" -#include "utils/joinpoint.hh" -#include "view_info.hh" -#include "cql_type_parser.hh" - -using namespace db::system_keyspace; -using namespace std::chrono_literals; - -/** system.schema_* tables used to store keyspace/table/type attributes prior to C* 3.0 */ -namespace db { -namespace schema_tables { - -logging::logger slogger("schema_tables"); - -const sstring version = "3"; - -struct push_back_and_return { - std::vector muts; - - std::vector operator()(mutation&& m) { - muts.emplace_back(std::move(m)); - return std::move(muts); - } -}; - -struct qualified_name { - sstring keyspace_name; - sstring table_name; - - qualified_name(sstring keyspace_name, sstring table_name) - : keyspace_name(std::move(keyspace_name)) - , table_name(std::move(table_name)) - { } - - qualified_name(const schema_ptr& s) - : keyspace_name(s->ks_name()) - , table_name(s->cf_name()) - { } - - bool operator<(const qualified_name& o) const { - return keyspace_name < o.keyspace_name - || (keyspace_name == o.keyspace_name && table_name < o.table_name); - } - - bool operator==(const qualified_name& o) const { - return keyspace_name == o.keyspace_name && table_name == o.table_name; - } -}; - -static future read_table_mutations(distributed& proxy, const qualified_name& table, schema_ptr s); - -static void merge_tables_and_views(distributed& proxy, - std::map&& tables_before, - std::map&& tables_after, - std::map&& views_before, - std::map&& views_after); - -static void merge_types(distributed& proxy, - schema_result&& before, - schema_result&& after); - -static future<> do_merge_schema(distributed&, std::vector, bool 
do_flush);
-
-static std::vector<column_definition> create_columns_from_column_rows(
-        const query::result_set& rows, const sstring& keyspace,
-        const sstring& table, bool is_super);
-
-
-static std::vector<index_metadata> create_indices_from_index_rows(const query::result_set& rows,
-        const sstring& keyspace,
-        const sstring& table);
-
-static index_metadata create_index_from_index_row(const query::result_set_row& row,
-        sstring keyspace,
-        sstring table);
-
-static void add_column_to_schema_mutation(schema_ptr, const column_definition&,
-        api::timestamp_type, mutation&);
-
-static void add_index_to_schema_mutation(schema_ptr table,
-        const index_metadata& index, api::timestamp_type timestamp,
-        mutation& mutation);
-
-static void drop_column_from_schema_mutation(schema_ptr schema_table, schema_ptr table,
-        const sstring& column_name, long timestamp,
-        std::vector<mutation>&);
-
-static void drop_index_from_schema_mutation(schema_ptr table,
-        const index_metadata& column, long timestamp,
-        std::vector<mutation>& mutations);
-
-static future<schema_ptr> create_table_from_table_row(
-        distributed<service::storage_proxy>&,
-        const query::result_set_row&);
-
-static void prepare_builder_from_table_row(schema_builder&, const query::result_set_row&);
-
-using namespace v3;
-
-std::vector<const char*> ALL { KEYSPACES, TABLES, SCYLLA_TABLES, COLUMNS, DROPPED_COLUMNS, TRIGGERS, VIEWS, TYPES, FUNCTIONS, AGGREGATES, INDEXES };
-
-using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
-
-future<> save_system_schema(const sstring & ksname) {
-    auto& ks = db::qctx->db().find_keyspace(ksname);
-    auto ksm = ks.metadata();
-
-    // delete old, possibly obsolete entries in schema tables
-    return parallel_for_each(ALL, [ksm] (sstring cf) {
-        auto deletion_timestamp = schema_creation_timestamp() - 1;
-        return db::execute_cql(sprint("DELETE FROM %s.%s USING TIMESTAMP %s WHERE keyspace_name = ?", NAME, cf,
-            deletion_timestamp), ksm->name()).discard_result();
-    }).then([ksm] {
-        auto mvec = make_create_keyspace_mutations(ksm, schema_creation_timestamp(), true);
-        return qctx->proxy().mutate_locally(std::move(mvec));
-    });
-}
-
-/** add entries to system_schema.* for the hardcoded system definitions */
-future<> save_system_keyspace_schema() {
-    return save_system_schema(NAME);
-}
-
-namespace v3 {
-
-static constexpr auto schema_gc_grace = std::chrono::duration_cast<std::chrono::seconds>(days(7)).count();
-
-schema_ptr keyspaces() {
-    static thread_local auto schema = [] {
-        schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, KEYSPACES), NAME, KEYSPACES,
-        // partition key
-        {{"keyspace_name", utf8_type}},
-        // clustering key
-        {},
-        // regular columns
-        {
-         {"durable_writes", boolean_type},
-         {"replication", map_type_impl::get_instance(utf8_type, utf8_type, false)},
-        },
-        // static columns
-        {},
-        // regular column name type
-        utf8_type,
-        // comment
-        "keyspace definitions"
-        )));
-        builder.set_gc_grace_seconds(schema_gc_grace);
-        builder.with_version(generate_schema_version(builder.uuid()));
-        return builder.build();
-    }();
-    return schema;
-}
-
-schema_ptr tables() {
-    static thread_local auto schema = [] {
-        schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, TABLES), NAME, TABLES,
-        // partition key
-        {{"keyspace_name", utf8_type}},
-        // clustering key
-        {{"table_name", utf8_type}},
-        // regular columns
-        {
-         {"bloom_filter_fp_chance", double_type},
-         {"caching", map_type_impl::get_instance(utf8_type, utf8_type, false)},
-         {"comment", utf8_type},
-         {"compaction", map_type_impl::get_instance(utf8_type, utf8_type, false)},
-         {"compression", map_type_impl::get_instance(utf8_type, utf8_type, false)},
-         {"crc_check_chance", double_type},
-         {"dclocal_read_repair_chance", double_type},
-         {"default_time_to_live", int32_type},
-         {"extensions", map_type_impl::get_instance(utf8_type, bytes_type, false)},
-         {"flags", set_type_impl::get_instance(utf8_type, false)}, // SUPER, COUNTER, DENSE, COMPOUND
-         {"gc_grace_seconds", int32_type},
-         {"id", uuid_type},
-         {"max_index_interval", int32_type},
-         {"memtable_flush_period_in_ms", int32_type},
-         {"min_index_interval", int32_type},
-         {"read_repair_chance", double_type},
-         {"speculative_retry", utf8_type},
-        },
-        // static columns
-        {},
-        // regular column name type
-        utf8_type,
-        // comment
-        "table definitions"
-        )));
-        builder.set_gc_grace_seconds(schema_gc_grace);
-        builder.with_version(generate_schema_version(builder.uuid()));
-        return builder.build();
-    }();
-    return schema;
-}
-
-// Holds Scylla-specific table metadata.
-schema_ptr scylla_tables() {
-    static thread_local auto schema = [] {
-        auto id = generate_legacy_id(NAME, SCYLLA_TABLES);
-        return schema_builder(NAME, SCYLLA_TABLES, stdx::make_optional(id))
-            .with_column("keyspace_name", utf8_type, column_kind::partition_key)
-            .with_column("table_name", utf8_type, column_kind::clustering_key)
-            .with_column("version", uuid_type)
-            .set_gc_grace_seconds(schema_gc_grace)
-            .with_version(generate_schema_version(id))
-            .build();
-    }();
-    return schema;
-}
-
-schema_ptr columns() {
-    static thread_local auto schema = [] {
-        schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, COLUMNS), NAME, COLUMNS,
-        // partition key
-        {{"keyspace_name", utf8_type}},
-        // clustering key
-        {{"table_name", utf8_type},{"column_name", utf8_type}},
-        // regular columns
-        {
-         {"clustering_order", utf8_type},
-         {"column_name_bytes", bytes_type},
-         {"kind", utf8_type},
-         {"position", int32_type},
-         {"type", utf8_type},
-        },
-        // static columns
-        {},
-        // regular column name type
-        utf8_type,
-        // comment
-        "column definitions"
-        )));
-        builder.set_gc_grace_seconds(schema_gc_grace);
-        builder.with_version(generate_schema_version(builder.uuid()));
-        return builder.build();
-    }();
-    return schema;
-}
-
-schema_ptr dropped_columns() {
-    static thread_local auto schema = [] {
-        schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, DROPPED_COLUMNS), NAME, DROPPED_COLUMNS,
-        // partition key
-        {{"keyspace_name", utf8_type}},
-        // clustering key
-        {{"table_name", utf8_type},{"column_name", utf8_type}},
-        // regular columns
-        {
-         {"dropped_time", timestamp_type},
-         {"type", utf8_type},
-        },
-        // static columns
-        {},
-        // regular column name type
-        utf8_type,
-        // comment
-        "dropped column registry"
-        )));
-        builder.set_gc_grace_seconds(schema_gc_grace);
-        builder.with_version(generate_schema_version(builder.uuid()));
-        return builder.build();
-    }();
-    return schema;
-}
-
-schema_ptr triggers() {
-    static thread_local auto schema = [] {
-        schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, TRIGGERS), NAME, TRIGGERS,
-        // partition key
-        {{"keyspace_name", utf8_type}},
-        // clustering key
-        {{"table_name", utf8_type},{"trigger_name", utf8_type}},
-        // regular columns
-        {
-         {"options", map_type_impl::get_instance(utf8_type, utf8_type, false)},
-        },
-        // static columns
-        {},
-        // regular column name type
-        utf8_type,
-        // comment
-        "trigger definitions"
-        )));
-        builder.set_gc_grace_seconds(schema_gc_grace);
-        builder.with_version(generate_schema_version(builder.uuid()));
-        return builder.build();
-    }();
-    return schema;
-}
-
-schema_ptr views() {
-    static thread_local auto schema = [] {
-        schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, VIEWS), NAME, VIEWS,
-        // partition key
-        {{"keyspace_name", utf8_type}},
-        // clustering key
-        {{"view_name", utf8_type}},
-        // regular columns
-        {
-         {"base_table_id", uuid_type},
-         {"base_table_name", utf8_type},
-         {"where_clause", utf8_type},
-         {"bloom_filter_fp_chance", double_type},
-         {"caching", map_type_impl::get_instance(utf8_type, utf8_type, false)},
-         {"comment", utf8_type},
-         {"compaction", map_type_impl::get_instance(utf8_type, utf8_type, false)},
-         {"compression", map_type_impl::get_instance(utf8_type, utf8_type, false)},
-         {"crc_check_chance", double_type},
-         {"dclocal_read_repair_chance", double_type},
-         {"default_time_to_live", int32_type},
-         {"extensions", map_type_impl::get_instance(utf8_type, bytes_type, false)},
-         {"gc_grace_seconds", int32_type},
-         {"id", uuid_type},
-         {"include_all_columns", boolean_type},
-         {"max_index_interval", int32_type},
-         {"memtable_flush_period_in_ms", int32_type},
-         {"min_index_interval", int32_type},
-         {"read_repair_chance", double_type},
-         {"speculative_retry", utf8_type},
-        },
-        // static columns
-        {},
-        // regular column name type
-        utf8_type,
-        // comment
-        "view definitions"
-        )));
-        builder.set_gc_grace_seconds(schema_gc_grace);
-        builder.with_version(generate_schema_version(builder.uuid()));
-        return builder.build();
-    }();
-    return schema;
-}
-
-schema_ptr indexes() {
-    static thread_local auto schema = [] {
-        schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, INDEXES), NAME, INDEXES,
-        // partition key
-        {{"keyspace_name", utf8_type}},
-        // clustering key
-        {{"table_name", utf8_type},{"index_name", utf8_type}},
-        // regular columns
-        {
-         {"kind", utf8_type},
-         {"options", map_type_impl::get_instance(utf8_type, utf8_type, false)},
-        },
-        // static columns
-        {},
-        // regular column name type
-        utf8_type,
-        // comment
-        "secondary index definitions"
-        )));
-        builder.set_gc_grace_seconds(schema_gc_grace);
-        builder.with_version(generate_schema_version(builder.uuid()));
-        return builder.build();
-    }();
-    return schema;
-}
-
-schema_ptr types() {
-    static thread_local auto schema = [] {
-        schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, TYPES), NAME, TYPES,
-        // partition key
-        {{"keyspace_name", utf8_type}},
-        // clustering key
-        {{"type_name", utf8_type}},
-        // regular columns
-        {
-         {"field_names", list_type_impl::get_instance(utf8_type, false)},
-         {"field_types", list_type_impl::get_instance(utf8_type, false)},
-        },
-        // static columns
-        {},
-        // regular column name type
-        utf8_type,
-        // comment
-        "user defined type definitions"
-        )));
-        builder.set_gc_grace_seconds(schema_gc_grace);
-        builder.with_version(generate_schema_version(builder.uuid()));
-        return builder.build();
-    }();
-    return schema;
-}
-
-schema_ptr functions() {
-    static thread_local auto schema = [] {
-        schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, FUNCTIONS), NAME, FUNCTIONS,
-        // partition key
-        {{"keyspace_name", utf8_type}},
-        // clustering key
-        {{"function_name", utf8_type}, {"argument_types", list_type_impl::get_instance(utf8_type, false)}},
-        // regular columns
-        {
-         {"argument_names", list_type_impl::get_instance(utf8_type, false)},
-         {"body", utf8_type},
-         {"language", utf8_type},
-         {"return_type", utf8_type},
-         {"called_on_null_input", boolean_type},
-        },
-        // static columns
-        {},
-        // regular column name type
-        utf8_type,
-        // comment
-        "user defined function definitions"
-        )));
-        builder.set_gc_grace_seconds(schema_gc_grace);
-        builder.with_version(generate_schema_version(builder.uuid()));
-        return builder.build();
-    }();
-    return schema;
-}
-
-schema_ptr aggregates() {
-    static thread_local auto schema = [] {
-        schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, AGGREGATES), NAME, AGGREGATES,
-        // partition key
-        {{"keyspace_name", utf8_type}},
-        // clustering key
-        {{"aggregate_name", utf8_type}, {"argument_types", list_type_impl::get_instance(utf8_type, false)}},
-        // regular columns
-        {
-         {"final_func", utf8_type},
-         {"initcond", utf8_type},
-         {"return_type", utf8_type},
-         {"state_func", utf8_type},
-         {"state_type", utf8_type},
-        },
-        // static columns
-        {},
-        // regular column name type
-        utf8_type,
-        // comment
-        "user defined aggregate definitions"
-        )));
-        builder.set_gc_grace_seconds(schema_gc_grace);
-        builder.with_version(generate_schema_version(builder.uuid()));
-        return builder.build();
-    }();
-    return schema;
-}
-
-}
-
-#if 0
-    public static void truncateSchemaTables()
-    {
-        for (String table : ALL)
-            getSchemaCFS(table).truncateBlocking();
-    }
-
-    private static void flushSchemaTables()
-    {
-        for (String table : ALL)
-            SystemKeyspace.forceBlockingFlush(table);
-    }
-#endif
-
-/**
- * Read schema from system keyspace and calculate MD5 digest of every row, resulting digest
- * will be converted into UUID which would act as content-based version of the schema.
- */
-future<utils::UUID> calculate_schema_digest(distributed<service::storage_proxy>& proxy)
-{
-    auto map = [&proxy] (sstring table) {
-        return db::system_keyspace::query_mutations(proxy, NAME, table).then([&proxy, table] (auto rs) {
-            auto s = proxy.local().get_db().local().find_schema(NAME, table);
-            std::vector<mutation> mutations;
-            for (auto&& p : rs->partitions()) {
-                auto mut = p.mut().unfreeze(s);
-                auto partition_key = value_cast<sstring>(utf8_type->deserialize(mut.key().get_component(*s, 0)));
-                if (is_system_keyspace(partition_key)) {
-                    continue;
-                }
-                mutations.emplace_back(std::move(mut));
-            }
-            return mutations;
-        });
-    };
-    auto reduce = [] (auto& hash, auto&& mutations) {
-        for (const mutation& m : mutations) {
-            feed_hash_for_schema_digest(hash, m);
-        }
-    };
-    return do_with(md5_hasher(), [map, reduce] (auto& hash) {
-        return do_for_each(ALL.begin(), ALL.end(), [&hash, map, reduce] (auto& table) {
-            return map(table).then([&hash, reduce] (auto&& mutations) {
-                reduce(hash, mutations);
-            });
-        }).then([&hash] {
-            return make_ready_future<utils::UUID>(utils::UUID_gen::get_name_UUID(hash.finalize()));
-        });
-    });
-}
-
-future<std::vector<frozen_mutation>> convert_schema_to_mutations(distributed<service::storage_proxy>& proxy)
-{
-    auto map = [&proxy] (sstring table) {
-        return db::system_keyspace::query_mutations(proxy, NAME, table).then([&proxy, table] (auto rs) {
-            auto s = proxy.local().get_db().local().find_schema(NAME, table);
-            std::vector<frozen_mutation> results;
-            for (auto&& p : rs->partitions()) {
-                auto mut = p.mut().unfreeze(s);
-                auto partition_key = value_cast<sstring>(utf8_type->deserialize(mut.key().get_component(*s, 0)));
-                if (is_system_keyspace(partition_key)) {
-                    continue;
-                }
-                results.emplace_back(std::move(p.mut()));
-            }
-            return results;
-        });
-    };
-    auto reduce = [] (auto&& result, auto&& mutations) {
-        std::move(mutations.begin(), mutations.end(), std::back_inserter(result));
-        return std::move(result);
-    };
-    return map_reduce(ALL.begin(), ALL.end(), map, std::vector<frozen_mutation>{}, reduce);
-}
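Aside: the digest computed above is what makes two nodes agree on a schema "version" without comparing rows one by one — every non-system schema mutation is folded into one running hash, and the final digest is turned into a name-based UUID. A minimal sketch of that fold in plain standard C++, with hypothetical stand-in names (schema_row, fnv1a, digest_schema) and FNV-1a in place of MD5:

    #include <cstdint>
    #include <string>
    #include <vector>

    struct schema_row { std::string keyspace, payload; }; // hypothetical stand-in

    static uint64_t fnv1a(uint64_t h, const std::string& s) {
        for (unsigned char c : s) { h ^= c; h *= 1099511628211ULL; } // FNV-1a step
        return h;
    }

    uint64_t digest_schema(const std::vector<schema_row>& rows) {
        uint64_t h = 1469598103934665603ULL; // FNV offset basis
        for (const auto& r : rows) {
            if (r.keyspace == "system") continue; // skip system keyspaces, as above
            h = fnv1a(h, r.payload);             // feed every remaining row into one hash
        }
        return h; // the real code names a UUID from an MD5 digest instead
    }
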
-future<schema_result>
-read_schema_for_keyspaces(distributed<service::storage_proxy>& proxy, const sstring& schema_table_name, const std::set<sstring>& keyspace_names)
-{
-    auto schema = proxy.local().get_db().local().find_schema(NAME, schema_table_name);
-    auto map = [&proxy, schema_table_name] (const sstring& keyspace_name) { return read_schema_partition_for_keyspace(proxy, schema_table_name, keyspace_name); };
-    auto insert = [] (schema_result&& result, auto&& schema_entity) {
-        if (!schema_entity.second->empty()) {
-            result.insert(std::move(schema_entity));
-        }
-        return std::move(result);
-    };
-    return map_reduce(keyspace_names.begin(), keyspace_names.end(), map, schema_result{}, insert);
-}
-
-static
-future<mutation> query_partition_mutation(service::storage_proxy& proxy,
-        schema_ptr s,
-        lw_shared_ptr<query::read_command> cmd,
-        partition_key pkey)
-{
-    auto dk = dht::global_partitioner().decorate_key(*s, pkey);
-    return do_with(dht::partition_range::make_singular(dk), [&proxy, dk, s = std::move(s), cmd = std::move(cmd)] (auto& range) {
-        return proxy.query_mutations_locally(s, std::move(cmd), range)
-            .then([dk = std::move(dk), s](foreign_ptr<lw_shared_ptr<reconcilable_result>> res, cache_temperature hit_rate) {
-                auto&& partitions = res->partitions();
-                if (partitions.size() == 0) {
-                    return mutation(std::move(dk), s);
-                } else if (partitions.size() == 1) {
-                    return partitions[0].mut().unfreeze(s);
-                } else {
-                    assert(false && "Results must have at most one partition");
-                }
-            });
-    });
-}
-
-future<schema_result_value_type>
-read_schema_partition_for_keyspace(distributed<service::storage_proxy>& proxy, const sstring& schema_table_name, const sstring& keyspace_name)
-{
-    auto schema = proxy.local().get_db().local().find_schema(NAME, schema_table_name);
-    auto keyspace_key = dht::global_partitioner().decorate_key(*schema,
-        partition_key::from_singular(*schema, keyspace_name));
-    return db::system_keyspace::query(proxy, NAME, schema_table_name, keyspace_key).then([keyspace_name] (auto&& rs) {
-        return schema_result_value_type{keyspace_name, std::move(rs)};
-    });
-}
-
-future<mutation>
-read_schema_partition_for_table(distributed<service::storage_proxy>& proxy, schema_ptr schema, const sstring& keyspace_name, const sstring& table_name)
-{
-    auto keyspace_key = partition_key::from_singular(*schema, keyspace_name);
-    auto clustering_range = query::clustering_range(clustering_key_prefix::from_clustering_prefix(
-        *schema, exploded_clustering_prefix({utf8_type->decompose(table_name)})));
-    auto slice = partition_slice_builder(*schema)
-        .with_range(std::move(clustering_range))
-        .build();
-    auto cmd = make_lw_shared<query::read_command>(schema->id(), schema->version(), std::move(slice), query::max_rows);
-    return query_partition_mutation(proxy.local(), std::move(schema), std::move(cmd), std::move(keyspace_key));
-}
-
-future<mutation>
-read_keyspace_mutation(distributed<service::storage_proxy>& proxy, const sstring& keyspace_name) {
-    schema_ptr s = keyspaces();
-    auto key = partition_key::from_singular(*s, keyspace_name);
-    auto cmd = make_lw_shared<query::read_command>(s->id(), s->version(), query::full_slice);
-    return query_partition_mutation(proxy.local(), std::move(s), std::move(cmd), std::move(key));
-}
-
-static semaphore the_merge_lock {1};
-
-future<> merge_lock() {
-    // ref: #1088
-    // to avoid deadlocks, we don't want long-standing calls to the shard 0
-    // as they can cause a deadlock:
-    //
-    //   fiber1                      fiber2
-    //   merge_lock() (succeeds)
-    //                               merge_lock() (waits)
-    //   invoke_on_all()             (waits on merge_lock to relinquish smp::submit_to slot)
-    //
-    // so we issue the lock calls with a timeout; the slot will be relinquished, and invoke_on_all()
-    // can complete
-    return repeat([] () mutable {
-        return smp::submit_to(0, [] {
-            return the_merge_lock.try_wait();
-        }).then([] (bool result) {
-            if (result) {
-                return make_ready_future<stop_iteration>(stop_iteration::yes);
-            } else {
-                static thread_local auto rand_engine = std::default_random_engine();
-                auto dist = std::uniform_int_distribution<int>(0, 100);
-                auto to = std::chrono::microseconds(dist(rand_engine));
-                return sleep(to).then([] {
-                    return make_ready_future<stop_iteration>(stop_iteration::no);
-                });
-            }
-        });
-    });
-}
-
-future<> merge_unlock() {
-    return smp::submit_to(0, [] { the_merge_lock.signal(); });
-}
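Aside: the key point of merge_lock() is that it never blocks while holding the cross-shard submit slot — it polls try_wait() and, on failure, sleeps for a randomized few microseconds before retrying, which is what breaks the deadlock described in the comment. The same shape in plain threaded C++, with hypothetical names (merge_mutex, lock_with_backoff) that are not part of this codebase:

    #include <chrono>
    #include <mutex>
    #include <random>
    #include <thread>

    std::mutex merge_mutex; // stand-in for the shard-0 semaphore

    void lock_with_backoff() {
        thread_local std::default_random_engine eng{std::random_device{}()};
        std::uniform_int_distribution<int> dist(0, 100);
        // poll instead of parking, so other work can run between attempts,
        // mirroring the try_wait() + sleep() retry loop above
        while (!merge_mutex.try_lock()) {
            std::this_thread::sleep_for(std::chrono::microseconds(dist(eng)));
        }
    }
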
-/**
- * Merge remote schema in form of mutations with local and mutate ks/cf metadata objects
- * (which also involves fs operations on add/drop ks/cf)
- *
- * @param mutations the schema changes to apply
- *
- * @throws ConfigurationException If one of metadata attributes has invalid value
- * @throws IOException If data was corrupted during transportation or failed to apply fs operations
- */
-future<> merge_schema(distributed<service::storage_proxy>& proxy, std::vector<mutation> mutations)
-{
-    return merge_lock().then([&proxy, mutations = std::move(mutations)] () mutable {
-        return do_merge_schema(proxy, std::move(mutations), true).then([&proxy] {
-            return update_schema_version_and_announce(proxy);
-        });
-    }).finally([] {
-        return merge_unlock();
-    });
-}
-
-future<> merge_schema(distributed<service::storage_proxy>& proxy, std::vector<mutation> mutations, bool do_flush)
-{
-    return merge_lock().then([&proxy, mutations = std::move(mutations), do_flush] () mutable {
-        return do_merge_schema(proxy, std::move(mutations), do_flush);
-    }).finally([] {
-        return merge_unlock();
-    });
-}
-
-// Returns names of live table definitions of given keyspace
-future<std::vector<sstring>>
-static read_table_names_of_keyspace(distributed<service::storage_proxy>& proxy, const sstring& keyspace_name, schema_ptr schema_table) {
-    auto pkey = dht::global_partitioner().decorate_key(*schema_table, partition_key::from_singular(*schema_table, keyspace_name));
-    return db::system_keyspace::query(proxy, schema_table->ks_name(), schema_table->cf_name(), pkey).then([schema_table] (auto&& rs) {
-        return boost::copy_range<std::vector<sstring>>(rs->rows() | boost::adaptors::transformed([schema_table] (const query::result_set_row& row) {
-            const sstring name = schema_table->clustering_key_columns().begin()->name_as_text();
-            return row.get_nonnull<sstring>(name);
-        }));
-    });
-}
-
-// Call inside a seastar thread
-static
-std::map<qualified_name, schema_mutations>
-read_tables_for_keyspaces(distributed<service::storage_proxy>& proxy, const std::set<sstring>& keyspace_names, schema_ptr s)
-{
-    std::map<qualified_name, schema_mutations> result;
-    for (auto&& keyspace_name : keyspace_names) {
-        for (auto&& table_name : read_table_names_of_keyspace(proxy, keyspace_name, s).get0()) {
-            auto qn = qualified_name(keyspace_name, table_name);
-            result.emplace(qn, read_table_mutations(proxy, qn, s).get0());
-        }
-    }
-    return result;
-}
-
-mutation compact_for_schema_digest(const mutation& m) {
-    // Cassandra is skipping tombstones from digest calculation
-    // to avoid disagreements due to tombstone GC.
-    // See https://issues.apache.org/jira/browse/CASSANDRA-6862.
-    // We achieve similar effect with compact_for_compaction().
-    mutation m_compacted(m);
-    m_compacted.partition().compact_for_compaction(*m.schema(), always_gc, gc_clock::time_point::max());
-    return m_compacted;
-}
-
-// Applies deletion of the "version" column to a system_schema.scylla_tables mutation.
-static void delete_schema_version(mutation& m) {
-    if (m.column_family_id() != scylla_tables()->id()) {
-        return;
-    }
-    const column_definition& version_col = *scylla_tables()->get_column_definition(to_bytes("version"));
-    for (auto&& row : m.partition().clustered_rows()) {
-        auto&& cells = row.row().cells();
-        auto&& cell = cells.find_cell(version_col.id);
-        api::timestamp_type t = api::new_timestamp();
-        if (cell) {
-            t = std::max(t, cell->as_atomic_cell().timestamp());
-        }
-        cells.apply(version_col, atomic_cell::make_dead(t, gc_clock::now()));
-    }
-}
-
-static future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std::vector<mutation> mutations, bool do_flush)
-{
-    return seastar::async([&proxy, mutations = std::move(mutations), do_flush] () mutable {
-        schema_ptr s = keyspaces();
-        // compare before/after schemas of the affected keyspaces only
-        std::set<sstring> keyspaces;
-        std::set<utils::UUID> column_families;
-        for (auto&& mutation : mutations) {
-            keyspaces.emplace(value_cast<sstring>(utf8_type->deserialize(mutation.key().get_component(*s, 0))));
-            column_families.emplace(mutation.column_family_id());
-            // We must force recalculation of schema version after the merge, since the resulting
-            // schema may be a mix of the old and new schemas.
-            delete_schema_version(mutation);
-        }
-
-        // current state of the schema
-        auto&& old_keyspaces = read_schema_for_keyspaces(proxy, KEYSPACES, keyspaces).get0();
-        auto&& old_column_families = read_tables_for_keyspaces(proxy, keyspaces, tables());
-        auto&& old_types = read_schema_for_keyspaces(proxy, TYPES, keyspaces).get0();
-        auto&& old_views = read_tables_for_keyspaces(proxy, keyspaces, views());
-#if 0 // not in 2.1.8
-        /*auto& old_functions = */read_schema_for_keyspaces(proxy, FUNCTIONS, keyspaces).get0();
-        /*auto& old_aggregates = */read_schema_for_keyspaces(proxy, AGGREGATES, keyspaces).get0();
-#endif
-
-        proxy.local().mutate_locally(std::move(mutations)).get0();
-
-        if (do_flush) {
-            proxy.local().get_db().invoke_on_all([s, cfs = std::move(column_families)] (database& db) {
-                return parallel_for_each(cfs.begin(), cfs.end(), [&db] (auto& id) {
-                    auto& cf = db.find_column_family(id);
-                    return cf.flush();
-                });
-            }).get();
-        }
-
-        // with new data applied
-        auto&& new_keyspaces = read_schema_for_keyspaces(proxy, KEYSPACES, keyspaces).get0();
-        auto&& new_column_families = read_tables_for_keyspaces(proxy, keyspaces, tables());
-        auto&& new_types = read_schema_for_keyspaces(proxy, TYPES, keyspaces).get0();
-        auto&& new_views = read_tables_for_keyspaces(proxy, keyspaces, views());
-#if 0 // not in 2.1.8
-        /*auto& new_functions = */read_schema_for_keyspaces(proxy, FUNCTIONS, keyspaces).get0();
-        /*auto& new_aggregates = */read_schema_for_keyspaces(proxy, AGGREGATES, keyspaces).get0();
-#endif
-
-        std::set<sstring> keyspaces_to_drop = merge_keyspaces(proxy, std::move(old_keyspaces), std::move(new_keyspaces)).get0();
-        merge_types(proxy, std::move(old_types), std::move(new_types));
-        merge_tables_and_views(proxy,
-            std::move(old_column_families), std::move(new_column_families),
-            std::move(old_views), std::move(new_views));
-#if 0
-        mergeFunctions(oldFunctions, newFunctions);
-        mergeAggregates(oldAggregates, newAggregates);
-#endif
-        proxy.local().get_db().invoke_on_all([keyspaces_to_drop = std::move(keyspaces_to_drop)] (database& db) {
-            // it is safe to drop a keyspace only when all nested ColumnFamilies where deleted
-            return do_for_each(keyspaces_to_drop, [&db] (auto keyspace_to_drop) {
-                db.drop_keyspace(keyspace_to_drop);
-                return service::get_local_migration_manager().notify_drop_keyspace(keyspace_to_drop);
-            });
-        }).get0();
-    });
-}
-
-future<std::set<sstring>> merge_keyspaces(distributed<service::storage_proxy>& proxy, schema_result&& before, schema_result&& after)
-{
-    std::vector<schema_result_value_type> created;
-    std::vector<sstring> altered;
-    std::set<sstring> dropped;
-
-    /*
-     * - we don't care about entriesOnlyOnLeft() or entriesInCommon(), because only the changes are of interest to us
-     * - of all entriesOnlyOnRight(), we only care about ones that have live columns; it's possible to have a ColumnFamily
-     *   there that only has the top-level deletion, if:
-     *      a) a pushed DROP KEYSPACE change for a keyspace hadn't ever made it to this node in the first place
-     *      b) a pulled dropped keyspace that got dropped before it could find a way to this node
-     * - of entriesDiffering(), we don't care about the scenario where both pre and post-values have zero live columns:
-     *   that means that a keyspace had been recreated and dropped, and the recreated keyspace had never found a way
-     *   to this node
-     */
-    auto diff = difference(before, after, indirect_equal_to<lw_shared_ptr<query::result_set>>());
-
-    for (auto&& key : diff.entries_only_on_left) {
-        slogger.info("Dropping keyspace {}", key);
-        dropped.emplace(key);
-    }
-    for (auto&& key : diff.entries_only_on_right) {
-        auto&& value = after[key];
-        slogger.info("Creating keyspace {}", key);
-        created.emplace_back(schema_result_value_type{key, std::move(value)});
-    }
-    for (auto&& key : diff.entries_differing) {
-        slogger.info("Altering keyspace {}", key);
-        altered.emplace_back(key);
-    }
-    return do_with(std::move(created), [&proxy, altered = std::move(altered)] (auto& created) mutable {
-        return do_with(std::move(altered), [&proxy, &created](auto& altered) {
-            return proxy.local().get_db().invoke_on_all([&created, &altered] (database& db) {
-                return do_for_each(created, [&db](auto&& val) {
-                    auto ksm = create_keyspace_from_schema_partition(val);
-                    return db.create_keyspace(ksm).then([ksm] {
-                        return service::get_local_migration_manager().notify_create_keyspace(ksm);
-                    });
-                }).then([&altered, &db]() {
-                    return do_for_each(altered, [&db](auto& name) {
-                        return db.update_keyspace(name);
-                    });
-                });
-            });
-        });
-    }).then([dropped = std::move(dropped)] () {
-        return make_ready_future<std::set<sstring>>(dropped);
-    });
-}
-
-struct schema_diff {
-    struct dropped_schema {
-        global_schema_ptr schema;
-        utils::joinpoint<db_clock::time_point> jp{[] {
-            return make_ready_future<db_clock::time_point>(db_clock::now());
-        }};
-    };
-
-    std::vector<global_schema_ptr> created;
-    std::vector<global_schema_ptr> altered;
-    std::vector<dropped_schema> dropped;
-
-    size_t size() const {
-        return created.size() + altered.size() + dropped.size();
-    }
-};
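Aside: merge_keyspaces() and the diff helpers below all lean on the same three-way classification — entries only in the "before" snapshot are drops, entries only in "after" are creates, and shared keys with differing values are alters. A minimal model of that difference() shape over std::map, with hypothetical names (diff_result, difference over std::string maps) purely for illustration:

    #include <map>
    #include <set>
    #include <string>

    struct diff_result {
        std::set<std::string> only_on_left, only_on_right, differing;
    };

    diff_result difference(const std::map<std::string, std::string>& before,
                           const std::map<std::string, std::string>& after) {
        diff_result d;
        for (const auto& [k, v] : before) {
            auto it = after.find(k);
            if (it == after.end())      d.only_on_left.insert(k);  // dropped
            else if (it->second != v)   d.differing.insert(k);     // altered
        }
        for (const auto& [k, v] : after) {
            if (!before.count(k))       d.only_on_right.insert(k); // created
        }
        return d;
    }
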
-template<typename CreateSchema>
-static schema_diff diff_table_or_view(distributed<service::storage_proxy>& proxy,
-    std::map<qualified_name, schema_mutations>&& before,
-    std::map<qualified_name, schema_mutations>&& after,
-    CreateSchema&& create_schema)
-{
-    schema_diff d;
-    auto diff = difference(before, after);
-    for (auto&& key : diff.entries_only_on_left) {
-        auto&& s = proxy.local().get_db().local().find_schema(key.keyspace_name, key.table_name);
-        slogger.info("Dropping {}.{} id={} version={}", s->ks_name(), s->cf_name(), s->id(), s->version());
-        d.dropped.emplace_back(schema_diff::dropped_schema{s});
-    }
-    for (auto&& key : diff.entries_only_on_right) {
-        auto s = create_schema(std::move(after.at(key)));
-        slogger.info("Creating {}.{} id={} version={}", s->ks_name(), s->cf_name(), s->id(), s->version());
-        d.created.emplace_back(s);
-    }
-    for (auto&& key : diff.entries_differing) {
-        auto s = create_schema(std::move(after.at(key)));
-        slogger.info("Altering {}.{} id={} version={}", s->ks_name(), s->cf_name(), s->id(), s->version());
-        d.altered.emplace_back(s);
-    }
-    return d;
-}
-
-// see the comments for merge_keyspaces()
-// Atomically publishes schema changes. In particular, this function ensures
-// that when a base schema and a subset of its views are modified together (i.e.,
-// upon an alter table or alter type statement), then they are published together
-// as well, without any deferring in-between.
-static void merge_tables_and_views(distributed<service::storage_proxy>& proxy,
-    std::map<qualified_name, schema_mutations>&& tables_before,
-    std::map<qualified_name, schema_mutations>&& tables_after,
-    std::map<qualified_name, schema_mutations>&& views_before,
-    std::map<qualified_name, schema_mutations>&& views_after)
-{
-    auto tables_diff = diff_table_or_view(proxy, std::move(tables_before), std::move(tables_after), [] (auto&& sm) {
-        return create_table_from_mutations(std::move(sm));
-    });
-    auto views_diff = diff_table_or_view(proxy, std::move(views_before), std::move(views_after), [] (auto&& sm) {
-        return create_view_from_mutations(std::move(sm));
-    });
-
-    proxy.local().get_db().invoke_on_all([&] (database& db) {
-        return seastar::async([&] {
-            parallel_for_each(boost::range::join(tables_diff.created, views_diff.created), [&] (global_schema_ptr& gs) {
-                return db.add_column_family_and_make_directory(gs);
-            }).get();
-            for (auto&& gs : boost::range::join(tables_diff.created, views_diff.created)) {
-                db.find_column_family(gs).mark_ready_for_writes();
-            }
-            std::vector<bool> columns_changed;
-            columns_changed.reserve(tables_diff.altered.size() + views_diff.altered.size());
-            for (auto&& gs : boost::range::join(tables_diff.altered, views_diff.altered)) {
-                columns_changed.push_back(db.update_column_family(gs));
-            }
-            parallel_for_each(boost::range::join(tables_diff.dropped, views_diff.dropped), [&] (schema_diff::dropped_schema& dt) {
-                auto& s = *dt.schema.get();
-                return db.drop_column_family(s.ks_name(), s.cf_name(), [&] { return dt.jp.value(); });
-            }).get();
-
-            auto& mm = service::get_local_migration_manager();
-            auto it = columns_changed.begin();
-            std::vector<future<>> notifications;
-            notifications.reserve(tables_diff.size() + views_diff.size());
-            auto notify = [&] (auto& r, auto&& f) { boost::range::transform(r, std::back_inserter(notifications), f); };
-            notify(tables_diff.created, [&] (auto&& gs) { return mm.notify_create_column_family(gs); });
-            notify(tables_diff.altered, [&] (auto&& gs) { return mm.notify_update_column_family(gs, *it++); });
-            notify(tables_diff.dropped, [&] (auto&& dt) { return mm.notify_drop_column_family(dt.schema); });
-            notify(views_diff.created, [&] (auto&& gs) { return mm.notify_create_view(view_ptr(gs)); });
-            notify(views_diff.altered, [&] (auto&& gs) { return mm.notify_update_view(view_ptr(gs), *it++); });
-            notify(views_diff.dropped, [&] (auto&& dt) { return mm.notify_drop_view(view_ptr(dt.schema)); });
-            when_all(notifications.rbegin(), notifications.rend()).get();
-        });
-    }).get();
-}
-
-static inline void collect_types(std::set<sstring>& keys, schema_result& result, std::vector<user_type>& to)
-{
-    for (auto&& key : keys) {
-        auto&& value = result[key];
-        auto types = create_types_from_schema_partition(schema_result_value_type{key, std::move(value)});
-        std::move(types.begin(), types.end(), std::back_inserter(to));
-    }
-}
-
- // see the comments for merge_keyspaces()
-static void merge_types(distributed<service::storage_proxy>& proxy, schema_result&& before, schema_result&& after)
-{
-    std::vector<user_type> created, altered, dropped;
-
-    auto diff = difference(before, after, indirect_equal_to<lw_shared_ptr<query::result_set>>());
-
-    collect_types(diff.entries_only_on_left, before, dropped); // Keyspaces with no more types
-    collect_types(diff.entries_only_on_right, after, created); // New keyspaces with types
-
-    for (auto&& key : diff.entries_differing) {
-        // The user types of this keyspace differ, so diff the current types with the updated ones
-        auto current_types = proxy.local().get_db().local().find_keyspace(key).metadata()->user_types()->get_all_types();
-        decltype(current_types) updated_types;
-        auto ts = create_types_from_schema_partition(schema_result_value_type{key, std::move(after[key])});
-        updated_types.reserve(ts.size());
-        for (auto&& type : ts) {
-            updated_types[type->_name] = std::move(type);
-        }
-
-        auto delta = difference(current_types, updated_types, indirect_equal_to<user_type>());
-
-        for (auto&& key : delta.entries_only_on_left) {
-            dropped.emplace_back(current_types[key]);
-        }
-        for (auto&& key : delta.entries_only_on_right) {
-            created.emplace_back(std::move(updated_types[key]));
-        }
-        for (auto&& key : delta.entries_differing) {
-            altered.emplace_back(std::move(updated_types[key]));
-        }
-    }
-
-    proxy.local().get_db().invoke_on_all([&created, &dropped, &altered] (database& db) {
-        return seastar::async([&] {
-            for (auto&& type : created) {
-                auto user_type = dynamic_pointer_cast<const user_type_impl>(parse_type(type->name()));
-                db.find_keyspace(user_type->_keyspace).add_user_type(user_type);
-                service::get_local_migration_manager().notify_create_user_type(user_type).get();
-            }
-            for (auto&& type : dropped) {
-                auto user_type = dynamic_pointer_cast<const user_type_impl>(parse_type(type->name()));
-                db.find_keyspace(user_type->_keyspace).remove_user_type(user_type);
-                service::get_local_migration_manager().notify_drop_user_type(user_type).get();
-            }
-            for (auto&& type : altered) {
-                auto user_type = dynamic_pointer_cast<const user_type_impl>(parse_type(type->name()));
-                db.find_keyspace(user_type->_keyspace).add_user_type(user_type);
-                service::get_local_migration_manager().notify_update_user_type(user_type).get();
-            }
-        });
-    }).get();
-}
-
-#if 0
-    // see the comments for mergeKeyspaces()
-    private static void mergeFunctions(Map<DecoratedKey, ColumnFamily> before, Map<DecoratedKey, ColumnFamily> after)
-    {
-        List<UDFunction> created = new ArrayList<>();
-        List<UDFunction> altered = new ArrayList<>();
-        List<UDFunction> dropped = new ArrayList<>();
-
-        MapDifference<DecoratedKey, ColumnFamily> diff = Maps.difference(before, after);
-
-        // New keyspace with functions
-        for (Map.Entry<DecoratedKey, ColumnFamily> entry : diff.entriesOnlyOnRight().entrySet())
-            if (entry.getValue().hasColumns())
-                created.addAll(createFunctionsFromFunctionsPartition(new Row(entry.getKey(), entry.getValue())).values());
-
-        for (Map.Entry<DecoratedKey, MapDifference.ValueDifference<ColumnFamily>> entry : diff.entriesDiffering().entrySet())
-        {
-            ColumnFamily pre = entry.getValue().leftValue();
-            ColumnFamily post = entry.getValue().rightValue();
-
-            if (pre.hasColumns() && post.hasColumns())
-            {
-                MapDifference<ByteBuffer, UDFunction> delta =
-                    Maps.difference(createFunctionsFromFunctionsPartition(new Row(entry.getKey(), pre)),
-                                    createFunctionsFromFunctionsPartition(new Row(entry.getKey(), post)));
-
-                dropped.addAll(delta.entriesOnlyOnLeft().values());
-                created.addAll(delta.entriesOnlyOnRight().values());
-                Iterables.addAll(altered, Iterables.transform(delta.entriesDiffering().values(), new Function<MapDifference.ValueDifference<UDFunction>, UDFunction>()
-                {
-                    public UDFunction apply(MapDifference.ValueDifference<UDFunction> pair)
-                    {
-                        return pair.rightValue();
-                    }
-                }));
-            }
-            else if (pre.hasColumns())
-            {
-                dropped.addAll(createFunctionsFromFunctionsPartition(new Row(entry.getKey(), pre)).values());
-            }
-            else if (post.hasColumns())
-            {
-                created.addAll(createFunctionsFromFunctionsPartition(new Row(entry.getKey(), post)).values());
-            }
-        }
-
-        for (UDFunction udf : created)
-            Schema.instance.addFunction(udf);
-        for (UDFunction udf : altered)
-            Schema.instance.updateFunction(udf);
-        for (UDFunction udf : dropped)
-            Schema.instance.dropFunction(udf);
-    }
-
-    // see the comments for mergeKeyspaces()
-    private static void mergeAggregates(Map<DecoratedKey, ColumnFamily> before, Map<DecoratedKey, ColumnFamily> after)
-    {
-        List<UDAggregate> created = new ArrayList<>();
-        List<UDAggregate> altered = new ArrayList<>();
-        List<UDAggregate> dropped = new ArrayList<>();
-
-        MapDifference<DecoratedKey, ColumnFamily> diff = Maps.difference(before, after);
-
-        // New keyspace with functions
-        for (Map.Entry<DecoratedKey, ColumnFamily> entry : diff.entriesOnlyOnRight().entrySet())
-            if (entry.getValue().hasColumns())
-                created.addAll(createAggregatesFromAggregatesPartition(new Row(entry.getKey(), entry.getValue())).values());
-
-        for (Map.Entry<DecoratedKey, MapDifference.ValueDifference<ColumnFamily>> entry : diff.entriesDiffering().entrySet())
-        {
-            ColumnFamily pre = entry.getValue().leftValue();
-            ColumnFamily post = entry.getValue().rightValue();
-
-            if (pre.hasColumns() && post.hasColumns())
-            {
-                MapDifference<ByteBuffer, UDAggregate> delta =
-                    Maps.difference(createAggregatesFromAggregatesPartition(new Row(entry.getKey(), pre)),
-                                    createAggregatesFromAggregatesPartition(new Row(entry.getKey(), post)));
-
-                dropped.addAll(delta.entriesOnlyOnLeft().values());
-                created.addAll(delta.entriesOnlyOnRight().values());
-                Iterables.addAll(altered, Iterables.transform(delta.entriesDiffering().values(), new Function<MapDifference.ValueDifference<UDAggregate>, UDAggregate>()
-                {
-                    public UDAggregate apply(MapDifference.ValueDifference<UDAggregate> pair)
-                    {
-                        return pair.rightValue();
-                    }
-                }));
-            }
-            else if (pre.hasColumns())
-            {
-                dropped.addAll(createAggregatesFromAggregatesPartition(new Row(entry.getKey(), pre)).values());
-            }
-            else if (post.hasColumns())
-            {
-                created.addAll(createAggregatesFromAggregatesPartition(new Row(entry.getKey(), post)).values());
-            }
-        }
-
-        for (UDAggregate udf : created)
-            Schema.instance.addAggregate(udf);
-        for (UDAggregate udf : altered)
-            Schema.instance.updateAggregate(udf);
-        for (UDAggregate udf : dropped)
-            Schema.instance.dropAggregate(udf);
-    }
-#endif
-
-template<typename... Args>
-void set_cell_or_clustered(mutation& m, const clustering_key & ckey, Args && ...args) {
-    m.set_clustered_cell(ckey, std::forward<Args>(args)...);
-}
-
-template<typename... Args>
-void set_cell_or_clustered(mutation& m, const exploded_clustering_prefix & ckey, Args && ...args) {
-    m.set_cell(ckey, std::forward<Args>(args)...);
-}
-
-template<typename Map, typename Func>
-static atomic_cell_or_collection
-make_map_mutation(const Map& map,
-        const column_definition& column,
-        api::timestamp_type timestamp,
-        Func&& f)
-{
-    auto column_type = static_pointer_cast<const map_type_impl>(column.type);
-    auto ktyp = column_type->get_keys_type();
-    auto vtyp = column_type->get_values_type();
-
-    if (column_type->is_multi_cell()) {
-        map_type_impl::mutation mut;
-
-        for (auto&& entry : map) {
-            auto te = f(entry);
-            mut.cells.emplace_back(ktyp->decompose(te.first), atomic_cell::make_live(timestamp, vtyp->decompose(te.second)));
-        }
-
-        auto col_mut = column_type->serialize_mutation_form(std::move(mut));
-        return atomic_cell_or_collection::from_collection_mutation(std::move(col_mut));
-    } else {
-        map_type_impl::native_type tmp;
-        tmp.reserve(map.size());
-        std::transform(map.begin(), map.end(), std::inserter(tmp, tmp.end()), f);
-        return atomic_cell::make_live(timestamp, column_type->decompose(make_map_value(column_type, std::move(tmp))));
-    }
-}
-
-template<typename Map>
-static atomic_cell_or_collection
-make_map_mutation(const Map& map,
-        const column_definition& column,
-        api::timestamp_type timestamp)
-{
-    return make_map_mutation(map, column, timestamp, [](auto&& p) { return std::forward<decltype(p)>(p); });
-}
-
-template<typename K, typename Map>
-static void store_map(mutation& m, const K& ckey, const bytes& name, api::timestamp_type timestamp, const Map& map) {
-    auto s = m.schema();
-    auto column = s->get_column_definition(name);
-    assert(column);
-    set_cell_or_clustered(m, ckey, *column, make_map_mutation(map, *column, timestamp));
-}
-
-/*
- * Keyspace metadata serialization/deserialization.
- */
-
-std::vector<mutation> make_create_keyspace_mutations(lw_shared_ptr<keyspace_metadata> keyspace, api::timestamp_type timestamp, bool with_tables_and_types_and_functions)
-{
-    std::vector<mutation> mutations;
-    schema_ptr s = keyspaces();
-    auto pkey = partition_key::from_singular(*s, keyspace->name());
-    mutation m(pkey, s);
-    auto ckey = clustering_key_prefix::make_empty();
-    m.set_cell(ckey, "durable_writes", keyspace->durable_writes(), timestamp);
-
-    {
-        auto map = keyspace->strategy_options();
-        map["class"] = keyspace->strategy_name();
-        store_map(m, ckey, "replication", timestamp, map);
-    }
-
-    mutations.emplace_back(std::move(m));
-
-    if (with_tables_and_types_and_functions) {
-        for (auto&& kv : keyspace->user_types()->get_all_types()) {
-            add_type_to_schema_mutation(kv.second, timestamp, mutations);
-        }
-        for (auto&& s : keyspace->cf_meta_data() | boost::adaptors::map_values) {
-            add_table_or_view_to_schema_mutation(s, timestamp, true, mutations);
-        }
-    }
-    return mutations;
-}
-
-std::vector<mutation> make_drop_keyspace_mutations(lw_shared_ptr<keyspace_metadata> keyspace, api::timestamp_type timestamp)
-{
-    std::vector<mutation> mutations;
-    for (auto&& schema_table : all_tables()) {
-        auto pkey = partition_key::from_exploded(*schema_table, {utf8_type->decompose(keyspace->name())});
-        mutation m{pkey, schema_table};
-        m.partition().apply(tombstone{timestamp, gc_clock::now()});
-        mutations.emplace_back(std::move(m));
-    }
-    auto&& schema = db::system_keyspace::built_indexes();
-    auto pkey = partition_key::from_exploded(*schema, {utf8_type->decompose(keyspace->name())});
-    mutation m{pkey, schema};
-    m.partition().apply(tombstone{timestamp, gc_clock::now()});
-    mutations.emplace_back(std::move(m));
-    return mutations;
-}
-
-/**
- * Deserialize only Keyspace attributes without nested tables or types
- *
- * @param partition Keyspace attributes in serialized form
- */
-lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result_value_type& result)
-{
-    auto&& rs = result.second;
-    if (rs->empty()) {
-        throw std::runtime_error("query result has no rows");
-    }
-    auto&& row = rs->row(0);
-    auto keyspace_name = row.get_nonnull<sstring>("keyspace_name");
-    // We get called from multiple shards with result set originating on only one of them.
-    // Cannot use copying accessors for "deep" types like map, because we will hit shared_ptr asserts
-    // (or screw up shared pointers)
-    const auto& replication = value_cast<map_type_impl::native_type>(row.get_data_value("replication"));
-
-    std::map<sstring, sstring> strategy_options;
-    for (auto& p : replication) {
-        strategy_options.emplace(value_cast<sstring>(p.first), value_cast<sstring>(p.second));
-    }
-    auto strategy_name = strategy_options["class"];
-    strategy_options.erase("class");
-    bool durable_writes = row.get_nonnull<bool>("durable_writes");
-    return make_lw_shared<keyspace_metadata>(keyspace_name, strategy_name, strategy_options, durable_writes);
-}
-
-template<typename K, typename V>
-static std::map<K, V> get_map(const query::result_set_row& row, const sstring& name) {
-    std::map<K, V> map;
-
-    auto values = row.get_nonnull<map_type_impl::native_type>(name);
-    for (auto&& entry : values) {
-        map.emplace(value_cast<K>(entry.first), value_cast<V>(entry.second));
-    };
-
-    return map;
-}
-
-template<typename V>
-static std::vector<V> get_list(const query::result_set_row& row, const sstring& name) {
-    std::vector<V> list;
-
-    auto values = row.get_nonnull<list_type_impl::native_type>(name);
-    for (auto&& v : values) {
-        list.emplace_back(value_cast<V>(v));
-    };
-
-    return list;
-}
-
-std::vector<user_type> create_types_from_schema_partition(const schema_result_value_type& result)
-{
-    cql_type_parser::raw_builder builder(result.first);
-    for (auto&& row : result.second->rows()) {
-        builder.add(row.get_nonnull<sstring>("type_name"),
-            get_list<sstring>(row, "field_names"),
-            get_list<sstring>(row, "field_types"));
-    }
-    return builder.build();
-}
-
-/*
- * User type metadata serialization/deserialization
- */
-
-template<typename T, typename Func>
-static atomic_cell_or_collection
-make_list_mutation(const std::vector<T>& values,
-        const column_definition& column,
-        api::timestamp_type timestamp,
-        Func&& f)
-{
-    auto column_type = static_pointer_cast<const list_type_impl>(column.type);
-    auto vtyp = column_type->get_elements_type();
-
-    if (column_type->is_multi_cell()) {
-        list_type_impl::mutation m;
-        m.cells.reserve(values.size());
-        m.tomb.timestamp = timestamp - 1;
-        m.tomb.deletion_time = gc_clock::now();
-
-        for (auto&& value : values) {
-            auto dv = f(value);
-            auto uuid = utils::UUID_gen::get_time_UUID_bytes();
-            m.cells.emplace_back(
-                bytes(reinterpret_cast<const int8_t*>(uuid.data()), uuid.size()),
-                atomic_cell::make_live(timestamp, vtyp->decompose(std::move(dv))));
-        }
-
-        auto list_mut = column_type->serialize_mutation_form(std::move(m));
-        return atomic_cell_or_collection::from_collection_mutation(std::move(list_mut));
-    } else {
-        list_type_impl::native_type tmp;
-        tmp.reserve(values.size());
-        std::transform(values.begin(), values.end(), std::back_inserter(tmp), f);
-        return atomic_cell::make_live(timestamp, column_type->decompose(make_list_value(column_type, std::move(tmp))));
-    }
-}
-
-void add_type_to_schema_mutation(user_type type, api::timestamp_type timestamp, std::vector<mutation>& mutations)
-{
-    schema_ptr s = types();
-    auto pkey = partition_key::from_singular(*s, type->_keyspace);
-    auto ckey = clustering_key::from_singular(*s, type->get_name_as_string());
-    mutation m{pkey, s};
-
-    auto field_names_column = s->get_column_definition("field_names");
-    auto field_names = make_list_mutation(type->field_names(), *field_names_column, timestamp, [](auto&& name) {
-        return utf8_type->deserialize(name);
-    });
-    m.set_clustered_cell(ckey, *field_names_column, std::move(field_names));
-
-    auto field_types_column = s->get_column_definition("field_types");
-    auto field_types = make_list_mutation(type->field_types(), *field_types_column, timestamp, [](auto&& type) {
-        return data_value(type->as_cql3_type()->to_string());
-    });
-    m.set_clustered_cell(ckey, *field_types_column, std::move(field_types));
-
-    mutations.emplace_back(std::move(m));
-}
-
-future<std::vector<mutation>> make_create_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp)
-{
-    std::vector<mutation> mutations;
-    add_type_to_schema_mutation(type, timestamp, mutations);
-
-    // Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
-    return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
-}
-
-future<std::vector<mutation>> make_drop_type_mutations(lw_shared_ptr<keyspace_metadata> keyspace, user_type type, api::timestamp_type timestamp)
-{
-    std::vector<mutation> mutations;
-    schema_ptr s = types();
-    auto pkey = partition_key::from_singular(*s, type->_keyspace);
-    auto ckey = clustering_key::from_singular(*s, type->get_name_as_string());
-    mutation m{pkey, s};
-    m.partition().apply_delete(*s, ckey, tombstone(timestamp, gc_clock::now()));
-    mutations.emplace_back(std::move(m));
-
-    // Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
-    return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
-}
-
-/*
- * Table metadata serialization/deserialization.
- */
-
-future<std::vector<mutation>> make_create_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp)
-{
-    std::vector<mutation> mutations;
-    add_table_or_view_to_schema_mutation(table, timestamp, true, mutations);
-
-    // Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
-    return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
-}
-
-static void add_table_params_to_mutations(mutation& m, const clustering_key& ckey, schema_ptr table, api::timestamp_type timestamp) {
-    m.set_clustered_cell(ckey, "bloom_filter_fp_chance", table->bloom_filter_fp_chance(), timestamp);
-    m.set_clustered_cell(ckey, "comment", table->comment(), timestamp);
-    m.set_clustered_cell(ckey, "dclocal_read_repair_chance", table->dc_local_read_repair_chance(), timestamp);
-    m.set_clustered_cell(ckey, "default_time_to_live", table->default_time_to_live().count(), timestamp);
-    m.set_clustered_cell(ckey, "gc_grace_seconds", table->gc_grace_seconds().count(), timestamp);
-    m.set_clustered_cell(ckey, "max_index_interval", table->max_index_interval(), timestamp);
-    m.set_clustered_cell(ckey, "memtable_flush_period_in_ms", table->memtable_flush_period(), timestamp);
-    m.set_clustered_cell(ckey, "min_index_interval", table->min_index_interval(), timestamp);
-    m.set_clustered_cell(ckey, "read_repair_chance", table->read_repair_chance(), timestamp);
-    m.set_clustered_cell(ckey, "speculative_retry", table->speculative_retry().to_sstring(), timestamp);
-    m.set_clustered_cell(ckey, "crc_check_chance", table->crc_check_chance(), timestamp);
-
-    store_map(m, ckey, "caching", timestamp, table->caching_options().to_map());
-
-    {
-        auto map = table->compaction_strategy_options();
-        map["class"] = sstables::compaction_strategy::name(table->configured_compaction_strategy());
-        store_map(m, ckey, "compaction", timestamp, map);
-    }
-
-    store_map(m, ckey, "compression", timestamp, table->get_compressor_params().get_options());
-    store_map(m, ckey, "extensions", timestamp, std::map<sstring, bytes>());
-}
-
-static data_type expand_user_type(data_type);
-
-static std::vector<data_type> expand_user_types(const std::vector<data_type>& types) {
-    std::vector<data_type> result;
-    result.reserve(types.size());
-    std::transform(types.begin(), types.end(), std::back_inserter(result), &expand_user_type);
-    return result;
-}
-
-static data_type expand_user_type(data_type original) {
-    if (original->is_user_type()) {
-        return tuple_type_impl::get_instance(
-                expand_user_types(
-                        static_pointer_cast<const user_type_impl>(
-                                original)->field_types()));
-    }
-    if (original->is_tuple()) {
-        return tuple_type_impl::get_instance(
-                expand_user_types(
-                        static_pointer_cast<
-                                const tuple_type_impl>(
-                                original)->all_types()));
-    }
-    if (original->is_reversed()) {
-        return reversed_type_impl::get_instance(
-                expand_user_type(original->underlying_type()));
-    }
-
-    if (original->is_collection()) {
-
-        auto ct = static_pointer_cast<const collection_type_impl>(original);
-
-        if (ct->is_list()) {
-            return list_type_impl::get_instance(
-                    expand_user_type(ct->value_comparator()),
-                    ct->is_multi_cell());
-        }
-        if (ct->is_map()) {
-            return map_type_impl::get_instance(
-                    expand_user_type(ct->name_comparator()),
-                    expand_user_type(ct->value_comparator()),
-                    ct->is_multi_cell());
-        }
-        if (ct->is_set()) {
-            return set_type_impl::get_instance(
-                    expand_user_type(ct->name_comparator()),
-                    ct->is_multi_cell());
-        }
-    }
-
-    return original;
-}
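Aside: expand_user_type() is a bottom-up rewrite of a type tree — every user-defined type becomes a tuple of its (already expanded) field types, and containers recurse element-wise, so dropped-column types never name a UDT that might later be dropped. A toy model of that recursion over a hypothetical miniature type tree (type_node and its kinds are illustrative only):

    #include <memory>
    #include <vector>

    struct type_node {
        enum class kind { primitive, user, tuple, list } k;
        std::vector<std::shared_ptr<type_node>> children; // UDT fields or element type
    };

    std::shared_ptr<type_node> expand(const std::shared_ptr<type_node>& t) {
        auto out = std::make_shared<type_node>(*t);
        for (auto& c : out->children) {
            c = expand(c);                       // expand depth-first
        }
        if (out->k == type_node::kind::user) {
            out->k = type_node::kind::tuple;     // UDT -> structurally equal tuple
        }
        return out;
    }
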
-
-static void add_dropped_column_to_schema_mutation(schema_ptr table, const sstring& name, const schema::dropped_column& column, api::timestamp_type timestamp, mutation& m) {
-    auto ckey = clustering_key::from_exploded(*dropped_columns(), {utf8_type->decompose(table->cf_name()), utf8_type->decompose(name)});
-    m.set_clustered_cell(ckey, "dropped_time", column.timestamp, timestamp);
-
-    /*
-     * From origin:
-     * we never store actual UDT names in dropped column types (so that we can safely drop types if nothing refers to
-     * them anymore), so before storing dropped columns in schema we expand UDTs to tuples. See expandUserTypes method.
-     * Because of that, we can safely pass Types.none() to parse()
-     */
-    m.set_clustered_cell(ckey, "type", expand_user_type(column.type)->as_cql3_type()->to_string(), timestamp);
-}
-
-static mutation make_scylla_tables_mutation(schema_ptr table, api::timestamp_type timestamp) {
-    schema_ptr s = tables();
-    auto pkey = partition_key::from_singular(*s, table->ks_name());
-    auto ckey = clustering_key::from_singular(*s, table->cf_name());
-    mutation m(pkey, scylla_tables());
-    m.set_clustered_cell(ckey, "version", utils::UUID(table->version()), timestamp);
-    return m;
-}
-
-static schema_mutations make_table_mutations(schema_ptr table, api::timestamp_type timestamp, bool with_columns_and_triggers)
-{
-    // When adding new schema properties, don't set cells for default values so that
-    // both old and new nodes will see the same version during rolling upgrades.
-
-    // For property that can be null (and can be changed), we insert tombstones, to make sure
-    // we don't keep a property the user has removed
-    schema_ptr s = tables();
-    auto pkey = partition_key::from_singular(*s, table->ks_name());
-    mutation m{pkey, s};
-    auto ckey = clustering_key::from_singular(*s, table->cf_name());
-    m.set_clustered_cell(ckey, "id", table->id(), timestamp);
-
-    auto scylla_tables_mutation = make_scylla_tables_mutation(table, timestamp);
-
-    {
-        list_type_impl::native_type flags;
-        if (table->is_super()) {
-            flags.emplace_back("super");
-        }
-        if (table->is_dense()) {
-            flags.emplace_back("dense");
-        }
-        if (table->is_compound()) {
-            flags.emplace_back("compound");
-        }
-        if (table->is_counter()) {
-            flags.emplace_back("counter");
-        }
-
-        m.set_clustered_cell(ckey, "flags", make_list_value(s->get_column_definition("flags")->type, flags), timestamp);
-    }
-
-    add_table_params_to_mutations(m, ckey, table, timestamp);
-
-    mutation columns_mutation(pkey, columns());
-    mutation dropped_columns_mutation(pkey, dropped_columns());
-    mutation indices_mutation(pkey, indexes());
-
-    if (with_columns_and_triggers) {
-        for (auto&& column : table->v3().all_columns()) {
-            add_column_to_schema_mutation(table, column, timestamp, columns_mutation);
-        }
-        for (auto&& index : table->indices()) {
-            add_index_to_schema_mutation(table, index, timestamp, indices_mutation);
-        }
-        // TODO: triggers
-
-        for (auto&& e : table->dropped_columns()) {
-            add_dropped_column_to_schema_mutation(table, e.first, e.second, timestamp, dropped_columns_mutation);
-        }
-    }
-
-    return schema_mutations{std::move(m), std::move(columns_mutation), std::move(indices_mutation), std::move(dropped_columns_mutation),
-                            std::move(scylla_tables_mutation)};
-}
-
-void add_table_or_view_to_schema_mutation(schema_ptr s, api::timestamp_type timestamp, bool with_columns, std::vector<mutation>& mutations)
-{
-    make_schema_mutations(s, timestamp, with_columns).copy_to(mutations);
-}
-
-static void make_update_indices_mutations(
-        schema_ptr old_table,
-        schema_ptr new_table,
-        api::timestamp_type timestamp,
-        std::vector<mutation>& mutations)
-{
-    mutation indices_mutation(partition_key::from_singular(*indexes(), old_table->ks_name()), indexes());
-
-    auto diff = difference(old_table->all_indices(), new_table->all_indices());
-
-    // indices that are no longer needed
-    for (auto&& name : diff.entries_only_on_left) {
-        const index_metadata& index = old_table->all_indices().at(name);
-        drop_index_from_schema_mutation(old_table, index, timestamp, mutations);
-    }
-
-    // newly added indices and old indices with updated attributes
-    for (auto&& name : boost::range::join(diff.entries_differing, diff.entries_only_on_right)) {
-        const index_metadata& index = new_table->all_indices().at(name);
-        add_index_to_schema_mutation(new_table, index, timestamp, indices_mutation);
-    }
-
-    mutations.emplace_back(std::move(indices_mutation));
-}
-
-static void add_drop_column_to_mutations(schema_ptr table, const sstring& name, const schema::dropped_column& dc, api::timestamp_type timestamp, std::vector<mutation>& mutations) {
-    schema_ptr s = dropped_columns();
-    auto pkey = partition_key::from_singular(*s, table->ks_name());
-    auto ckey = clustering_key::from_exploded(*s, {utf8_type->decompose(table->cf_name()), utf8_type->decompose(name)});
-    mutation m(pkey, s);
-    add_dropped_column_to_schema_mutation(table, name, dc, timestamp, m);
-    mutations.emplace_back(std::move(m));
-}
-
-static void make_update_columns_mutations(schema_ptr old_table,
-        schema_ptr new_table,
-        api::timestamp_type timestamp,
-        bool from_thrift,
-        std::vector<mutation>& mutations) {
-    mutation columns_mutation(partition_key::from_singular(*columns(), old_table->ks_name()), columns());
-
-    auto diff = difference(old_table->v3().columns_by_name(), new_table->v3().columns_by_name());
-
-    // columns that are no longer needed
-    for (auto&& name : diff.entries_only_on_left) {
-        // Thrift only knows about the REGULAR ColumnDefinition type, so don't consider other type
-        // are being deleted just because they are not here.
-        const column_definition& column = *old_table->v3().columns_by_name().at(name);
-        if (from_thrift && !column.is_regular()) {
-            continue;
-        }
-
-        drop_column_from_schema_mutation(columns(), old_table, column.name_as_text(), timestamp, mutations);
-    }
-
-    // newly added columns and old columns with updated attributes
-    for (auto&& name : boost::range::join(diff.entries_differing, diff.entries_only_on_right)) {
-        const column_definition& column = *new_table->v3().columns_by_name().at(name);
-        add_column_to_schema_mutation(new_table, column, timestamp, columns_mutation);
-    }
-
-    mutations.emplace_back(std::move(columns_mutation));
-
-    // dropped columns
-    auto dc_diff = difference(old_table->dropped_columns(), new_table->dropped_columns());
-
-    // newly dropped columns
-    // columns added then dropped again
-    for (auto& name : boost::range::join(dc_diff.entries_differing, dc_diff.entries_only_on_right)) {
-        add_drop_column_to_mutations(new_table, name, new_table->dropped_columns().at(name), timestamp, mutations);
-    }
-}
-
-future<std::vector<mutation>> make_update_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace,
-        schema_ptr old_table,
-        schema_ptr new_table,
-        api::timestamp_type timestamp,
-        bool from_thrift)
-{
-    std::vector<mutation> mutations;
-    add_table_or_view_to_schema_mutation(new_table, timestamp, false, mutations);
-    make_update_indices_mutations(old_table, new_table, timestamp, mutations);
-    make_update_columns_mutations(std::move(old_table), std::move(new_table), timestamp, from_thrift, mutations);
-
-    warn(unimplemented::cause::TRIGGERS);
-#if 0
-        MapDifference<String, TriggerDefinition> triggerDiff = Maps.difference(oldTable.getTriggers(), newTable.getTriggers());
-
-        // dropped triggers
-        for (TriggerDefinition trigger : triggerDiff.entriesOnlyOnLeft().values())
-            dropTriggerFromSchemaMutation(oldTable, trigger, timestamp, mutation);
-
-        // newly created triggers
-        for (TriggerDefinition trigger : triggerDiff.entriesOnlyOnRight().values())
-            addTriggerToSchemaMutation(newTable, trigger, timestamp, mutation);
-
-#endif
-    // Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
-    return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
-}
-
-static void make_drop_table_or_view_mutations(schema_ptr schema_table,
-        schema_ptr table_or_view,
-        api::timestamp_type timestamp,
-        std::vector<mutation>& mutations) {
-    auto pkey = partition_key::from_singular(*schema_table, table_or_view->ks_name());
-    mutation m{pkey, schema_table};
-    auto ckey = clustering_key::from_singular(*schema_table, table_or_view->cf_name());
-    m.partition().apply_delete(*schema_table, ckey, tombstone(timestamp, gc_clock::now()));
-    mutations.emplace_back(m);
-    for (auto& column : table_or_view->v3().all_columns()) {
-        drop_column_from_schema_mutation(columns(), table_or_view, column.name_as_text(), timestamp, mutations);
-    }
-    for (auto& column : table_or_view->dropped_columns() | boost::adaptors::map_keys) {
-        drop_column_from_schema_mutation(dropped_columns(), table_or_view, column, timestamp, mutations);
-    }
-    {
-        mutation m{pkey, scylla_tables()};
-        m.partition().apply_delete(*scylla_tables(), ckey, tombstone(timestamp, gc_clock::now()));
-        mutations.emplace_back(m);
-    }
-}
-
-future<std::vector<mutation>> make_drop_table_mutations(lw_shared_ptr<keyspace_metadata> keyspace, schema_ptr table, api::timestamp_type timestamp)
-{
-    std::vector<mutation> mutations;
-    make_drop_table_or_view_mutations(tables(), std::move(table), timestamp, mutations);
-
-#if 0
-        for (TriggerDefinition trigger : table.getTriggers().values())
-            dropTriggerFromSchemaMutation(table, trigger, timestamp, mutation);
-
-        // TODO: get rid of in #6717
-        ColumnFamily indexCells = mutation.addOrGet(SystemKeyspace.BuiltIndexes);
-        for (String indexName : Keyspace.open(keyspace.name).getColumnFamilyStore(table.cfName).getBuiltIndexes())
-            indexCells.addTombstone(indexCells.getComparator().makeCellName(indexName), ldt, timestamp);
-#endif
-    // Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631).
-    return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)});
-}
-
-static future<schema_mutations> read_table_mutations(distributed<service::storage_proxy>& proxy, const qualified_name& table, schema_ptr s)
-{
-    return when_all_succeed(
-        read_schema_partition_for_table(proxy, s, table.keyspace_name, table.table_name),
-        read_schema_partition_for_table(proxy, columns(), table.keyspace_name, table.table_name),
-        read_schema_partition_for_table(proxy, dropped_columns(), table.keyspace_name, table.table_name),
-        read_schema_partition_for_table(proxy, indexes(), table.keyspace_name, table.table_name),
-        read_schema_partition_for_table(proxy, scylla_tables(), table.keyspace_name, table.table_name)).then(
-            [] (mutation cf_m, mutation col_m, mutation dropped_m, mutation idx_m, mutation st_m) {
-                return schema_mutations{std::move(cf_m), std::move(col_m), std::move(idx_m), std::move(dropped_m), std::move(st_m)};
-            });
-#if 0
-        // FIXME:
-    Row serializedTriggers = readSchemaPartitionForTable(TRIGGERS, ksName, cfName);
-    try
-    {
-        for (TriggerDefinition trigger : createTriggersFromTriggersPartition(serializedTriggers))
-            cfm.addTriggerDefinition(trigger);
-    }
-    catch (InvalidRequestException e)
-    {
-        throw new RuntimeException(e);
-    }
-#endif
-}
-
-future<schema_ptr> create_table_from_name(distributed<service::storage_proxy>& proxy, const sstring& keyspace, const sstring& table)
-{
-    return do_with(qualified_name(keyspace, table), [&proxy] (auto&& qn) {
-        return read_table_mutations(proxy, qn, tables()).then([qn] (schema_mutations sm) {
-            if (!sm.live()) {
-                throw std::runtime_error(sprint("%s:%s not found in the schema definitions keyspace.", qn.keyspace_name, qn.table_name));
-            }
-            return create_table_from_mutations(std::move(sm));
-        });
-    });
-}
-
-/**
- * Deserialize tables from low-level schema representation, all of them belong to the same keyspace
- *
- * @return map containing name of the table and its metadata for faster lookup
- */
-future<std::map<sstring, schema_ptr>> create_tables_from_tables_partition(distributed<service::storage_proxy>& proxy, const schema_result::mapped_type& result)
-{
-    auto tables = make_lw_shared<std::map<sstring, schema_ptr>>();
-    return parallel_for_each(result->rows().begin(), result->rows().end(), [&proxy, tables] (auto&& row) {
-        return create_table_from_table_row(proxy, row).then([tables] (schema_ptr&& cfm) {
-            tables->emplace(cfm->cf_name(), std::move(cfm));
-        });
-    }).then([tables] {
-        return std::move(*tables);
-    });
-}
-
-#if 0
-    public static CFMetaData createTableFromTablePartitionAndColumnsPartition(Row serializedTable, Row serializedColumns)
-    {
-        String query = String.format("SELECT * FROM %s.%s", SystemKeyspace.NAME, COLUMNFAMILIES);
-        return createTableFromTableRowAndColumnsPartition(QueryProcessor.resultify(query, serializedTable).one(), serializedColumns);
-    }
-#endif
-
-/**
- * Deserialize table metadata from low-level representation
- *
- * @return Metadata deserialized from schema
- */
-static future<schema_ptr> create_table_from_table_row(distributed<service::storage_proxy>& proxy, const query::result_set_row& row)
-{
-    auto ks_name = row.get_nonnull<sstring>("keyspace_name");
-    auto cf_name = row.get_nonnull<sstring>("table_name");
-    return create_table_from_name(proxy, ks_name, cf_name);
-}
-
-static void prepare_builder_from_table_row(schema_builder& builder, const query::result_set_row& table_row)
-{
-    // These row reads have been purposefully reordered to match the origin counterpart. For easier matching.
-    if (table_row.has("bloom_filter_fp_chance")) {
-        builder.set_bloom_filter_fp_chance(table_row.get_nonnull<double>("bloom_filter_fp_chance"));
-    } else {
-        builder.set_bloom_filter_fp_chance(builder.get_bloom_filter_fp_chance());
-    }
-
-    if (table_row.has("caching")) {
-        auto map = get_map<sstring, sstring>(table_row, "caching");
-        builder.set_caching_options(caching_options::from_map(map));
-    }
-
-    if (table_row.has("comment")) {
-        builder.set_comment(table_row.get_nonnull<sstring>("comment"));
-    }
-
-    if (table_row.has("compaction")) {
-        auto map = get_map<sstring, sstring>(table_row, "compaction");
-
-        auto i = map.find("class");
-        if (i != map.end()) {
-            try {
-                builder.set_compaction_strategy(sstables::compaction_strategy::type(i->second));
-                map.erase(i);
-            } catch (const exceptions::configuration_exception& e) {
-                // If compaction strategy class isn't supported, fallback to size tiered.
-                slogger.warn("Falling back to size-tiered compaction strategy after the problem: {}", e.what());
-                builder.set_compaction_strategy(sstables::compaction_strategy_type::size_tiered);
-            }
-        }
-        if (map.count("max_threshold")) {
-            builder.set_max_compaction_threshold(std::stoi(map["max_threshold"]));
-        }
-        if (map.count("min_threshold")) {
-            builder.set_min_compaction_threshold(std::stoi(map["min_threshold"]));
-        }
-        if (map.count("enabled")) {
-            builder.set_compaction_enabled(boost::algorithm::iequals(map["enabled"], "true"));
-        }
-
-        builder.set_compaction_strategy_options(map);
-    }
-
-    if (table_row.has("compression")) {
-        auto map = get_map<sstring, sstring>(table_row, "compression");
-        compression_parameters cp(map);
-        builder.set_compressor_params(cp);
-    }
-
-    if (table_row.has("dclocal_read_repair_chance")) {
-        builder.set_dc_local_read_repair_chance(table_row.get_nonnull<double>("dclocal_read_repair_chance"));
-    }
-
-    if (table_row.has("default_time_to_live")) {
-        builder.set_default_time_to_live(gc_clock::duration(table_row.get_nonnull<int32_t>("default_time_to_live")));
-    }
-
-    if (table_row.has("extensions")) {
-        auto map = get_map<sstring, bytes>(table_row, "extensions");
-        // TODO: extensions
-    }
-
-    if (table_row.has("gc_grace_seconds")) {
-        builder.set_gc_grace_seconds(table_row.get_nonnull<int32_t>("gc_grace_seconds"));
-    }
-
-    if (table_row.has("min_index_interval")) {
-        builder.set_min_index_interval(table_row.get_nonnull<int32_t>("min_index_interval"));
-    }
-
-    if (table_row.has("memtable_flush_period_in_ms")) {
-        builder.set_memtable_flush_period(table_row.get_nonnull<int32_t>("memtable_flush_period_in_ms"));
-    }
-
-    if (table_row.has("max_index_interval")) {
-        builder.set_max_index_interval(table_row.get_nonnull<int32_t>("max_index_interval"));
-    }
-
-    if (table_row.has("read_repair_chance")) {
-        builder.set_read_repair_chance(table_row.get_nonnull<double>("read_repair_chance"));
-    }
-
-    if (table_row.has("crc_check_chance")) {
-        builder.set_crc_check_chance(table_row.get_nonnull<double>("crc_check_chance"));
-    }
-
-    if (table_row.has("speculative_retry")) {
-        builder.set_speculative_retry(table_row.get_nonnull<sstring>("speculative_retry"));
-    }
-}
-
-schema_ptr create_table_from_mutations(schema_mutations sm, std::experimental::optional<table_schema_version> version)
-{
-    auto table_rs = query::result_set(sm.columnfamilies_mutation());
-    query::result_set_row table_row = table_rs.row(0);
-
-    auto ks_name = table_row.get_nonnull<sstring>("keyspace_name");
-    auto cf_name = table_row.get_nonnull<sstring>("table_name");
-    auto id = table_row.get_nonnull<utils::UUID>("id");
-    schema_builder builder{ks_name, cf_name, id};
-
-    auto cf = cf_type::standard;
-    auto is_dense = false;
-    auto is_counter = false;
-    auto is_compound = false;
-    auto flags = table_row.get("flags");
-
-    if (flags) {
-        for (auto& s : *flags) {
-            if (s == "super") {
-                // cf = cf_type::super;
-                fail(unimplemented::cause::SUPER);
-            } else if (s == "dense") {
-                is_dense = true;
-            } else if (s == "compound") {
-                is_compound = true;
-            } else if (s == "counter") {
-                is_counter = true;
-            }
-        }
-    }
-
-    std::vector<column_definition> column_defs = create_columns_from_column_rows(
-            query::result_set(sm.columns_mutation()),
-            ks_name,
-            cf_name,/*,
-            fullRawComparator, */
-            cf == cf_type::super);
-
-
-    builder.set_is_dense(is_dense);
-    builder.set_is_compound(is_compound);
-    builder.set_is_counter(is_counter);
-
-    prepare_builder_from_table_row(builder, table_row);
-
-    v3_columns columns(std::move(column_defs), is_dense, is_compound);
-    columns.apply_to(builder);
-
-    std::vector<index_metadata> index_defs;
-    if (sm.indices_mutation()) {
-        index_defs = create_indices_from_index_rows(query::result_set(sm.indices_mutation().value()), ks_name, cf_name);
-    }
-    for (auto&& index : index_defs) {
-        builder.with_index(index);
-    }
-
-    if (sm.dropped_columns_mutation()) {
-        query::result_set dcr(*sm.dropped_columns_mutation());
-        for (auto& row : dcr.rows()) {
-            auto name = row.get_nonnull<sstring>("column_name");
-            auto type = cql_type_parser::parse(ks_name, row.get_nonnull<sstring>("type"));
-            auto time = row.get_nonnull<db_clock::time_point>("dropped_time");
-            builder.without_column(name, type, time.time_since_epoch().count());
-        }
-    }
-
-    if (version) {
-        builder.with_version(*version);
-    } else {
-        builder.with_version(sm.digest());
-    }
-    return builder.build();
-}
-
-/*
- * Column metadata serialization/deserialization.
- */
-
-static void add_column_to_schema_mutation(schema_ptr table,
-        const column_definition& column,
-        api::timestamp_type timestamp,
-        mutation& m)
-{
-    auto ckey = clustering_key::from_exploded(*m.schema(), {utf8_type->decompose(table->cf_name()),
-                                                            utf8_type->decompose(column.name_as_text())});
-
-    auto order = "NONE";
-    if (column.is_clustering_key()) {
-        order = "ASC";
-    }
-    auto type = column.type;
-    if (type->is_reversed()) {
-        type = type->underlying_type();
-        if (column.is_clustering_key()) {
-            order = "DESC";
-        }
-    }
-    auto pos = -1;
-    if (column.is_primary_key()) {
-        pos = int32_t(table->position(column));
-    }
-
-    m.set_clustered_cell(ckey, "column_name_bytes", data_value(column.name()), timestamp);
-    m.set_clustered_cell(ckey, "kind", serialize_kind(column.kind), timestamp);
-    m.set_clustered_cell(ckey, "position", pos, timestamp);
-    m.set_clustered_cell(ckey, "clustering_order", sstring(order), timestamp);
-    m.set_clustered_cell(ckey, "type", type->as_cql3_type()->to_string(), timestamp);
-}
-
-sstring serialize_kind(column_kind kind)
-{
-    switch (kind) {
-    case column_kind::partition_key:  return "partition_key";
-    case column_kind::clustering_key: return "clustering";
-    case column_kind::static_column:  return "static";
-    case column_kind::regular_column: return "regular";
-    default:                          throw std::invalid_argument("unknown column kind");
-    }
-}
-
-column_kind deserialize_kind(sstring kind) {
-    if (kind == "partition_key") {
-        return column_kind::partition_key;
-    } else if (kind == "clustering_key" || kind == "clustering") {
-        return column_kind::clustering_key;
-    } else if (kind == "static") {
-        return column_kind::static_column;
-    } else if (kind == "regular") {
-        return column_kind::regular_column;
-    } else if (kind == "compact_value") { // backward compatibility
-        return column_kind::regular_column;
-    } else {
-        throw std::invalid_argument("unknown column kind: " + kind);
-    }
-}
index_metadata_kind::keys: return "KEYS"; - case index_metadata_kind::composites: return "COMPOSITES"; - case index_metadata_kind::custom: return "CUSTOM"; - } - throw std::invalid_argument("unknown index kind"); -} - -index_metadata_kind deserialize_index_kind(sstring kind) { - if (kind == "KEYS") { - return index_metadata_kind::keys; - } else if (kind == "COMPOSITES") { - return index_metadata_kind::composites; - } else if (kind == "CUSTOM") { - return index_metadata_kind::custom; - } else { - throw std::invalid_argument("unknown index kind: " + kind); - } -} - -static void add_index_to_schema_mutation(schema_ptr table, - const index_metadata& index, - api::timestamp_type timestamp, - mutation& m) -{ - auto ckey = clustering_key::from_exploded(*m.schema(), {utf8_type->decompose(table->cf_name()), utf8_type->decompose(index.name())}); - m.set_clustered_cell(ckey, "kind", serialize_index_kind(index.kind()), timestamp); - store_map(m, ckey, "options", timestamp, index.options()); -} - -static void drop_index_from_schema_mutation(schema_ptr table, const index_metadata& index, long timestamp, std::vector& mutations) -{ - schema_ptr s = indexes(); - auto pkey = partition_key::from_singular(*s, table->ks_name()); - auto ckey = clustering_key::from_exploded(*s, {utf8_type->decompose(table->cf_name()), utf8_type->decompose(index.name())}); - mutation m{pkey, s}; - m.partition().apply_delete(*s, ckey, tombstone(timestamp, gc_clock::now())); - mutations.push_back(std::move(m)); -} - -static void drop_column_from_schema_mutation( - schema_ptr schema_table, - schema_ptr table, - const sstring& column_name, - long timestamp, - std::vector& mutations) -{ - auto pkey = partition_key::from_singular(*schema_table, table->ks_name()); - auto ckey = clustering_key::from_exploded(*schema_table, {utf8_type->decompose(table->cf_name()), - utf8_type->decompose(column_name)}); - - mutation m{pkey, schema_table}; - m.partition().apply_delete(*schema_table, ckey, tombstone(timestamp, gc_clock::now())); - mutations.emplace_back(m); -} - -static std::vector create_columns_from_column_rows(const query::result_set& rows, - const sstring& keyspace, - const sstring& table, /*, - AbstractType rawComparator, */ - bool is_super) -{ - std::vector columns; - for (auto&& row : rows.rows()) { - auto kind = deserialize_kind(row.get_nonnull("kind")); - auto type = cql_type_parser::parse(keyspace, row.get_nonnull("type")); - auto name_bytes = row.get_nonnull("column_name_bytes"); - column_id position = row.get_nonnull("position"); - - if (row.has("clustering_order")) { - auto order = row.get_nonnull("clustering_order"); - std::transform(order.begin(), order.end(), order.begin(), ::toupper); - if (order == "DESC") { - type = reversed_type_impl::get_instance(type); - } - } - - columns.emplace_back(name_bytes, type, kind, position); - } - return columns; -} - -static std::vector create_indices_from_index_rows(const query::result_set& rows, - const sstring& keyspace, - const sstring& table) -{ - return boost::copy_range>(rows.rows() | boost::adaptors::transformed([&keyspace, &table] (auto&& row) { - return create_index_from_index_row(row, keyspace, table); - })); -} - -static index_metadata create_index_from_index_row(const query::result_set_row& row, - sstring keyspace, - sstring table) -{ - auto index_name = row.get_nonnull("index_name"); - index_options_map options; - auto map = row.get_nonnull("options"); - for (auto&& entry : map) { - options.emplace(value_cast(entry.first), value_cast(entry.second)); - } - index_metadata_kind 
kind = deserialize_index_kind(row.get_nonnull("kind")); - return index_metadata{index_name, options, kind}; -} - -/* - * View metadata serialization/deserialization. - */ - -view_ptr create_view_from_mutations(schema_mutations sm, std::experimental::optional version) { - auto table_rs = query::result_set(sm.columnfamilies_mutation()); - query::result_set_row row = table_rs.row(0); - - auto ks_name = row.get_nonnull("keyspace_name"); - auto cf_name = row.get_nonnull("view_name"); - auto id = row.get_nonnull("id"); - - schema_builder builder{ks_name, cf_name, id}; - prepare_builder_from_table_row(builder, row); - - auto column_defs = create_columns_from_column_rows(query::result_set(sm.columns_mutation()), ks_name, cf_name, false); - for (auto&& cdef : column_defs) { - builder.with_column(cdef); - } - - if (version) { - builder.with_version(*version); - } else { - builder.with_version(sm.digest()); - } - - auto base_id = row.get_nonnull("base_table_id"); - auto base_name = row.get_nonnull("base_table_name"); - auto include_all_columns = row.get_nonnull("include_all_columns"); - auto where_clause = row.get_nonnull("where_clause"); - - builder.with_view_info(std::move(base_id), std::move(base_name), include_all_columns, std::move(where_clause)); - return view_ptr(builder.build()); -} - -static future create_view_from_table_row(distributed& proxy, const query::result_set_row& row) { - qualified_name qn(row.get_nonnull("keyspace_name"), row.get_nonnull("view_name")); - return do_with(std::move(qn), [&proxy] (auto&& qn) { - return read_table_mutations(proxy, qn, views()).then([&qn] (schema_mutations sm) { - if (!sm.live()) { - throw std::runtime_error(sprint("%s:%s not found in the view definitions keyspace.", qn.keyspace_name, qn.table_name)); - } - return create_view_from_mutations(std::move(sm)); - }); - }); -} - -/** - * Deserialize views from low-level schema representation, all of them belong to the same keyspace - * - * @return vector containing the view definitions - */ -future> create_views_from_schema_partition(distributed& proxy, const schema_result::mapped_type& result) -{ - auto views = make_lw_shared>(); - return parallel_for_each(result->rows().begin(), result->rows().end(), [&proxy, views = std::move(views)] (auto&& row) { - return create_view_from_table_row(proxy, row).then([views] (auto&& v) { - views->push_back(std::move(v)); - }); - }).then([views] { - return std::move(*views); - }); -} - -static schema_mutations make_view_mutations(view_ptr view, api::timestamp_type timestamp, bool with_columns) -{ - // When adding new schema properties, don't set cells for default values so that - // both old and new nodes will see the same version during rolling upgrades. 
- - // For properties that can be null (and can be changed), we insert tombstones, to make sure - // we don't keep a property the user has removed - schema_ptr s = views(); - auto pkey = partition_key::from_singular(*s, view->ks_name()); - mutation m{pkey, s}; - auto ckey = clustering_key::from_singular(*s, view->cf_name()); - - m.set_clustered_cell(ckey, "base_table_id", view->view_info()->base_id(), timestamp); - m.set_clustered_cell(ckey, "base_table_name", view->view_info()->base_name(), timestamp); - m.set_clustered_cell(ckey, "where_clause", view->view_info()->where_clause(), timestamp); - m.set_clustered_cell(ckey, "bloom_filter_fp_chance", view->bloom_filter_fp_chance(), timestamp); - m.set_clustered_cell(ckey, "include_all_columns", view->view_info()->include_all_columns(), timestamp); - m.set_clustered_cell(ckey, "id", view->id(), timestamp); - - add_table_params_to_mutations(m, ckey, view, timestamp); - - - mutation columns_mutation(pkey, columns()); - mutation dropped_columns_mutation(pkey, dropped_columns()); - mutation indices_mutation(pkey, indexes()); - - if (with_columns) { - for (auto&& column : view->v3().all_columns()) { - add_column_to_schema_mutation(view, column, timestamp, columns_mutation); - } - - for (auto&& e : view->dropped_columns()) { - add_dropped_column_to_schema_mutation(view, e.first, e.second, timestamp, dropped_columns_mutation); - } - for (auto&& index : view->indices()) { - add_index_to_schema_mutation(view, index, timestamp, indices_mutation); - } - } - - auto scylla_tables_mutation = make_scylla_tables_mutation(view, timestamp); - - return schema_mutations{std::move(m), std::move(columns_mutation), std::move(indices_mutation), std::move(dropped_columns_mutation), - std::move(scylla_tables_mutation)}; -} - -schema_mutations make_schema_mutations(schema_ptr s, api::timestamp_type timestamp, bool with_columns) -{ - return s->is_view() ? make_view_mutations(view_ptr(s), timestamp, with_columns) : make_table_mutations(s, timestamp, with_columns); -} - -future> make_create_view_mutations(lw_shared_ptr keyspace, view_ptr view, api::timestamp_type timestamp) -{ - std::vector mutations; - // And also the serialized base table. - auto base = keyspace->cf_meta_data().at(view->view_info()->base_name()); - add_table_or_view_to_schema_mutation(base, timestamp, true, mutations); - add_table_or_view_to_schema_mutation(view, timestamp, true, mutations); - - // Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631). - return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)}); -} - -/** - * Note: new_view can be generated due to an ALTER on its base table; in that - * case, the new base schema isn't yet loaded, thus can't be accessed from this - * function. - */ -future> make_update_view_mutations(lw_shared_ptr keyspace, - view_ptr old_view, - view_ptr new_view, - api::timestamp_type timestamp, - bool include_base) -{ - std::vector mutations; - if (include_base) { - // Include the serialized base table mutations in case the target node is missing them. 
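
These make_*_mutations() helpers all finish the same way: the mutations built so far are handed to a push_back_and_return continuation, which appends the serialized keyspace once read_keyspace_mutation() resolves and yields the complete list. A minimal standalone model of that continuation, with a plain string standing in for the mutation type and the future machinery reduced to a direct call (names other than push_back_and_return are illustrative):

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    using mutation = std::string;             // stand-in for the real mutation type

    // Functor carrying the mutations built so far; when the keyspace mutation
    // arrives it is appended and the whole list is returned.
    struct push_back_and_return {
        std::vector<mutation> muts;
        std::vector<mutation> operator()(mutation ks_mutation) {
            muts.push_back(std::move(ks_mutation));
            return std::move(muts);
        }
    };

    int main() {
        std::vector<mutation> muts{"base table", "view"};
        // In the real code this functor is passed to .then() on the future
        // returned by read_keyspace_mutation(); here we invoke it directly.
        auto all = push_back_and_return{std::move(muts)}("keyspace");
        for (auto& m : all) std::cout << m << '\n';
    }
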
- auto base = keyspace->cf_meta_data().at(new_view->view_info()->base_name()); - add_table_or_view_to_schema_mutation(base, timestamp, true, mutations); - } - add_table_or_view_to_schema_mutation(new_view, timestamp, false, mutations); - make_update_columns_mutations(old_view, new_view, timestamp, false, mutations); - - // Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631). - return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)}); -} - -future> make_drop_view_mutations(lw_shared_ptr keyspace, view_ptr view, api::timestamp_type timestamp) { - std::vector mutations; - make_drop_table_or_view_mutations(views(), view, timestamp, mutations); - // Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631). - return read_keyspace_mutation(service::get_storage_proxy(), keyspace->name()).then(push_back_and_return{std::move(mutations)}); -} - -#if 0 - private static AbstractType getComponentComparator(AbstractType rawComparator, Integer componentIndex) - { - return (componentIndex == null || (componentIndex == 0 && !(rawComparator instanceof CompositeType))) - ? rawComparator - : ((CompositeType)rawComparator).types.get(componentIndex); - } - - /* - * Trigger metadata serialization/deserialization. - */ - - private static void addTriggerToSchemaMutation(CFMetaData table, TriggerDefinition trigger, long timestamp, Mutation mutation) - { - ColumnFamily cells = mutation.addOrGet(Triggers); - Composite prefix = Triggers.comparator.make(table.cfName, trigger.name); - CFRowAdder adder = new CFRowAdder(cells, prefix, timestamp); - adder.addMapEntry("trigger_options", "class", trigger.classOption); - } - - private static void dropTriggerFromSchemaMutation(CFMetaData table, TriggerDefinition trigger, long timestamp, Mutation mutation) - { - ColumnFamily cells = mutation.addOrGet(Triggers); - int ldt = (int) (System.currentTimeMillis() / 1000); - - Composite prefix = Triggers.comparator.make(table.cfName, trigger.name); - cells.addAtom(new RangeTombstone(prefix, prefix.end(), timestamp, ldt)); - } - - /** - * Deserialize triggers from storage-level representation. - * - * @param partition storage-level partition containing the trigger definitions - * @return the list of processed TriggerDefinitions - */ - private static List createTriggersFromTriggersPartition(Row partition) - { - List triggers = new ArrayList<>(); - String query = String.format("SELECT * FROM %s.%s", SystemKeyspace.NAME, TRIGGERS); - for (UntypedResultSet.Row row : QueryProcessor.resultify(query, partition)) - { - String name = row.getString("trigger_name"); - String classOption = row.getMap("trigger_options", UTF8Type.instance, UTF8Type.instance).get("class"); - triggers.add(new TriggerDefinition(name, classOption)); - } - return triggers; - } - - /* - * UDF metadata serialization/deserialization. - */ - - public static Mutation makeCreateFunctionMutation(KSMetaData keyspace, UDFunction function, long timestamp) - { - // Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631). 
- Mutation mutation = makeCreateKeyspaceMutation(keyspace, timestamp, false); - addFunctionToSchemaMutation(function, timestamp, mutation); - return mutation; - } - - private static void addFunctionToSchemaMutation(UDFunction function, long timestamp, Mutation mutation) - { - ColumnFamily cells = mutation.addOrGet(Functions); - Composite prefix = Functions.comparator.make(function.name().name, UDHelper.calculateSignature(function)); - CFRowAdder adder = new CFRowAdder(cells, prefix, timestamp); - - adder.resetCollection("argument_names"); - adder.resetCollection("argument_types"); - - for (int i = 0; i < function.argNames().size(); i++) - { - adder.addListEntry("argument_names", function.argNames().get(i).bytes); - adder.addListEntry("argument_types", function.argTypes().get(i).toString()); - } - - adder.add("body", function.body()); - adder.add("is_deterministic", function.isDeterministic()); - adder.add("language", function.language()); - adder.add("return_type", function.returnType().toString()); - } - - public static Mutation makeDropFunctionMutation(KSMetaData keyspace, UDFunction function, long timestamp) - { - // Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631). - Mutation mutation = makeCreateKeyspaceMutation(keyspace, timestamp, false); - - ColumnFamily cells = mutation.addOrGet(Functions); - int ldt = (int) (System.currentTimeMillis() / 1000); - - Composite prefix = Functions.comparator.make(function.name().name, UDHelper.calculateSignature(function)); - cells.addAtom(new RangeTombstone(prefix, prefix.end(), timestamp, ldt)); - - return mutation; - } - - private static Map createFunctionsFromFunctionsPartition(Row partition) - { - Map functions = new HashMap<>(); - String query = String.format("SELECT * FROM %s.%s", SystemKeyspace.NAME, FUNCTIONS); - for (UntypedResultSet.Row row : QueryProcessor.resultify(query, partition)) - { - UDFunction function = createFunctionFromFunctionRow(row); - functions.put(UDHelper.calculateSignature(function), function); - } - return functions; - } - - private static UDFunction createFunctionFromFunctionRow(UntypedResultSet.Row row) - { - String ksName = row.getString("keyspace_name"); - String functionName = row.getString("function_name"); - FunctionName name = new FunctionName(ksName, functionName); - - List argNames = new ArrayList<>(); - if (row.has("argument_names")) - for (String arg : row.getList("argument_names", UTF8Type.instance)) - argNames.add(new ColumnIdentifier(arg, true)); - - List> argTypes = new ArrayList<>(); - if (row.has("argument_types")) - for (String type : row.getList("argument_types", UTF8Type.instance)) - argTypes.add(parseType(type)); - - AbstractType returnType = parseType(row.getString("return_type")); - - boolean isDeterministic = row.getBoolean("is_deterministic"); - String language = row.getString("language"); - String body = row.getString("body"); - - try - { - return UDFunction.create(name, argNames, argTypes, returnType, language, body, isDeterministic); - } - catch (InvalidRequestException e) - { - slogger.error(String.format("Cannot load function '%s' from schema: this function won't be available (on this node)", name), e); - return UDFunction.createBrokenFunction(name, argNames, argTypes, returnType, language, body, e); - } - } - - /* - * Aggregate UDF metadata serialization/deserialization. 
- */ - - public static Mutation makeCreateAggregateMutation(KSMetaData keyspace, UDAggregate aggregate, long timestamp) - { - // Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631). - Mutation mutation = makeCreateKeyspaceMutation(keyspace, timestamp, false); - addAggregateToSchemaMutation(aggregate, timestamp, mutation); - return mutation; - } - - private static void addAggregateToSchemaMutation(UDAggregate aggregate, long timestamp, Mutation mutation) - { - ColumnFamily cells = mutation.addOrGet(Aggregates); - Composite prefix = Aggregates.comparator.make(aggregate.name().name, UDHelper.calculateSignature(aggregate)); - CFRowAdder adder = new CFRowAdder(cells, prefix, timestamp); - - adder.resetCollection("argument_types"); - adder.add("return_type", aggregate.returnType().toString()); - adder.add("state_func", aggregate.stateFunction().name().name); - if (aggregate.stateType() != null) - adder.add("state_type", aggregate.stateType().toString()); - if (aggregate.finalFunction() != null) - adder.add("final_func", aggregate.finalFunction().name().name); - if (aggregate.initialCondition() != null) - adder.add("initcond", aggregate.initialCondition()); - - for (AbstractType argType : aggregate.argTypes()) - adder.addListEntry("argument_types", argType.toString()); - } - - private static Map createAggregatesFromAggregatesPartition(Row partition) - { - Map aggregates = new HashMap<>(); - String query = String.format("SELECT * FROM %s.%s", SystemKeyspace.NAME, AGGREGATES); - for (UntypedResultSet.Row row : QueryProcessor.resultify(query, partition)) - { - UDAggregate aggregate = createAggregateFromAggregateRow(row); - aggregates.put(UDHelper.calculateSignature(aggregate), aggregate); - } - return aggregates; - } - - private static UDAggregate createAggregateFromAggregateRow(UntypedResultSet.Row row) - { - String ksName = row.getString("keyspace_name"); - String functionName = row.getString("aggregate_name"); - FunctionName name = new FunctionName(ksName, functionName); - - List types = row.getList("argument_types", UTF8Type.instance); - - List> argTypes; - if (types == null) - { - argTypes = Collections.emptyList(); - } - else - { - argTypes = new ArrayList<>(types.size()); - for (String type : types) - argTypes.add(parseType(type)); - } - - AbstractType returnType = parseType(row.getString("return_type")); - - FunctionName stateFunc = new FunctionName(ksName, row.getString("state_func")); - FunctionName finalFunc = row.has("final_func") ? new FunctionName(ksName, row.getString("final_func")) : null; - AbstractType stateType = row.has("state_type") ? parseType(row.getString("state_type")) : null; - ByteBuffer initcond = row.has("initcond") ? row.getBytes("initcond") : null; - - try - { - return UDAggregate.create(name, argTypes, returnType, stateFunc, finalFunc, stateType, initcond); - } - catch (InvalidRequestException reason) - { - return UDAggregate.createBroken(name, argTypes, returnType, initcond, reason); - } - } - - public static Mutation makeDropAggregateMutation(KSMetaData keyspace, UDAggregate aggregate, long timestamp) - { - // Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631). 
- Mutation mutation = makeCreateKeyspaceMutation(keyspace, timestamp, false); - - ColumnFamily cells = mutation.addOrGet(Aggregates); - int ldt = (int) (System.currentTimeMillis() / 1000); - - Composite prefix = Aggregates.comparator.make(aggregate.name().name, UDHelper.calculateSignature(aggregate)); - cells.addAtom(new RangeTombstone(prefix, prefix.end(), timestamp, ldt)); - - return mutation; - } -#endif - -data_type parse_type(sstring str) -{ - return db::marshal::type_parser::parse(str); -} - -std::vector all_tables() { - return { - keyspaces(), tables(), scylla_tables(), columns(), dropped_columns(), triggers(), - views(), indexes(), types(), functions(), aggregates(), - }; -} - -namespace legacy { - -table_schema_version schema_mutations::digest() const { - md5_hasher h; - db::schema_tables::feed_hash_for_schema_digest(h, _columnfamilies); - db::schema_tables::feed_hash_for_schema_digest(h, _columns); - return utils::UUID_gen::get_name_UUID(h.finalize()); -} - -future read_table_mutations(distributed& proxy, - sstring keyspace_name, sstring table_name, schema_ptr s) -{ - return read_schema_partition_for_table(proxy, s, keyspace_name, table_name) - .then([&proxy, keyspace_name, table_name] (mutation cf_m) { - return read_schema_partition_for_table(proxy, db::system_keyspace::legacy::columns(), keyspace_name, table_name) - .then([cf_m = std::move(cf_m)] (mutation col_m) { - return schema_mutations{std::move(cf_m), std::move(col_m)}; - }); - }); -} - -} // namespace legacy - -} // namespace schema_tables -} // namespace schema diff --git a/scylla/db/schema_tables.hh b/scylla/db/schema_tables.hh deleted file mode 100644 index 63597c2..0000000 --- a/scylla/db/schema_tables.hh +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "service/storage_proxy.hh" -#include "mutation.hh" -#include "schema.hh" -#include "hashing.hh" -#include "schema_mutations.hh" - -#include -#include - -namespace query { -class result_set; -} - -namespace db { -namespace schema_tables { - -using schema_result = std::map>; -using schema_result_value_type = std::pair>; - -namespace v3 { - -static constexpr auto NAME = "system_schema"; -static constexpr auto KEYSPACES = "keyspaces"; -static constexpr auto TABLES = "tables"; -static constexpr auto SCYLLA_TABLES = "scylla_tables"; -static constexpr auto COLUMNS = "columns"; -static constexpr auto DROPPED_COLUMNS = "dropped_columns"; -static constexpr auto TRIGGERS = "triggers"; -static constexpr auto VIEWS = "views"; -static constexpr auto TYPES = "types"; -static constexpr auto FUNCTIONS = "functions"; -static constexpr auto AGGREGATES = "aggregates"; -static constexpr auto INDEXES = "indexes"; - -schema_ptr columns(); -schema_ptr dropped_columns(); -schema_ptr indexes(); -schema_ptr tables(); -schema_ptr scylla_tables(); -schema_ptr views(); - -} - -namespace legacy { - -class schema_mutations { - mutation _columnfamilies; - mutation _columns; -public: - schema_mutations(mutation columnfamilies, mutation columns) - : _columnfamilies(std::move(columnfamilies)) - , _columns(std::move(columns)) - { } - table_schema_version digest() const; -}; - -future read_table_mutations(distributed& proxy, - sstring keyspace_name, sstring table_name, schema_ptr s); - -} - -using namespace v3; - -// Change on non-backwards compatible changes of schema mutations. -// Replication of schema between nodes with different version is inhibited. -extern const sstring version; - -extern std::vector ALL; - -std::vector all_tables(); - -// saves/creates "ks" + all tables etc, while first deleting all old schema entries (will be rewritten) -future<> save_system_schema(const sstring & ks); - -// saves/creates "system_schema" keyspace -future<> save_system_keyspace_schema(); - -future calculate_schema_digest(distributed& proxy); - -future> convert_schema_to_mutations(distributed& proxy); - -future -read_schema_partition_for_keyspace(distributed& proxy, const sstring& schema_table_name, const sstring& keyspace_name); -future read_keyspace_mutation(distributed&, const sstring& keyspace_name); - -future<> merge_schema(distributed& proxy, std::vector mutations); - -future<> merge_schema(distributed& proxy, std::vector mutations, bool do_flush); - -future> merge_keyspaces(distributed& proxy, schema_result&& before, schema_result&& after); - -std::vector make_create_keyspace_mutations(lw_shared_ptr keyspace, api::timestamp_type timestamp, bool with_tables_and_types_and_functions = true); - -std::vector make_drop_keyspace_mutations(lw_shared_ptr keyspace, api::timestamp_type timestamp); - -lw_shared_ptr create_keyspace_from_schema_partition(const schema_result_value_type& partition); - -future> make_create_type_mutations(lw_shared_ptr keyspace, user_type type, api::timestamp_type timestamp); - -std::vector create_types_from_schema_partition(const schema_result_value_type& result); - -future> make_drop_type_mutations(lw_shared_ptr keyspace, user_type type, api::timestamp_type timestamp); - -void add_type_to_schema_mutation(user_type type, api::timestamp_type timestamp, std::vector& mutations); - -future> make_create_table_mutations(lw_shared_ptr keyspace, schema_ptr table, api::timestamp_type timestamp); - -future> make_update_table_mutations( - lw_shared_ptr keyspace, - schema_ptr old_table, - 
schema_ptr new_table, - api::timestamp_type timestamp, - bool from_thrift); - -future> create_tables_from_tables_partition(distributed& proxy, const schema_result::mapped_type& result); - -future> make_drop_table_mutations(lw_shared_ptr keyspace, schema_ptr table, api::timestamp_type timestamp); - -schema_ptr create_table_from_mutations(schema_mutations, std::experimental::optional version = {}); - -view_ptr create_view_from_mutations(schema_mutations, std::experimental::optional version = {}); - -future> create_views_from_schema_partition(distributed& proxy, const schema_result::mapped_type& result); - -schema_mutations make_schema_mutations(schema_ptr s, api::timestamp_type timestamp, bool with_columns); - -void add_table_or_view_to_schema_mutation(schema_ptr view, api::timestamp_type timestamp, bool with_columns, std::vector& mutations); - -future> make_create_view_mutations(lw_shared_ptr keyspace, view_ptr view, api::timestamp_type timestamp); - -future> make_update_view_mutations(lw_shared_ptr keyspace, view_ptr old_view, view_ptr new_view, api::timestamp_type timestamp, bool include_base); - -future> make_drop_view_mutations(lw_shared_ptr keyspace, view_ptr view, api::timestamp_type timestamp); - -sstring serialize_kind(column_kind kind); -column_kind deserialize_kind(sstring kind); -data_type parse_type(sstring str); - -sstring serialize_index_kind(index_metadata_kind kind); -index_metadata_kind deserialize_index_kind(sstring kind); - -mutation compact_for_schema_digest(const mutation& m); - -template -void feed_hash_for_schema_digest(Hasher& h, const mutation& m) { - feed_hash(h, compact_for_schema_digest(m)); -} - -} // namespace schema_tables -} // namespace db diff --git a/scylla/db/size_estimates_virtual_reader.hh b/scylla/db/size_estimates_virtual_reader.hh deleted file mode 100644 index aaed660..0000000 --- a/scylla/db/size_estimates_virtual_reader.hh +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include -#include -#include -#include - -#include "clustering_bounds_comparator.hh" -#include "database.hh" -#include "db/system_keyspace.hh" -#include "dht/i_partitioner.hh" -#include "mutation_reader.hh" -#include "partition_range_compat.hh" -#include "range.hh" -#include "service/storage_service.hh" -#include "stdx.hh" -#include "streamed_mutation.hh" - -namespace db { - -namespace size_estimates { - -class size_estimates_mutation_reader final : public mutation_reader::impl { - struct token_range { - bytes start; - bytes end; - }; - schema_ptr _schema; - const dht::partition_range& _prange; - const query::partition_slice& _slice; - using ks_range = std::vector; - stdx::optional _keyspaces; - ks_range::const_iterator _current_partition; - streamed_mutation::forwarding _fwd; -public: - size_estimates_mutation_reader(schema_ptr schema, const dht::partition_range& prange, const query::partition_slice& slice, streamed_mutation::forwarding fwd) - : _schema(schema) - , _prange(prange) - , _slice(slice) - , _fwd(fwd) - { } - - virtual future operator()() override { - // For each specified range, estimate (crudely) mean partition size and partitions count. - auto& db = service::get_local_storage_proxy().get_db().local(); - if (!_keyspaces) { - _keyspaces = get_keyspaces(*_schema, db, _prange); - _current_partition = _keyspaces->begin(); - } - if (_current_partition == _keyspaces->end()) { - return make_ready_future(); - } - return get_local_ranges().then([&db, this] (auto&& ranges) { - auto estimates = this->estimates_for_current_keyspace(db, std::move(ranges)); - auto mutations = db::system_keyspace::make_size_estimates_mutation(*_current_partition, std::move(estimates)); - ++_current_partition; - return streamed_mutation_opt(streamed_mutation_from_mutation(std::move(mutations), _fwd)); - }); - } - /** - * Returns the primary ranges for the local node. - * Used for testing as well. - */ - static future> get_local_ranges() { - auto& ss = service::get_local_storage_service(); - return ss.get_local_tokens().then([&ss] (auto&& tokens) { - auto ranges = ss.get_token_metadata().get_primary_ranges_for(std::move(tokens)); - std::vector local_ranges; - auto to_bytes = [](const stdx::optional& b) { - assert(b); - return utf8_type->decompose(dht::global_partitioner().to_sstring(b->value())); - }; - // We merge the ranges to be compatible with how Cassandra shows its size estimates table. - // All queries will be on that table, where all entries are text and there's no notion of - // token ranges from the CQL point of view. 
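
The merge described above can be modeled standalone: among the node's primary ranges, the one whose start is open (the minimum token) and the one whose end is open (the maximum token) are really two halves of a single range that wraps around the ring, so they are stitched into one entry. Integers stand in for tokens and all names are illustrative:

    #include <algorithm>
    #include <iostream>
    #include <optional>
    #include <utility>
    #include <vector>

    struct token_range {                      // stand-in; an unset bound is open
        std::optional<int> start, end;
    };

    int main() {
        std::vector<token_range> ranges{{{10}, {20}}, {{90}, std::nullopt}, {std::nullopt, {5}}};
        auto left = std::find_if(ranges.begin(), ranges.end(),
                                 [](auto& r) { return !r.start; });
        auto right = std::find_if(ranges.begin(), ranges.end(),
                                  [](auto& r) { return !r.end; });
        std::vector<std::pair<int, int>> merged;
        if (left != right && left != ranges.end() && right != ranges.end()) {
            // (90, +inf] and (-inf, 5] become the single wrapping range (90, 5].
            merged.emplace_back(*right->start, *left->end);
        }
        // The real code also erases the two halves and keeps the remaining
        // ranges as-is; here we only show the stitched result.
        for (auto& [s, e] : merged) std::cout << '(' << s << ", " << e << "]\n";
    }
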
- auto left_inf = boost::find_if(ranges, [] (auto&& r) { - return !r.start() || r.start()->value() == dht::minimum_token(); - }); - auto right_inf = boost::find_if(ranges, [] (auto&& r) { - return !r.end() || r.end()->value() == dht::maximum_token(); - }); - if (left_inf != right_inf && left_inf != ranges.end() && right_inf != ranges.end()) { - local_ranges.push_back(token_range{to_bytes(right_inf->start()), to_bytes(left_inf->end())}); - ranges.erase(left_inf); - ranges.erase(right_inf); - } - for (auto&& r : ranges) { - local_ranges.push_back(token_range{to_bytes(r.start()), to_bytes(r.end())}); - } - boost::sort(local_ranges, [] (auto&& tr1, auto&& tr2) { - return utf8_type->less(tr1.start, tr2.start); - }); - return local_ranges; - }); - } -private: - struct virtual_row { - const bytes& cf_name; - const token_range& tokens; - clustering_key_prefix as_key() const { - return clustering_key_prefix::from_exploded(std::vector{cf_name, tokens.start, tokens.end}); - } - }; - struct virtual_row_comparator { - schema_ptr _schema; - virtual_row_comparator(schema_ptr schema) : _schema(schema) { } - bool operator()(const clustering_key_prefix& key1, const clustering_key_prefix& key2) { - return clustering_key_prefix::prefix_equality_less_compare(*_schema)(key1, key2); - } - bool operator()(const virtual_row& row, const clustering_key_prefix& key) { - return operator()(row.as_key(), key); - } - bool operator()(const clustering_key_prefix& key, const virtual_row& row) { - return operator()(key, row.as_key()); - } - }; - class virtual_row_iterator : public std::iterator { - std::reference_wrapper> _cf_names; - std::reference_wrapper> _ranges; - size_t _cf_names_idx = 0; - size_t _ranges_idx = 0; - public: - struct end_iterator_tag {}; - virtual_row_iterator(const std::vector& cf_names, const std::vector& ranges) - : _cf_names(ref(cf_names)) - , _ranges(ref(ranges)) - { } - virtual_row_iterator(const std::vector& cf_names, const std::vector& ranges, end_iterator_tag) - : _cf_names(ref(cf_names)) - , _ranges(ref(ranges)) - , _cf_names_idx(cf_names.size()) - , _ranges_idx(ranges.size()) - { } - virtual_row_iterator& operator++() { - if (++_ranges_idx == _ranges.get().size() && ++_cf_names_idx < _cf_names.get().size()) { - _ranges_idx = 0; - } - return *this; - } - virtual_row_iterator operator++(int) { - virtual_row_iterator i(*this); - ++(*this); - return i; - } - const value_type operator*() const { - return { _cf_names.get()[_cf_names_idx], _ranges.get()[_ranges_idx] }; - } - bool operator==(const virtual_row_iterator& i) const { - return _cf_names_idx == i._cf_names_idx - && _ranges_idx == i._ranges_idx; - } - bool operator!=(const virtual_row_iterator& i) const { - return !(*this == i); - } - }; - - std::vector - estimates_for_current_keyspace(const database& db, std::vector local_ranges) const { - auto pkey = partition_key::from_single_value(*_schema, utf8_type->decompose(*_current_partition)); - auto cfs = db.find_keyspace(*_current_partition).metadata()->cf_meta_data(); - auto cf_names = boost::copy_range>(cfs | boost::adaptors::transformed([] (auto&& cf) { - return utf8_type->decompose(cf.first); - })); - boost::sort(cf_names, [] (auto&& n1, auto&& n2) { - return utf8_type->less(n1, n2); - }); - std::vector estimates; - for (auto& range : _slice.row_ranges(*_schema, pkey)) { - auto rows = boost::make_iterator_range( - virtual_row_iterator(cf_names, local_ranges), - virtual_row_iterator(cf_names, local_ranges, virtual_row_iterator::end_iterator_tag())); - auto rows_to_estimate = 
range.slice(rows, virtual_row_comparator(_schema)); - for (auto&& r : rows_to_estimate) { - auto& cf = db.find_column_family(*_current_partition, utf8_type->to_string(r.cf_name)); - estimates.push_back(estimate(cf, r.tokens)); - if (estimates.size() >= _slice.partition_row_limit()) { - return estimates; - } - } - } - return estimates; - } - - /** - * Returns the keyspaces, ordered by name, as selected by the partition_range. - */ - static ks_range get_keyspaces(const schema& s, const database& db, dht::partition_range range) { - struct keyspace_less_comparator { - const schema& _s; - keyspace_less_comparator(const schema& s) : _s(s) { } - dht::ring_position as_ring_position(const sstring& ks) { - auto pkey = partition_key::from_single_value(_s, utf8_type->decompose(ks)); - return dht::global_partitioner().decorate_key(_s, std::move(pkey)); - } - bool operator()(const sstring& ks1, const sstring& ks2) { - return as_ring_position(ks1).less_compare(_s, as_ring_position(ks2)); - } - bool operator()(const sstring& ks, const dht::ring_position& rp) { - return as_ring_position(ks).less_compare(_s, rp); - } - bool operator()(const dht::ring_position& rp, const sstring& ks) { - return rp.less_compare(_s, as_ring_position(ks)); - } - }; - auto keyspaces = db.get_non_system_keyspaces(); - auto cmp = keyspace_less_comparator(s); - boost::sort(keyspaces, cmp); - return boost::copy_range(range.slice(keyspaces, std::move(cmp))); - } - - /** - * Makes a wrapping range of ring_position from a nonwrapping range of token, used to select sstables. - */ - static dht::partition_range as_ring_position_range(dht::token_range& r) { - stdx::optional::bound> start_bound, end_bound; - if (r.start()) { - start_bound = {{ dht::ring_position(r.start()->value(), dht::ring_position::token_bound::start), r.start()->is_inclusive() }}; - } - if (r.end()) { - end_bound = {{ dht::ring_position(r.end()->value(), dht::ring_position::token_bound::end), r.end()->is_inclusive() }}; - } - return dht::partition_range(std::move(start_bound), std::move(end_bound), r.is_singular()); - } - - /** - * Add a new range_estimates for the specified range, considering the sstables associated with `cf`. - */ - static system_keyspace::range_estimates estimate(const column_family& cf, const token_range& r) { - int64_t count{0}; - utils::estimated_histogram hist{0}; - auto from_bytes = [] (auto& b) { - return dht::global_partitioner().from_sstring(utf8_type->to_string(b)); - }; - dht::token_range_vector ranges; - compat::unwrap_into( - wrapping_range({{ from_bytes(r.start) }}, {{ from_bytes(r.end) }}), - dht::token_comparator(), - [&] (auto&& rng) { ranges.push_back(std::move(rng)); }); - for (auto&& r : ranges) { - auto rp_range = as_ring_position_range(r); - for (auto&& sstable : cf.select_sstables(rp_range)) { - count += sstable->estimated_keys_for_range(r); - hist.merge(sstable->get_stats_metadata().estimated_row_size); - } - } - return {cf.schema(), r.start, r.end, count, count > 0 ? 
hist.mean() : 0}; - } -}; - -struct virtual_reader { - mutation_reader operator()(schema_ptr schema, - const dht::partition_range& range, - const query::partition_slice& slice, - const io_priority_class& pc, - tracing::trace_state_ptr trace_state, - streamed_mutation::forwarding fwd, - mutation_reader::forwarding fwd_mr) { - return make_mutation_reader(schema, range, slice, fwd); - } -}; - -} // namespace size_estimates - -} // namespace db diff --git a/scylla/db/system_keyspace.cc b/scylla/db/system_keyspace.cc deleted file mode 100644 index 9f2957f..0000000 --- a/scylla/db/system_keyspace.cc +++ /dev/null @@ -1,1778 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include -#include -#include -#include - -#include "system_keyspace.hh" -#include "types.hh" -#include "service/storage_service.hh" -#include "service/storage_proxy.hh" -#include "service/client_state.hh" -#include "service/query_state.hh" -#include "cql3/query_options.hh" -#include "cql3/query_processor.hh" -#include "utils/fb_utilities.hh" -#include "utils/hash.hh" -#include "dht/i_partitioner.hh" -#include "version.hh" -#include "thrift/server.hh" -#include "exceptions/exceptions.hh" -#include "cql3/query_processor.hh" -#include "query_context.hh" -#include "partition_slice_builder.hh" -#include "db/config.hh" -#include "schema_builder.hh" -#include "md5_hasher.hh" -#include "release.hh" -#include "log.hh" -#include "serializer.hh" -#include -#include "service/storage_proxy.hh" -#include "message/messaging_service.hh" -#include "mutation_query.hh" -#include "db/size_estimates_virtual_reader.hh" - -using days = std::chrono::duration>; - -namespace db { - -std::unique_ptr qctx = {}; - -namespace system_keyspace { - -static logging::logger slogger("system_keyspace"); -static const api::timestamp_type creation_timestamp = api::new_timestamp(); - -api::timestamp_type schema_creation_timestamp() { - return creation_timestamp; -} - -// Increase whenever changing schema of any system table. -// FIXME: Make automatic by calculating from schema structure. 
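
The sequence number declared just below feeds generate_schema_version(), which derives a deterministic version from the table id plus that hand-bumped counter, so every node computes the same version for the same built-in schema. A standalone model, with a simple std::hash combine standing in for the md5-based name UUID the real code builds via md5_hasher and utils::UUID_gen::get_name_UUID (hash choice and names are illustrative):

    #include <cstdint>
    #include <functional>
    #include <iostream>

    // Bump whenever any built-in table's schema changes (mirrors the
    // version_sequence_number declared below).
    constexpr std::uint16_t sequence_number = 1;

    std::uint64_t schema_version(std::uint64_t table_id) {
        // Combine both inputs; the real code feeds them into an md5 hasher
        // and turns the digest into a name UUID.
        std::uint64_t h = std::hash<std::uint64_t>{}(table_id);
        h ^= std::hash<std::uint16_t>{}(sequence_number) + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2);
        return h;
    }

    int main() {
        std::cout << std::hex << schema_version(0xdeadbeef) << '\n';
        // Same inputs always yield the same version; bumping sequence_number
        // changes every built-in table's version at once.
    }
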
-static const uint16_t version_sequence_number = 1; - -table_schema_version generate_schema_version(utils::UUID table_id) { - md5_hasher h; - feed_hash(h, table_id); - feed_hash(h, version_sequence_number); - return utils::UUID_gen::get_name_UUID(h.finalize()); -} - -// Currently, the type variables (uuid_type, etc.) are thread-local reference- -// counted shared pointers. This forces us to make the built-in schemas -// below thread-local as well. -// We return schema_ptr, not schema&, because that's the "tradition" in our -// other code. -// We hide the thread_local variable inside a function, because if we later -// remove the thread_local, we'll start having initialization order -// problems (we need the type variables to be constructed first), and using -// functions will solve this problem. So we use functions right now. - - -schema_ptr hints() { - static thread_local auto hints = [] { - schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, HINTS), NAME, HINTS, - // partition key - {{"target_id", uuid_type}}, - // clustering key - {{"hint_id", timeuuid_type}, {"message_version", int32_type}}, - // regular columns - {{"mutation", bytes_type}}, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "hints awaiting delivery" - ))); - builder.set_gc_grace_seconds(0); - builder.set_compaction_strategy_options({{ "enabled", "false" }}); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(schema_builder::compact_storage::yes); - }(); - return hints; -} - -schema_ptr batchlog() { - static thread_local auto batchlog = [] { - schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, BATCHLOG), NAME, BATCHLOG, - // partition key - {{"id", uuid_type}}, - // clustering key - {}, - // regular columns - {{"data", bytes_type}, {"version", int32_type}, {"written_at", timestamp_type}}, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "batches awaiting replay" - // FIXME: the original Java code also had: - // operations on resulting CFMetaData: - // .compactionStrategyOptions(Collections.singletonMap("min_threshold", "2")) - ))); - builder.set_gc_grace_seconds(0); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(schema_builder::compact_storage::no); - }(); - return batchlog; -} - -/*static*/ schema_ptr paxos() { - static thread_local auto paxos = [] { - schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, PAXOS), NAME, PAXOS, - // partition key - {{"row_key", bytes_type}}, - // clustering key - {{"cf_id", uuid_type}}, - // regular columns - {{"in_progress_ballot", timeuuid_type}, {"most_recent_commit", bytes_type}, {"most_recent_commit_at", timeuuid_type}, {"proposal", bytes_type}, {"proposal_ballot", timeuuid_type}}, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "in-progress paxos proposals" - // FIXME: the original Java code also had: - // operations on resulting CFMetaData: - // .compactionStrategyClass(LeveledCompactionStrategy.class); - ))); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(schema_builder::compact_storage::no); - }(); - return paxos; -} - -schema_ptr built_indexes() { - static thread_local auto built_indexes = [] { - schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, BUILT_INDEXES), NAME, BUILT_INDEXES, - // partition key - {{"table_name", utf8_type}}, - // clustering key - {{"index_name", 
utf8_type}}, - // regular columns - {}, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "built column indexes" - ))); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(schema_builder::compact_storage::yes); - }(); - return built_indexes; -} - -/*static*/ schema_ptr local() { - static thread_local auto local = [] { - schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, LOCAL), NAME, LOCAL, - // partition key - {{"key", utf8_type}}, - // clustering key - {}, - // regular columns - { - {"bootstrapped", utf8_type}, - {"cluster_name", utf8_type}, - {"cql_version", utf8_type}, - {"data_center", utf8_type}, - {"gossip_generation", int32_type}, - {"host_id", uuid_type}, - {"native_protocol_version", utf8_type}, - {"partitioner", utf8_type}, - {"rack", utf8_type}, - {"release_version", utf8_type}, - {"schema_version", uuid_type}, - {"thrift_version", utf8_type}, - {"tokens", set_type_impl::get_instance(utf8_type, true)}, - {"truncated_at", map_type_impl::get_instance(uuid_type, bytes_type, true)}, - // The following 3 columns are only present up until 2.1.8 tables - {"rpc_address", inet_addr_type}, - {"broadcast_address", inet_addr_type}, - {"listen_address", inet_addr_type}, - {"supported_features", utf8_type}, - - }, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "information about the local node" - ))); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(schema_builder::compact_storage::no); - }(); - return local; -} - -/*static*/ schema_ptr peers() { - static thread_local auto peers = [] { - schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, PEERS), NAME, PEERS, - // partition key - {{"peer", inet_addr_type}}, - // clustering key - {}, - // regular columns - { - {"data_center", utf8_type}, - {"host_id", uuid_type}, - {"preferred_ip", inet_addr_type}, - {"rack", utf8_type}, - {"release_version", utf8_type}, - {"rpc_address", inet_addr_type}, - {"schema_version", uuid_type}, - {"tokens", set_type_impl::get_instance(utf8_type, true)}, - {"supported_features", utf8_type}, - }, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "information about known peers in the cluster" - ))); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(schema_builder::compact_storage::no); - }(); - return peers; -} - -/*static*/ schema_ptr peer_events() { - static thread_local auto peer_events = [] { - schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, PEER_EVENTS), NAME, PEER_EVENTS, - // partition key - {{"peer", inet_addr_type}}, - // clustering key - {}, - // regular columns - { - {"hints_dropped", map_type_impl::get_instance(uuid_type, int32_type, true)}, - }, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "events related to peers" - ))); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(schema_builder::compact_storage::no); - }(); - return peer_events; -} - -/*static*/ schema_ptr range_xfers() { - static thread_local auto range_xfers = [] { - schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, RANGE_XFERS), NAME, RANGE_XFERS, - // partition key - {{"token_bytes", bytes_type}}, - // clustering key - {}, - // regular columns - {{"requested_at", timestamp_type}}, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "ranges 
requested for transfer" - ))); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(schema_builder::compact_storage::no); - }(); - return range_xfers; -} - -/*static*/ schema_ptr compactions_in_progress() { - static thread_local auto compactions_in_progress = [] { - schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, COMPACTIONS_IN_PROGRESS), NAME, COMPACTIONS_IN_PROGRESS, - // partition key - {{"id", uuid_type}}, - // clustering key - {}, - // regular columns - { - {"columnfamily_name", utf8_type}, - {"inputs", set_type_impl::get_instance(int32_type, true)}, - {"keyspace_name", utf8_type}, - }, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "unfinished compactions" - ))); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(schema_builder::compact_storage::no); - }(); - return compactions_in_progress; -} - -/*static*/ schema_ptr compaction_history() { - static thread_local auto compaction_history = [] { - schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, COMPACTION_HISTORY), NAME, COMPACTION_HISTORY, - // partition key - {{"id", uuid_type}}, - // clustering key - {}, - // regular columns - { - {"bytes_in", long_type}, - {"bytes_out", long_type}, - {"columnfamily_name", utf8_type}, - {"compacted_at", timestamp_type}, - {"keyspace_name", utf8_type}, - {"rows_merged", map_type_impl::get_instance(int32_type, long_type, true)}, - }, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "week-long compaction history" - ))); - builder.set_default_time_to_live(std::chrono::duration_cast(days(7))); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(schema_builder::compact_storage::no); - }(); - return compaction_history; -} - -/*static*/ schema_ptr sstable_activity() { - static thread_local auto sstable_activity = [] { - schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, SSTABLE_ACTIVITY), NAME, SSTABLE_ACTIVITY, - // partition key - { - {"keyspace_name", utf8_type}, - {"columnfamily_name", utf8_type}, - {"generation", int32_type}, - }, - // clustering key - {}, - // regular columns - { - {"rate_120m", double_type}, - {"rate_15m", double_type}, - }, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "historic sstable read rates" - ))); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(schema_builder::compact_storage::no); - }(); - return sstable_activity; -} - -schema_ptr size_estimates() { - static thread_local auto size_estimates = [] { - schema_builder builder(make_lw_shared(schema(generate_legacy_id(NAME, SIZE_ESTIMATES), NAME, SIZE_ESTIMATES, - // partition key - {{"keyspace_name", utf8_type}}, - // clustering key - {{"table_name", utf8_type}, {"range_start", utf8_type}, {"range_end", utf8_type}}, - // regular columns - { - {"mean_partition_size", long_type}, - {"partitions_count", long_type}, - }, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "per-table primary range size estimates" - ))); - builder.set_gc_grace_seconds(0); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(schema_builder::compact_storage::no); - }(); - return size_estimates; -} - -namespace v3 { - -schema_ptr batches() { - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, BATCHES), NAME, 
BATCHES, - // partition key - {{"id", timeuuid_type}}, - // clustering key - {}, - // regular columns - {{"mutations", list_type_impl::get_instance(bytes_type, true)}, {"version", int32_type}}, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "batches awaiting replay" - ))); - builder.set_gc_grace_seconds(0); - // FIXME: the original Java code also had: - //.copy(new LocalPartitioner(TimeUUIDType.instance)) - builder.set_gc_grace_seconds(0); - builder.set_compaction_strategy(sstables::compaction_strategy_type::size_tiered); - builder.set_compaction_strategy_options({{"min_threshold", "2"}}); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(schema_builder::compact_storage::no); - }(); - return schema; -} - -schema_ptr paxos() { - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, PAXOS), NAME, PAXOS, - // partition key - {{"row_key", bytes_type}}, - // clustering key - {{"cf_id", uuid_type}}, - // regular columns - {{"in_progress_ballot", timeuuid_type}, - {"most_recent_commit", bytes_type}, - {"most_recent_commit_at", timeuuid_type}, - {"most_recent_commit_version", int32_type}, - {"proposal", timeuuid_type}, - {"proposal_version", int32_type} - }, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "in-progress paxos proposals" - ))); - builder.set_compaction_strategy(sstables::compaction_strategy_type::leveled); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(); - }(); - return schema; -} - -schema_ptr built_indexes() { - // identical to ours, but ours otoh is a mix-in of the 3.x series cassandra one - return db::system_keyspace::built_indexes(); -} - -schema_ptr local() { - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, LOCAL), NAME, LOCAL, - // partition key - {{"key", utf8_type}}, - // clustering key - {}, - // regular columns - { - {"bootstrapped", utf8_type}, - {"broadcast_address", inet_addr_type}, - {"cluster_name", utf8_type}, - {"cql_version", utf8_type}, - {"data_center", utf8_type}, - {"gossip_generation", int32_type}, - {"host_id", uuid_type}, - {"listen_address", inet_addr_type}, - {"native_protocol_version", utf8_type}, - {"partitioner", utf8_type}, - {"rack", utf8_type}, - {"release_version", utf8_type}, - {"rpc_address", inet_addr_type}, - {"schema_version", uuid_type}, - {"thrift_version", utf8_type}, - {"tokens", set_type_impl::get_instance(utf8_type, true)}, - {"truncated_at", map_type_impl::get_instance(uuid_type, bytes_type, true)}, - }, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "information about the local node" - ))); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(schema_builder::compact_storage::no); - }(); - return schema; -} - -schema_ptr peers() { - // identical - return db::system_keyspace::peers(); -} - -schema_ptr peer_events() { - // identical - return db::system_keyspace::peer_events(); -} - -schema_ptr range_xfers() { - // identical - return db::system_keyspace::range_xfers(); -} - -schema_ptr compaction_history() { - // identical - return db::system_keyspace::compaction_history(); -} - -schema_ptr sstable_activity() { - // identical - return db::system_keyspace::sstable_activity(); -} - -schema_ptr size_estimates() { - // identical - return db::system_keyspace::size_estimates(); -} - -schema_ptr 
available_ranges() { - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, AVAILABLE_RANGES), NAME, AVAILABLE_RANGES, - // partition key - {{"keyspace_name", utf8_type}}, - // clustering key - {}, - // regular columns - {{"ranges", set_type_impl::get_instance(bytes_type, true)}}, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "available keyspace/ranges during bootstrap/replace that are ready to be served" - ))); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(); - }(); - return schema; -} - -schema_ptr views_builds_in_progress() { - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, VIEWS_BUILDS_IN_PROGRESS), NAME, VIEWS_BUILDS_IN_PROGRESS, - // partition key - {{"keyspace_name", utf8_type}}, - // clustering key - {{"view_name", utf8_type}}, - // regular columns - {{"last_token", utf8_type}, {"generation_number", int32_type}}, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "views builds current progress" - ))); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(); - }(); - return schema; -} - -schema_ptr built_views() { - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, BUILT_VIEWS), NAME, BUILT_VIEWS, - // partition key - {{"keyspace_name", utf8_type}}, - // clustering key - {{"view_name", utf8_type}}, - // regular columns - {}, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "built views" - ))); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(); - }(); - return schema; -} - -} // - -namespace legacy { - -schema_ptr hints() { - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, HINTS), NAME, HINTS, - // partition key - {{"target_id", uuid_type}}, - // clustering key - {{"hint_id", timeuuid_type}, {"message_version", int32_type}}, - // regular columns - {{"mutation", bytes_type}}, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "*DEPRECATED* hints awaiting delivery" - ))); - builder.set_gc_grace_seconds(0); - builder.set_compaction_strategy(sstables::compaction_strategy_type::size_tiered); - builder.set_compaction_strategy_options({{"enabled", "false"}}); - builder.with_version(generate_schema_version(builder.uuid())); - builder.with(schema_builder::compact_storage::yes); - return builder.build(); - }(); - return schema; -} - -schema_ptr batchlog() { - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, BATCHLOG), NAME, BATCHLOG, - // partition key - {{"id", uuid_type}}, - // clustering key - {}, - // regular columns - {{"data", bytes_type}, {"version", int32_type}, {"written_at", timestamp_type}}, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "*DEPRECATED* batchlog entries" - ))); - builder.set_gc_grace_seconds(0); - builder.set_compaction_strategy(sstables::compaction_strategy_type::size_tiered); - builder.set_compaction_strategy_options({{"min_threshold", "2"}}); - builder.with(schema_builder::compact_storage::no); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(); - }(); - return schema; -} - -static constexpr auto schema_gc_grace = 
std::chrono::duration_cast(days(7)).count(); - -schema_ptr keyspaces() { - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, KEYSPACES), NAME, KEYSPACES, - // partition key - {{"keyspace_name", utf8_type}}, - // clustering key - {}, - // regular columns - { - {"durable_writes", boolean_type}, - {"strategy_class", utf8_type}, - {"strategy_options", utf8_type} - }, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "*DEPRECATED* keyspace definitions" - ))); - builder.set_gc_grace_seconds(schema_gc_grace); - builder.with(schema_builder::compact_storage::yes); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(); - }(); - return schema; -} - -schema_ptr column_families() { - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, COLUMNFAMILIES), NAME, COLUMNFAMILIES, - // partition key - {{"keyspace_name", utf8_type}}, - // clustering key - {{"columnfamily_name", utf8_type}}, - // regular columns - { - {"bloom_filter_fp_chance", double_type}, - {"caching", utf8_type}, - {"cf_id", uuid_type}, - {"comment", utf8_type}, - {"compaction_strategy_class", utf8_type}, - {"compaction_strategy_options", utf8_type}, - {"comparator", utf8_type}, - {"compression_parameters", utf8_type}, - {"default_time_to_live", int32_type}, - {"default_validator", utf8_type}, - {"dropped_columns", map_type_impl::get_instance(utf8_type, long_type, true)}, - {"gc_grace_seconds", int32_type}, - {"is_dense", boolean_type}, - {"key_validator", utf8_type}, - {"local_read_repair_chance", double_type}, - {"max_compaction_threshold", int32_type}, - {"max_index_interval", int32_type}, - {"memtable_flush_period_in_ms", int32_type}, - {"min_compaction_threshold", int32_type}, - {"min_index_interval", int32_type}, - {"read_repair_chance", double_type}, - {"speculative_retry", utf8_type}, - {"subcomparator", utf8_type}, - {"type", utf8_type}, - // The following 4 columns are only present up until 2.1.8 tables - {"key_aliases", utf8_type}, - {"value_alias", utf8_type}, - {"column_aliases", utf8_type}, - {"index_interval", int32_type},}, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "*DEPRECATED* table definitions" - ))); - builder.set_gc_grace_seconds(schema_gc_grace); - builder.with(schema_builder::compact_storage::no); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(); - }(); - return schema; -} - -schema_ptr columns() { - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, COLUMNS), NAME, COLUMNS, - // partition key - {{"keyspace_name", utf8_type}}, - // clustering key - {{"columnfamily_name", utf8_type}, {"column_name", utf8_type}}, - // regular columns - { - {"component_index", int32_type}, - {"index_name", utf8_type}, - {"index_options", utf8_type}, - {"index_type", utf8_type}, - {"type", utf8_type}, - {"validator", utf8_type}, - }, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "column definitions" - ))); - builder.set_gc_grace_seconds(schema_gc_grace); - builder.with(schema_builder::compact_storage::no); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(); - }(); - return schema; -} - -schema_ptr triggers() { - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, 
TRIGGERS), NAME, TRIGGERS, - // partition key - {{"keyspace_name", utf8_type}}, - // clustering key - {{"columnfamily_name", utf8_type}, {"trigger_name", utf8_type}}, - // regular columns - { - {"trigger_options", map_type_impl::get_instance(utf8_type, utf8_type, true)}, - }, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "trigger definitions" - ))); - builder.set_gc_grace_seconds(schema_gc_grace); - builder.with(schema_builder::compact_storage::no); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(); - }(); - return schema; -} - -schema_ptr usertypes() { - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, USERTYPES), NAME, USERTYPES, - // partition key - {{"keyspace_name", utf8_type}}, - // clustering key - {{"type_name", utf8_type}}, - // regular columns - { - {"field_names", list_type_impl::get_instance(utf8_type, true)}, - {"field_types", list_type_impl::get_instance(utf8_type, true)}, - }, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "user defined type definitions" - ))); - builder.set_gc_grace_seconds(schema_gc_grace); - builder.with(schema_builder::compact_storage::no); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(); - }(); - return schema; -} - -schema_ptr functions() { - /** - * Note: we have our own "legacy" version of this table (in schema_tables), - * but it is (afaik) not used, and differs slightly from the origin one. - * This is based on the origin schema, since we're more likely to encounter - * installations of that to migrate, rather than our own (if we don't use the table). - */ - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, FUNCTIONS), NAME, FUNCTIONS, - // partition key - {{"keyspace_name", utf8_type}}, - // clustering key - {{"function_name", utf8_type}, {"signature", list_type_impl::get_instance(utf8_type, false)}}, - // regular columns - { - {"argument_names", list_type_impl::get_instance(utf8_type, true)}, - {"argument_types", list_type_impl::get_instance(utf8_type, true)}, - {"body", utf8_type}, - {"language", utf8_type}, - {"return_type", utf8_type}, - {"called_on_null_input", boolean_type}, - }, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "*DEPRECATED* user defined function definitions" - ))); - builder.set_gc_grace_seconds(schema_gc_grace); - builder.with(schema_builder::compact_storage::no); - builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(); - }(); - return schema; -} - -schema_ptr aggregates() { - static thread_local auto schema = [] { - schema_builder builder(make_lw_shared(::schema(generate_legacy_id(NAME, AGGREGATES), NAME, AGGREGATES, - // partition key - {{"keyspace_name", utf8_type}}, - // clustering key - {{"aggregate_name", utf8_type}, {"signature", list_type_impl::get_instance(utf8_type, false)}}, - // regular columns - { - {"argument_types", list_type_impl::get_instance(utf8_type, true)}, - {"final_func", utf8_type}, - {"initcond", bytes_type}, - {"return_type", utf8_type}, - {"state_func", utf8_type}, - {"state_type", utf8_type}, - }, - // static columns - {}, - // regular column name type - utf8_type, - // comment - "*DEPRECATED* user defined aggregate definitions" - ))); - builder.set_gc_grace_seconds(schema_gc_grace); - builder.with(schema_builder::compact_storage::no); - 
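// Aside: every schema accessor in this file relies on the same
// construct-once-per-shard memoization idiom. Below is a minimal,
// self-contained sketch of that idiom; "table_schema" and "example_table"
// are illustrative stand-ins, not the Scylla types used above.
#include <memory>
#include <string>

struct table_schema {
    std::string keyspace;
    std::string name;
};

std::shared_ptr<table_schema> example_table() {
    // The immediately-invoked lambda runs once per thread, the first time the
    // accessor is called; later calls on the same shard return the cached
    // pointer. No locking is needed because each shard owns its own copy,
    // matching Seastar's share-nothing threading model.
    static thread_local auto schema = [] {
        return std::make_shared<table_schema>(table_schema{"system", "example"});
    }();
    return schema;
}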
builder.with_version(generate_schema_version(builder.uuid())); - return builder.build(); - }(); - return schema; -} - -} // - -static future<> setup_version() { - return gms::inet_address::lookup(qctx->db().get_config().rpc_address()).then([](gms::inet_address a) { - sstring req = sprint("INSERT INTO system.%s (key, release_version, cql_version, thrift_version, native_protocol_version, data_center, rack, partitioner, rpc_address, broadcast_address, listen_address, supported_features) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" - , db::system_keyspace::LOCAL); - auto& snitch = locator::i_endpoint_snitch::get_local_snitch_ptr(); - - return execute_cql(req, sstring(db::system_keyspace::LOCAL), - version::release(), - cql3::query_processor::CQL_VERSION, - ::cassandra::thrift_version, - to_sstring(cql_serialization_format::latest_version), - snitch->get_datacenter(utils::fb_utilities::get_broadcast_address()), - snitch->get_rack(utils::fb_utilities::get_broadcast_address()), - sstring(dht::global_partitioner().name()), - a.addr(), - utils::fb_utilities::get_broadcast_address().addr(), - netw::get_local_messaging_service().listen_address().addr(), - service::storage_service::get_config_supported_features() - ).discard_result(); - }); -} - -future<> check_health(); -future<> force_blocking_flush(sstring cfname); - -// Changing the real load_dc_rack_info into a future would trigger a tidal wave of futurization that would spread -// even into simple string operations like get_rack() / get_dc(). We will cache those at startup, and then change -// our view of it every time we do updates on those values. -// -// The cache must be distributed, because the values themselves may not update atomically, so a shard reading that -// is different than the one that wrote, may see a corrupted value. invoke_on_all will be used to guarantee that all -// updates are propagated correctly. -struct local_cache { - std::unordered_map _cached_dc_rack_info; - bootstrap_state _state; - future<> stop() { - return make_ready_future<>(); - } -}; -static distributed _local_cache; - -static future<> build_dc_rack_info() { - return execute_cql(sprint("SELECT peer, data_center, rack from system.%s", PEERS)).then([] (::shared_ptr msg) { - return do_for_each(*msg, [] (auto& row) { - // Not ideal to assume ipv4 here, but currently this is what the cql types wraps. - net::ipv4_address peer = row.template get_as("peer"); - if (!row.has("data_center") || !row.has("rack")) { - return make_ready_future<>(); - } - gms::inet_address gms_addr(std::move(peer)); - sstring dc = row.template get_as("data_center"); - sstring rack = row.template get_as("rack"); - - locator::endpoint_dc_rack element = { dc, rack }; - return _local_cache.invoke_on_all([gms_addr = std::move(gms_addr), element = std::move(element)] (local_cache& lc) { - lc._cached_dc_rack_info.emplace(gms_addr, element); - }); - }).then([msg] { - // Keep msg alive. - }); - }); -} - -static future<> build_bootstrap_info() { - sstring req = sprint("SELECT bootstrapped FROM system.%s WHERE key = ? 
", LOCAL); - return execute_cql(req, sstring(LOCAL)).then([] (auto msg) { - static auto state_map = std::unordered_map({ - { "NEEDS_BOOTSTRAP", bootstrap_state::NEEDS_BOOTSTRAP }, - { "COMPLETED", bootstrap_state::COMPLETED }, - { "IN_PROGRESS", bootstrap_state::IN_PROGRESS }, - { "DECOMMISSIONED", bootstrap_state::DECOMMISSIONED } - }); - bootstrap_state state = bootstrap_state::NEEDS_BOOTSTRAP; - - if (!msg->empty() && msg->one().has("bootstrapped")) { - state = state_map.at(msg->one().template get_as("bootstrapped")); - } - return _local_cache.invoke_on_all([state] (local_cache& lc) { - lc._state = state; - }); - }); -} - -future<> init_local_cache() { - return _local_cache.start().then([] { - engine().at_exit([] { - return _local_cache.stop(); - }); - }); -} - -future<> deinit_local_cache() { - return _local_cache.stop(); -} - -void minimal_setup(distributed& db, distributed& qp) { - qctx = std::make_unique(db, qp); -} - -future<> setup(distributed& db, distributed& qp) { - minimal_setup(db, qp); - return setup_version().then([&db] { - return update_schema_version(db.local().get_version()); - }).then([] { - return init_local_cache(); - }).then([] { - return build_dc_rack_info(); - }).then([] { - return build_bootstrap_info(); - }).then([] { - return check_health(); - }).then([] { - return db::schema_tables::save_system_keyspace_schema(); - }).then([] { - // #2514 - make sure "system" is written to system_schema.keyspaces. - return db::schema_tables::save_system_schema(NAME); - }).then([] { - return netw::get_messaging_service().invoke_on_all([] (auto& ms){ - return ms.init_local_preferred_ip_cache(); - }); - }); -} - -struct truncation_record { - static constexpr uint32_t current_magic = 0x53435452; // 'S' 'C' 'T' 'R' - - uint32_t magic; - std::vector positions; - db_clock::time_point time_stamp; -}; -} -} - -#include "idl/replay_position.dist.hh" -#include "idl/truncation_record.dist.hh" -#include "serializer_impl.hh" -#include "idl/replay_position.dist.impl.hh" -#include "idl/truncation_record.dist.impl.hh" - -namespace db { -namespace system_keyspace { - -typedef utils::UUID truncation_key; -typedef std::unordered_map truncation_map; - -static constexpr uint8_t current_version = 1; -static thread_local std::experimental::optional truncation_records; - -future<> save_truncation_records(const column_family& cf, db_clock::time_point truncated_at, replay_positions positions) { - truncation_record r; - - r.magic = truncation_record::current_magic; - r.time_stamp = truncated_at; - r.positions = std::move(positions); - - auto buf = ser::serialize_to_buffer(r, sizeof(current_version)); - - buf[0] = current_version; - - static_assert(sizeof(current_version) == 1, "using this as mark"); - assert(buf.size() & 1); // verify we've created an odd-numbered buffer - - map_type_impl::native_type tmp; - tmp.emplace_back(cf.schema()->id(), data_value(buf)); - auto map_type = map_type_impl::get_instance(uuid_type, bytes_type, true); - - sstring req = sprint("UPDATE system.%s SET truncated_at = truncated_at + ? WHERE key = '%s'", LOCAL, LOCAL); - return qctx->qp().execute_internal(req, {make_map_value(map_type, tmp)}).then([](auto rs) { - truncation_records = {}; - return force_blocking_flush(LOCAL); - }); -} - -/** - * This method is used to remove information about truncation time for specified column family - */ -future<> remove_truncation_record(utils::UUID id) { - sstring req = sprint("DELETE truncated_at[?] 
from system.%s WHERE key = '%s'", LOCAL, LOCAL); - return qctx->qp().execute_internal(req, {id}).then([](auto rs) { - truncation_records = {}; - return force_blocking_flush(LOCAL); - }); -} - -static future get_truncation_record(utils::UUID cf_id) { - if (!truncation_records) { - sstring req = sprint("SELECT truncated_at FROM system.%s WHERE key = '%s'", LOCAL, LOCAL); - return qctx->qp().execute_internal(req).then([cf_id](::shared_ptr rs) { - truncation_map tmp; - if (!rs->empty() && rs->one().has("truncated_at")) { - auto map = rs->one().get_map("truncated_at"); - for (auto& p : map) { - auto uuid = p.first; - auto buf = p.second; - - try { - truncation_record e; - - if (buf.size() & 1) { - // new record. - if (buf[0] != current_version) { - slogger.warn("Found truncation record of unknown version {}. Ignoring.", int(buf[0])); - continue; - } - e = ser::deserialize_from_buffer(buf, boost::type(), 1); - if (e.magic == truncation_record::current_magic) { - tmp[uuid] = e; - continue; - } - } else { - // old scylla records. (We hope) - // Read 64+64 bit RP:s, even though the - // struct (and official serial size) is 64+32. - data_input in(buf); - - slogger.debug("Reading old type record"); - while (in.avail() > sizeof(db_clock::rep)) { - auto id = in.read(); - auto pos = in.read(); - e.positions.emplace_back(id, position_type(pos)); - } - if (in.avail() == sizeof(db_clock::rep)) { - e.time_stamp = db_clock::time_point(db_clock::duration(in.read())); - tmp[uuid] = e; - continue; - } - } - } catch (std::out_of_range &) { - } - // Trying to load an origin table. - // This is useless to us, because the only usage for this - // data is commit log and batch replay, and we cannot replay - // either from origin anyway. - slogger.warn("Error reading truncation record for {}. " - "Most likely this is data from a cassandra instance." - "Make sure you have cleared commit and batch logs before upgrading.", - uuid - ); - } - } - truncation_records = std::move(tmp); - return get_truncation_record(cf_id); - }); - } - return make_ready_future((*truncation_records)[cf_id]); -} - -future<> save_truncation_record(const column_family& cf, db_clock::time_point truncated_at, db::replay_position rp) { - // TODO: this is horribly ineffective, we're doing a full flush of all system tables for all cores - // once, for each core (calling us). But right now, redesigning so that calling here (or, rather, - // save_truncation_records), is done from "somewhere higher, once per machine, not shard" is tricky. - // Mainly because drop_tables also uses truncate. And is run per-core as well. Gah. 
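// Aside: a self-contained sketch of the buffer-parity format detection used
// in get_truncation_record() above. It assumes, as save_truncation_records()
// guarantees, that new-format buffers carry one leading version byte over an
// even-sized serialized payload, while legacy buffers are raw sequences of
// 64-bit values and are therefore always even-sized.
#include <cstdint>
#include <vector>

constexpr std::uint8_t sketch_current_version = 1;

enum class record_format { versioned, legacy, unknown };

record_format classify(const std::vector<std::uint8_t>& buf) {
    if (buf.size() & 1) {
        // Odd size: a version byte was prepended; honour only known versions.
        return buf[0] == sketch_current_version ? record_format::versioned
                                                : record_format::unknown;
    }
    // Even size: assume an old-style raw stream of 64-bit fields.
    return record_format::legacy;
}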
- return get_truncation_record(cf.schema()->id()).then([&cf, truncated_at, rp](truncation_record e) { - auto i = std::find_if(e.positions.begin(), e.positions.end(), [rp](replay_position& p) { - return p.shard_id() == rp.shard_id(); - }); - if (i == e.positions.end()) { - e.positions.emplace_back(rp); - } else { - *i = rp; - } - return save_truncation_records(cf, std::max(truncated_at, e.time_stamp), e.positions); - }); -} - -future<db::replay_position> get_truncated_position(utils::UUID cf_id, uint32_t shard) { - return get_truncated_position(std::move(cf_id)).then([shard](replay_positions positions) { - for (auto& rp : positions) { - if (shard == rp.shard_id()) { - return make_ready_future<db::replay_position>(rp); - } - } - return make_ready_future<db::replay_position>(); - }); -} - -future<replay_positions> get_truncated_position(utils::UUID cf_id) { - return get_truncation_record(cf_id).then([](truncation_record e) { - return make_ready_future<replay_positions>(e.positions); - }); -} - -future<db_clock::time_point> get_truncated_at(utils::UUID cf_id) { - return get_truncation_record(cf_id).then([](truncation_record e) { - return make_ready_future<db_clock::time_point>(e.time_stamp); - }); -} - -set_type_impl::native_type prepare_tokens(std::unordered_set<dht::token>& tokens) { - set_type_impl::native_type tset; - for (auto& t: tokens) { - tset.push_back(dht::global_partitioner().to_sstring(t)); - } - return tset; -} - -std::unordered_set<dht::token> decode_tokens(set_type_impl::native_type& tokens) { - std::unordered_set<dht::token> tset; - for (auto& t: tokens) { - auto str = value_cast<sstring>(t); - assert(str == dht::global_partitioner().to_sstring(dht::global_partitioner().from_sstring(str))); - tset.insert(dht::global_partitioner().from_sstring(str)); - } - return tset; -} - -/** - * Record tokens being used by another node - */ -future<> update_tokens(gms::inet_address ep, std::unordered_set<dht::token> tokens) -{ - if (ep == utils::fb_utilities::get_broadcast_address()) { - return remove_endpoint(ep); - } - - sstring req = sprint("INSERT INTO system.%s (peer, tokens) VALUES (?, ?)", PEERS); - auto set_type = set_type_impl::get_instance(utf8_type, true); - return execute_cql(req, ep.addr(), make_set_value(set_type, prepare_tokens(tokens))).discard_result().then([] { - return force_blocking_flush(PEERS); - }); -} - -future<std::unordered_set<dht::token>> update_local_tokens( - const std::unordered_set<dht::token> add_tokens, - const std::unordered_set<dht::token> rm_tokens) { - return get_saved_tokens().then([add_tokens = std::move(add_tokens), rm_tokens = std::move(rm_tokens)] (auto tokens) { - for (auto& x : rm_tokens) { - tokens.erase(x); - } - for (auto& x : add_tokens) { - tokens.insert(x); - } - return update_tokens(tokens).then([tokens] { - return tokens; - }); - }); -} - -future<std::unordered_map<gms::inet_address, std::unordered_set<dht::token>>> load_tokens() { - sstring req = sprint("SELECT peer, tokens FROM system.%s", PEERS); - return execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) { - std::unordered_map<gms::inet_address, std::unordered_set<dht::token>> ret; - for (auto& row : *cql_result) { - auto peer = gms::inet_address(row.get_as<net::ipv4_address>("peer")); - if (row.has("tokens")) { - auto blob = row.get_blob("tokens"); - auto cdef = peers()->get_column_definition("tokens"); - auto deserialized = cdef->type->deserialize(blob); - auto tokens = value_cast<set_type_impl::native_type>(deserialized); - ret.emplace(peer, decode_tokens(tokens)); - } - } - return ret; - }); -} - -future<std::unordered_map<gms::inet_address, utils::UUID>> load_host_ids() { - sstring req = sprint("SELECT peer, host_id FROM system.%s", PEERS); - return execute_cql(req).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) { - std::unordered_map<gms::inet_address, utils::UUID> ret; - for (auto& row : *cql_result) { - auto peer = gms::inet_address(row.get_as<net::ipv4_address>("peer")); - if (row.has("host_id")) { - ret.emplace(peer, row.get_as<utils::UUID>("host_id")); - } - } - return ret; - }); -} - -future<std::unordered_map<gms::inet_address, sstring>> 
load_peer_features() { - sstring req = sprint("SELECT peer, supported_features FROM system.%s", PEERS); - return execute_cql(req).then([] (::shared_ptr cql_result) { - std::unordered_map ret; - for (auto& row : *cql_result) { - if (row.has("supported_features")) { - ret.emplace(row.get_as("peer"), - row.get_as("supported_features")); - } - } - return ret; - }); -} - -future<> update_preferred_ip(gms::inet_address ep, gms::inet_address preferred_ip) { - sstring req = sprint("INSERT INTO system.%s (peer, preferred_ip) VALUES (?, ?)", PEERS); - return execute_cql(req, ep.addr(), preferred_ip.addr()).discard_result().then([] { - return force_blocking_flush(PEERS); - }); -} - -future> get_preferred_ips() { - sstring req = sprint("SELECT peer, preferred_ip FROM system.%s", PEERS); - return execute_cql(req).then([] (::shared_ptr cql_res_set) { - std::unordered_map res; - - for (auto& r : *cql_res_set) { - if (r.has("preferred_ip")) { - res.emplace(gms::inet_address(r.get_as("peer")), - gms::inet_address(r.get_as("preferred_ip"))); - } - } - - return res; - }); -} - -template -static future<> update_cached_values(gms::inet_address ep, sstring column_name, Value value) { - return make_ready_future<>(); -} - -template <> -future<> update_cached_values(gms::inet_address ep, sstring column_name, sstring value) { - return _local_cache.invoke_on_all([ep = std::move(ep), - column_name = std::move(column_name), - value = std::move(value)] (local_cache& lc) { - if (column_name == "data_center") { - lc._cached_dc_rack_info[ep].dc = value; - } else if (column_name == "rack") { - lc._cached_dc_rack_info[ep].rack = value; - } - return make_ready_future<>(); - }); -} - -template -future<> update_peer_info(gms::inet_address ep, sstring column_name, Value value) { - if (ep == utils::fb_utilities::get_broadcast_address()) { - return make_ready_future<>(); - } - - return update_cached_values(ep, column_name, value).then([ep, column_name, value] { - sstring clause = sprint("(peer, %s) VALUES (?, ?)", column_name); - sstring req = sprint("INSERT INTO system.%s " + clause, PEERS); - return execute_cql(req, ep.addr(), value).discard_result(); - }); -} -// sets are not needed, since tokens are updated by another method -template future<> update_peer_info(gms::inet_address ep, sstring column_name, sstring); -template future<> update_peer_info(gms::inet_address ep, sstring column_name, utils::UUID); -template future<> update_peer_info(gms::inet_address ep, sstring column_name, net::ipv4_address); - -future<> update_hints_dropped(gms::inet_address ep, utils::UUID time_period, int value) { - // with 30 day TTL - sstring req = sprint("UPDATE system.%s USING TTL 2592000 SET hints_dropped[ ? ] = ? 
WHERE peer = ?", PEER_EVENTS); - return execute_cql(req, time_period, value, ep.addr()).discard_result(); -} - -future<> update_schema_version(utils::UUID version) { - sstring req = sprint("INSERT INTO system.%s (key, schema_version) VALUES (?, ?)", LOCAL); - return execute_cql(req, sstring(LOCAL), version).discard_result(); -} - -/** - * Remove stored tokens being used by another node - */ -future<> remove_endpoint(gms::inet_address ep) { - return _local_cache.invoke_on_all([ep] (local_cache& lc) { - lc._cached_dc_rack_info.erase(ep); - }).then([ep] { - sstring req = sprint("DELETE FROM system.%s WHERE peer = ?", PEERS); - return execute_cql(req, ep.addr()).discard_result(); - }).then([] { - return force_blocking_flush(PEERS); - }); -} - - /** - * This method is used to update the System Keyspace with the new tokens for this node - */ -future<> update_tokens(std::unordered_set tokens) { - if (tokens.empty()) { - throw std::invalid_argument("remove_endpoint should be used instead"); - } - - sstring req = sprint("INSERT INTO system.%s (key, tokens) VALUES (?, ?)", LOCAL); - auto set_type = set_type_impl::get_instance(utf8_type, true); - return execute_cql(req, sstring(LOCAL), make_set_value(set_type, prepare_tokens(tokens))).discard_result().then([] { - return force_blocking_flush(LOCAL); - }); -} - -future<> force_blocking_flush(sstring cfname) { - assert(qctx); - return qctx->_db.invoke_on_all([cfname = std::move(cfname)](database& db) { - // if (!Boolean.getBoolean("cassandra.unsafesystem")) - column_family& cf = db.find_column_family(NAME, cfname); - return cf.flush(); - }); -} - -/** - * One of three things will happen if you try to read the system keyspace: - * 1. files are present and you can read them: great - * 2. no files are there: great (new node is assumed) - * 3. 
files are present but you can't read them: bad - */ -future<> check_health() { - using namespace cql_transport::messages; - sstring req = sprint("SELECT cluster_name FROM system.%s WHERE key=?", LOCAL); - return execute_cql(req, sstring(LOCAL)).then([] (::shared_ptr<cql3::untyped_result_set> msg) { - if (msg->empty() || !msg->one().has("cluster_name")) { - // this is a brand new node - sstring ins_req = sprint("INSERT INTO system.%s (key, cluster_name) VALUES (?, ?)", LOCAL); - return execute_cql(ins_req, sstring(LOCAL), qctx->db().get_config().cluster_name()).discard_result(); - } else { - auto saved_cluster_name = msg->one().get_as<sstring>("cluster_name"); - auto cluster_name = qctx->db().get_config().cluster_name(); - - if (cluster_name != saved_cluster_name) { - throw exceptions::configuration_exception("Saved cluster name " + saved_cluster_name + " != configured name " + cluster_name); - } - - return make_ready_future<>(); - } - }); -} - -future<std::unordered_set<dht::token>> get_saved_tokens() { - sstring req = sprint("SELECT tokens FROM system.%s WHERE key = ?", LOCAL); - return execute_cql(req, sstring(LOCAL)).then([] (auto msg) { - if (msg->empty() || !msg->one().has("tokens")) { - return make_ready_future<std::unordered_set<dht::token>>(); - } - - auto blob = msg->one().get_blob("tokens"); - auto cdef = local()->get_column_definition("tokens"); - auto deserialized = cdef->type->deserialize(blob); - auto tokens = value_cast<set_type_impl::native_type>(deserialized); - - return make_ready_future<std::unordered_set<dht::token>>(decode_tokens(tokens)); - }); -} - -bool bootstrap_complete() { - return get_bootstrap_state() == bootstrap_state::COMPLETED; -} - -bool bootstrap_in_progress() { - return get_bootstrap_state() == bootstrap_state::IN_PROGRESS; -} - -bool was_decommissioned() { - return get_bootstrap_state() == bootstrap_state::DECOMMISSIONED; -} - -bootstrap_state get_bootstrap_state() { - return _local_cache.local()._state; -} - -future<> set_bootstrap_state(bootstrap_state state) { - static std::unordered_map<bootstrap_state, sstring, enum_hash<bootstrap_state>> state_to_name({ - { bootstrap_state::NEEDS_BOOTSTRAP, "NEEDS_BOOTSTRAP" }, - { bootstrap_state::COMPLETED, "COMPLETED" }, - { bootstrap_state::IN_PROGRESS, "IN_PROGRESS" }, - { bootstrap_state::DECOMMISSIONED, "DECOMMISSIONED" } - }); - - sstring state_name = state_to_name.at(state); - - sstring req = sprint("INSERT INTO system.%s (key, bootstrapped) VALUES (?, ?)", LOCAL); - return execute_cql(req, sstring(LOCAL), state_name).discard_result().then([state] { - return force_blocking_flush(LOCAL).then([state] { - return _local_cache.invoke_on_all([state] (local_cache& lc) { - lc._state = state; - }); - }); - }); -} - -future<bool> -is_index_built(const sstring& ks_name, const sstring& index_name) { - auto req = sprint("SELECT index_name FROM %s.\"%s\" WHERE table_name=? AND index_name=?", NAME, BUILT_INDEXES); - return execute_cql(req, ks_name, index_name).then([](::shared_ptr<cql3::untyped_result_set> result) { - return make_ready_future<bool>(!result->empty()); - }); -} - -future<> -set_index_built(const sstring& ks_name, const sstring& index_name) { - auto req = sprint("INSERT INTO %s.\"%s\" (table_name, index_name) VALUES (?, ?)", NAME, BUILT_INDEXES); - return execute_cql(req, ks_name, index_name).discard_result().then([] { - return force_blocking_flush(BUILT_INDEXES); - }); -} - -future<> -set_index_removed(const sstring& ks_name, const sstring& index_name) { - auto req = sprint("DELETE FROM %s.\"%s\" WHERE table_name = ? 
AND index_name = ?", NAME, BUILT_INDEXES); - return execute_cql(req, ks_name, index_name).discard_result().then([] { - return force_blocking_flush(BUILT_INDEXES); - }); -} - -std::vector all_tables() { - std::vector r; - auto schema_tables = db::schema_tables::all_tables(); - std::copy(schema_tables.begin(), schema_tables.end(), std::back_inserter(r)); - r.insert(r.end(), { built_indexes(), hints(), batchlog(), paxos(), local(), - peers(), peer_events(), range_xfers(), - compactions_in_progress(), compaction_history(), - sstable_activity(), size_estimates(), - }); - // legacy schema - r.insert(r.end(), { - // TODO: once we migrate hints/batchlog and add convertor - // legacy::hints(), legacy::batchlog(), - legacy::keyspaces(), legacy::column_families(), - legacy::columns(), legacy::triggers(), legacy::usertypes(), - legacy::functions(), legacy::aggregates(), }); - - return r; -} - -static void maybe_add_virtual_reader(schema_ptr s, database& db) { - if (s.get() == size_estimates().get()) { - db.find_column_family(s).set_virtual_reader(mutation_source(db::size_estimates::virtual_reader())); - } -} - -static bool maybe_write_in_user_memory(schema_ptr s, database& db) { - return (s.get() == batchlog().get()); -} - -void make(database& db, bool durable, bool volatile_testing_only) { - for (auto&& table : all_tables()) { - auto ks_name = table->ks_name(); - if (!db.has_keyspace(ks_name)) { - auto ksm = make_lw_shared(ks_name, - "org.apache.cassandra.locator.LocalStrategy", - std::map{}, - durable - ); - auto kscfg = db.make_keyspace_config(*ksm); - kscfg.enable_disk_reads = !volatile_testing_only; - kscfg.enable_disk_writes = !volatile_testing_only; - kscfg.enable_commitlog = !volatile_testing_only; - kscfg.enable_cache = true; - // don't make system keyspace reads wait for user reads - kscfg.read_concurrency_config.sem = &db.system_keyspace_read_concurrency_sem(); - kscfg.read_concurrency_config.timeout = {}; - kscfg.read_concurrency_config.max_queue_length = std::numeric_limits::max(); - // don't make system keyspace writes wait for user writes (if under pressure) - kscfg.dirty_memory_manager = &db._system_dirty_memory_manager; - keyspace _ks{ksm, std::move(kscfg)}; - auto rs(locator::abstract_replication_strategy::create_replication_strategy(NAME, "LocalStrategy", service::get_local_storage_service().get_token_metadata(), ksm->strategy_options())); - _ks.set_replication_strategy(std::move(rs)); - db.add_keyspace(ks_name, std::move(_ks)); - } - auto& ks = db.find_keyspace(ks_name); - auto cfg = ks.make_column_family_config(*table, db.get_config()); - if (maybe_write_in_user_memory(table, db)) { - cfg.dirty_memory_manager = &db._dirty_memory_manager; - } - db.add_column_family(ks, table, std::move(cfg)); - maybe_add_virtual_reader(table, db); - } -} - -future get_local_host_id() { - using namespace cql_transport::messages; - sstring req = sprint("SELECT host_id FROM system.%s WHERE key=?", LOCAL); - return execute_cql(req, sstring(LOCAL)).then([] (::shared_ptr msg) { - auto new_id = [] { - auto host_id = utils::make_random_uuid(); - return set_local_host_id(host_id); - }; - if (msg->empty() || !msg->one().has("host_id")) { - return new_id(); - } - - auto host_id = msg->one().get_as("host_id"); - return make_ready_future(host_id); - }); -} - -future set_local_host_id(const utils::UUID& host_id) { - sstring req = sprint("INSERT INTO system.%s (key, host_id) VALUES (?, ?)", LOCAL); - return execute_cql(req, sstring(LOCAL), host_id).then([] (auto msg) { - return force_blocking_flush(LOCAL); - 
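// Aside: the read-or-create flow of get_local_host_id()/set_local_host_id()
// above, reduced to a self-contained sketch. The map stands in for the
// system.local row and the random id for utils::make_random_uuid(); both are
// illustrative assumptions, not the real storage or UUID code.
#include <random>
#include <string>
#include <unordered_map>

static std::unordered_map<std::string, std::string> fake_local_row;

static std::string make_random_id() {
    static std::mt19937_64 rng{std::random_device{}()};
    return std::to_string(rng());
}

std::string get_local_host_id_sketch() {
    auto it = fake_local_row.find("host_id");
    if (it != fake_local_row.end()) {
        return it->second;              // id already stored: reuse it
    }
    auto id = make_random_id();
    fake_local_row["host_id"] = id;     // persist first (the real code also flushes)
    return id;                          // hand out the id only after storing it
}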
}).then([host_id] { - return host_id; - }); -} - -std::unordered_map -load_dc_rack_info() { - return _local_cache.local()._cached_dc_rack_info; -} - - -future>> -query_mutations(distributed& proxy, const sstring& cf_name) { - return query_mutations(proxy, db::system_keyspace::NAME, cf_name); -} - -future> -query(distributed& proxy, const sstring& cf_name) { - return query(proxy, db::system_keyspace::NAME, cf_name); -} - -future> -query(distributed& proxy, const sstring& cf_name, const dht::decorated_key& key, query::clustering_range row_range) -{ - return query(proxy, db::system_keyspace::NAME, cf_name, key, row_range); -} - -future>> -query_mutations(distributed& proxy, const sstring& ks_name, const sstring& cf_name) { - database& db = proxy.local().get_db().local(); - schema_ptr schema = db.find_schema(ks_name, cf_name); - auto slice = partition_slice_builder(*schema).build(); - auto cmd = make_lw_shared(schema->id(), schema->version(), - std::move(slice), std::numeric_limits::max()); - return proxy.local().query_mutations_locally(std::move(schema), std::move(cmd), query::full_partition_range) - .then([] (foreign_ptr> rr, auto ht) { return std::move(rr); }); -} - -future> -query(distributed& proxy, const sstring& ks_name, const sstring& cf_name) { - database& db = proxy.local().get_db().local(); - schema_ptr schema = db.find_schema(ks_name, cf_name); - auto slice = partition_slice_builder(*schema).build(); - auto cmd = make_lw_shared(schema->id(), schema->version(), - std::move(slice), std::numeric_limits::max()); - return proxy.local().query(schema, cmd, {query::full_partition_range}, db::consistency_level::ONE, nullptr).then([schema, cmd] (auto&& result) { - return make_lw_shared(query::result_set::from_raw_result(schema, cmd->slice, *result)); - }); -} - -future> -query(distributed& proxy, const sstring& ks_name, const sstring& cf_name, const dht::decorated_key& key, query::clustering_range row_range) -{ - auto&& db = proxy.local().get_db().local(); - auto schema = db.find_schema(ks_name, cf_name); - auto slice = partition_slice_builder(*schema) - .with_range(std::move(row_range)) - .build(); - auto cmd = make_lw_shared(schema->id(), schema->version(), std::move(slice), query::max_rows); - return proxy.local().query(schema, cmd, {dht::partition_range::make_singular(key)}, db::consistency_level::ONE, nullptr).then([schema, cmd] (auto&& result) { - return make_lw_shared(query::result_set::from_raw_result(schema, cmd->slice, *result)); - }); -} - -static map_type_impl::native_type prepare_rows_merged(std::unordered_map& rows_merged) { - map_type_impl::native_type tmp; - for (auto& r: rows_merged) { - int32_t first = r.first; - int64_t second = r.second; - auto map_element = std::make_pair(data_value(first), data_value(second)); - tmp.push_back(std::move(map_element)); - } - return tmp; -} - -future<> update_compaction_history(sstring ksname, sstring cfname, int64_t compacted_at, int64_t bytes_in, int64_t bytes_out, - std::unordered_map rows_merged) -{ - // don't write anything when the history table itself is compacted, since that would in turn cause new compactions - if (ksname == "system" && cfname == COMPACTION_HISTORY) { - return make_ready_future<>(); - } - - auto map_type = map_type_impl::get_instance(int32_type, long_type, true); - - sstring req = sprint("INSERT INTO system.%s (id, keyspace_name, columnfamily_name, compacted_at, bytes_in, bytes_out, rows_merged) VALUES (?, ?, ?, ?, ?, ?, ?)" - , COMPACTION_HISTORY); - - return execute_cql(req, utils::UUID_gen::get_time_UUID(), 
ksname, cfname, compacted_at, bytes_in, bytes_out, - make_map_value(map_type, prepare_rows_merged(rows_merged))).discard_result(); -} - -future> get_compaction_history() -{ - sstring req = sprint("SELECT * from system.%s", COMPACTION_HISTORY); - return execute_cql(req).then([] (::shared_ptr msg) { - std::vector history; - - for (auto& row : *msg) { - compaction_history_entry entry; - entry.id = row.get_as("id"); - entry.ks = row.get_as("keyspace_name"); - entry.cf = row.get_as("columnfamily_name"); - entry.compacted_at = row.get_as("compacted_at"); - entry.bytes_in = row.get_as("bytes_in"); - entry.bytes_out = row.get_as("bytes_out"); - if (row.has("rows_merged")) { - entry.rows_merged = row.get_map("rows_merged"); - } - history.push_back(std::move(entry)); - } - return std::move(history); - }); -} - - -future increment_and_get_generation() { - auto req = sprint("SELECT gossip_generation FROM system.%s WHERE key='%s'", LOCAL, LOCAL); - return qctx->qp().execute_internal(req).then([] (auto rs) { - int generation; - if (rs->empty() || !rs->one().has("gossip_generation")) { - // seconds-since-epoch isn't a foolproof new generation - // (where foolproof is "guaranteed to be larger than the last one seen at this ip address"), - // but it's as close as sanely possible - generation = service::get_generation_number(); - } else { - // Other nodes will ignore gossip messages about a node that have a lower generation than previously seen. - int stored_generation = rs->one().template get_as("gossip_generation") + 1; - int now = service::get_generation_number(); - if (stored_generation >= now) { - slogger.warn("Using stored Gossip Generation {} as it is greater than current system time {}." - "See CASSANDRA-3654 if you experience problems", stored_generation, now); - generation = stored_generation; - } else { - generation = now; - } - } - auto req = sprint("INSERT INTO system.%s (key, gossip_generation) VALUES ('%s', ?)", LOCAL, LOCAL); - return qctx->qp().execute_internal(req, {generation}).then([generation] (auto rs) { - return force_blocking_flush(LOCAL); - }).then([generation] { - return make_ready_future(generation); - }); - }); -} - -mutation make_size_estimates_mutation(const sstring& ks, std::vector estimates) { - auto&& schema = db::system_keyspace::size_estimates(); - auto timestamp = api::new_timestamp(); - mutation m_to_apply{partition_key::from_single_value(*schema, utf8_type->decompose(ks)), schema}; - - for (auto&& e : estimates) { - auto ck = clustering_key_prefix(std::vector{ - utf8_type->decompose(e.schema->cf_name()), e.range_start_token, e.range_end_token}); - - m_to_apply.set_clustered_cell(ck, "mean_partition_size", e.mean_partition_size, timestamp); - m_to_apply.set_clustered_cell(ck, "partitions_count", e.partitions_count, timestamp); - } - - return m_to_apply; -} - -} // namespace system_keyspace -} // namespace db diff --git a/scylla/db/system_keyspace.hh b/scylla/db/system_keyspace.hh deleted file mode 100644 index a15cc08..0000000 --- a/scylla/db/system_keyspace.hh +++ /dev/null @@ -1,651 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include -#include "schema.hh" -#include "db/schema_tables.hh" -#include "utils/UUID.hh" -#include "gms/inet_address.hh" -#include "query-result-set.hh" -#include "locator/token_metadata.hh" -#include "db_clock.hh" -#include "db/commitlog/replay_position.hh" -#include - -namespace service { - -class storage_proxy; - -} - -namespace cql3 { - class query_processor; -} - -bool is_system_keyspace(const sstring& ks_name); - -namespace db { -namespace system_keyspace { - -static constexpr auto NAME = "system"; -static constexpr auto HINTS = "hints"; -static constexpr auto BATCHLOG = "batchlog"; -static constexpr auto PAXOS = "paxos"; -static constexpr auto BUILT_INDEXES = "IndexInfo"; -static constexpr auto LOCAL = "local"; -static constexpr auto PEERS = "peers"; -static constexpr auto PEER_EVENTS = "peer_events"; -static constexpr auto RANGE_XFERS = "range_xfers"; -static constexpr auto COMPACTIONS_IN_PROGRESS = "compactions_in_progress"; -static constexpr auto COMPACTION_HISTORY = "compaction_history"; -static constexpr auto SSTABLE_ACTIVITY = "sstable_activity"; -static constexpr auto SIZE_ESTIMATES = "size_estimates"; - -namespace v3 { -static constexpr auto BATCHES = "batches"; -static constexpr auto PAXOS = "paxos"; -static constexpr auto BUILT_INDEXES = "IndexInfo"; -static constexpr auto LOCAL = "local"; -static constexpr auto PEERS = "peers"; -static constexpr auto PEER_EVENTS = "peer_events"; -static constexpr auto RANGE_XFERS = "range_xfers"; -static constexpr auto COMPACTION_HISTORY = "compaction_history"; -static constexpr auto SSTABLE_ACTIVITY = "sstable_activity"; -static constexpr auto SIZE_ESTIMATES = "size_estimates"; -static constexpr auto AVAILABLE_RANGES = "available_ranges"; -static constexpr auto VIEWS_BUILDS_IN_PROGRESS = "views_builds_in_progress"; -static constexpr auto BUILT_VIEWS = "built_views"; -} - -namespace legacy { -static constexpr auto HINTS = "hints"; -static constexpr auto BATCHLOG = "batchlog"; -static constexpr auto KEYSPACES = "schema_keyspaces"; -static constexpr auto COLUMNFAMILIES = "schema_columnfamilies"; -static constexpr auto COLUMNS = "schema_columns"; -static constexpr auto TRIGGERS = "schema_triggers"; -static constexpr auto USERTYPES = "schema_usertypes"; -static constexpr auto FUNCTIONS = "schema_functions"; -static constexpr auto AGGREGATES = "schema_aggregates"; -} - -// 
Partition estimates for a given range of tokens. -struct range_estimates { - schema_ptr schema; - bytes range_start_token; - bytes range_end_token; - int64_t partitions_count; - int64_t mean_partition_size; -}; - -extern schema_ptr hints(); -extern schema_ptr batchlog(); -extern schema_ptr built_indexes(); // TODO (from Cassandra): make private - -namespace legacy { - -schema_ptr keyspaces(); -schema_ptr column_families(); -schema_ptr columns(); -schema_ptr triggers(); -schema_ptr usertypes(); -schema_ptr functions(); -schema_ptr aggregates(); - -} - -table_schema_version generate_schema_version(utils::UUID table_id); - -// Only for testing. -void minimal_setup(distributed& db, distributed& qp); - -future<> init_local_cache(); -future<> deinit_local_cache(); -future<> setup(distributed& db, distributed& qp); -future<> update_schema_version(utils::UUID version); -future<> update_tokens(std::unordered_set tokens); -future<> update_tokens(gms::inet_address ep, std::unordered_set tokens); - -future<> update_preferred_ip(gms::inet_address ep, gms::inet_address preferred_ip); -future> get_preferred_ips(); - -template -future<> update_peer_info(gms::inet_address ep, sstring column_name, Value value); - -future<> remove_endpoint(gms::inet_address ep); - -future<> update_hints_dropped(gms::inet_address ep, utils::UUID time_period, int value); - -std::vector all_tables(); -void make(database& db, bool durable, bool volatile_testing_only = false); - -future -is_index_built(const sstring& ks_name, const sstring& index_name); -future<> -set_index_built(const sstring& ks_name, const sstring& index_name); -future<> -set_index_removed(const sstring& ks_name, const sstring& index_name); - -future>> -query_mutations(distributed& proxy, const sstring& cf_name); - -// Returns all data from given system table. -// Intended to be used by code which is not performance critical. -future> query(distributed& proxy, const sstring& cf_name); - -// Returns a slice of given system table. -// Intended to be used by code which is not performance critical. -future> query( - distributed& proxy, - const sstring& cf_name, - const dht::decorated_key& key, - query::clustering_range row_ranges = query::clustering_range::make_open_ended_both_sides()); - -/// overloads - -future>> -query_mutations(distributed& proxy, - const sstring& ks_name, - const sstring& cf_name); - -// Returns all data from given system table. -// Intended to be used by code which is not performance critical. -future> query(distributed& proxy, - const sstring& ks_name, - const sstring& cf_name); - -// Returns a slice of given system table. -// Intended to be used by code which is not performance critical. 
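// Aside: update_local_tokens(), implemented earlier in this patch's
// system_keyspace.cc and declared further below, merges token sets by
// applying removals before additions, so a token named in both sets
// survives. A self-contained sketch of that semantics, with std::string
// standing in for dht::token:
#include <string>
#include <unordered_set>

using token_set = std::unordered_set<std::string>;

token_set merge_local_tokens(token_set saved, const token_set& add, const token_set& rm) {
    for (const auto& t : rm) {
        saved.erase(t);        // drop retired tokens first
    }
    for (const auto& t : add) {
        saved.insert(t);       // then add new ones; "add" wins on overlap
    }
    return saved;              // the real code persists this set before returning it
}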
-future> query( - distributed& proxy, - const sstring& ks_name, - const sstring& cf_name, - const dht::decorated_key& key, - query::clustering_range row_ranges = query::clustering_range::make_open_ended_both_sides()); - - -/** - * Return a map of IP addresses containing a map of dc and rack info - */ -std::unordered_map -load_dc_rack_info(); - -#if 0 - public static KSMetaData definition() - { - Iterable tables = - Iterables.concat(LegacySchemaTables.All, - Arrays.asList(BuiltIndexes, - Hints, - Batchlog, - Paxos, - Local, - Peers, - PeerEvents, - RangeXfers, - CompactionsInProgress, - CompactionHistory, - SSTableActivity)); - return new KSMetaData(NAME, LocalStrategy.class, Collections.emptyMap(), true, tables); - } - - private static volatile Map> truncationRecords; - private static volatile Map> truncationRecords; -#endif - -enum class bootstrap_state { - NEEDS_BOOTSTRAP, - COMPLETED, - IN_PROGRESS, - DECOMMISSIONED -}; - -#if 0 - private static DecoratedKey decorate(ByteBuffer key) - { - return StorageService.getPartitioner().decorateKey(key); - } - - public static void finishStartup() - { - setupVersion(); - LegacySchemaTables.saveSystemKeyspaceSchema(); - } - - private static void setupVersion() - { - String req = "INSERT INTO system.%s (key, release_version, cql_version, thrift_version, native_protocol_version, data_center, rack, partitioner) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"; - IEndpointSnitch snitch = DatabaseDescriptor.getEndpointSnitch(); - executeOnceInternal(String.format(req, LOCAL), - LOCAL, - FBUtilities.getReleaseVersionString(), - QueryProcessor.CQL_VERSION.toString(), - cassandraConstants.VERSION, - String.valueOf(Server.CURRENT_VERSION), - snitch.getDatacenter(FBUtilities.getBroadcastAddress()), - snitch.getRack(FBUtilities.getBroadcastAddress()), - DatabaseDescriptor.getPartitioner().getClass().getName()); - } - - /** - * Write compaction log, except columfamilies under system keyspace. - * - * @param cfs cfs to compact - * @param toCompact sstables to compact - * @return compaction task id or null if cfs is under system keyspace - */ - public static UUID startCompaction(ColumnFamilyStore cfs, Iterable toCompact) - { - if (NAME.equals(cfs.keyspace.getName())) - return null; - - UUID compactionId = UUIDGen.getTimeUUID(); - Iterable generations = Iterables.transform(toCompact, new Function() - { - public Integer apply(SSTableReader sstable) - { - return sstable.descriptor.generation; - } - }); - String req = "INSERT INTO system.%s (id, keyspace_name, columnfamily_name, inputs) VALUES (?, ?, ?, ?)"; - executeInternal(String.format(req, COMPACTIONS_IN_PROGRESS), compactionId, cfs.keyspace.getName(), cfs.name, Sets.newHashSet(generations)); - forceBlockingFlush(COMPACTIONS_IN_PROGRESS); - return compactionId; - } - - /** - * Deletes the entry for this compaction from the set of compactions in progress. The compaction does not need - * to complete successfully for this to be called. - * @param taskId what was returned from {@code startCompaction} - */ - public static void finishCompaction(UUID taskId) - { - assert taskId != null; - - executeInternal(String.format("DELETE FROM system.%s WHERE id = ?", COMPACTIONS_IN_PROGRESS), taskId); - forceBlockingFlush(COMPACTIONS_IN_PROGRESS); - } - - /** - * Returns a Map whose keys are KS.CF pairs and whose values are maps from sstable generation numbers to the - * task ID of the compaction they were participating in. 
- */ - public static Map, Map> getUnfinishedCompactions() - { - String req = "SELECT * FROM system.%s"; - UntypedResultSet resultSet = executeInternal(String.format(req, COMPACTIONS_IN_PROGRESS)); - - Map, Map> unfinishedCompactions = new HashMap<>(); - for (UntypedResultSet.Row row : resultSet) - { - String keyspace = row.getString("keyspace_name"); - String columnfamily = row.getString("columnfamily_name"); - Set inputs = row.getSet("inputs", Int32Type.instance); - UUID taskID = row.getUUID("id"); - - Pair kscf = Pair.create(keyspace, columnfamily); - Map generationToTaskID = unfinishedCompactions.get(kscf); - if (generationToTaskID == null) - generationToTaskID = new HashMap<>(inputs.size()); - - for (Integer generation : inputs) - generationToTaskID.put(generation, taskID); - - unfinishedCompactions.put(kscf, generationToTaskID); - } - return unfinishedCompactions; - } - - public static void discardCompactionsInProgress() - { - ColumnFamilyStore compactionLog = Keyspace.open(NAME).getColumnFamilyStore(COMPACTIONS_IN_PROGRESS); - compactionLog.truncateBlocking(); - } - - public static TabularData getCompactionHistory() throws OpenDataException - { - UntypedResultSet queryResultSet = executeInternal(String.format("SELECT * from system.%s", COMPACTION_HISTORY)); - return CompactionHistoryTabularData.from(queryResultSet); - } -#endif - struct compaction_history_entry { - utils::UUID id; - sstring ks; - sstring cf; - int64_t compacted_at = 0; - int64_t bytes_in = 0; - int64_t bytes_out = 0; - // Key: number of rows merged - // Value: counter - std::unordered_map rows_merged; - }; - - future<> update_compaction_history(sstring ksname, sstring cfname, int64_t compacted_at, int64_t bytes_in, int64_t bytes_out, - std::unordered_map rows_merged); - future> get_compaction_history(); - - typedef std::vector replay_positions; - - future<> save_truncation_record(const column_family&, db_clock::time_point truncated_at, db::replay_position); - future<> save_truncation_records(const column_family&, db_clock::time_point truncated_at, replay_positions); - future<> remove_truncation_record(utils::UUID); - future get_truncated_position(utils::UUID); - future get_truncated_position(utils::UUID, uint32_t shard); - future get_truncated_at(utils::UUID); - -#if 0 - - /** - * Record tokens being used by another node - */ - public static synchronized void updateTokens(InetAddress ep, Collection tokens) - { - if (ep.equals(FBUtilities.getBroadcastAddress())) - { - removeEndpoint(ep); - return; - } - - String req = "INSERT INTO system.%s (peer, tokens) VALUES (?, ?)"; - executeInternal(String.format(req, PEERS), ep, tokensAsSet(tokens)); - } - - public static synchronized void updatePreferredIP(InetAddress ep, InetAddress preferred_ip) - { - String req = "INSERT INTO system.%s (peer, preferred_ip) VALUES (?, ?)"; - executeInternal(String.format(req, PEERS), ep, preferred_ip); - forceBlockingFlush(PEERS); - } - - public static synchronized void updatePeerInfo(InetAddress ep, String columnName, Object value) - { - if (ep.equals(FBUtilities.getBroadcastAddress())) - return; - - String req = "INSERT INTO system.%s (peer, %s) VALUES (?, ?)"; - executeInternal(String.format(req, PEERS, columnName), ep, value); - } - - public static synchronized void updateHintsDropped(InetAddress ep, UUID timePeriod, int value) - { - // with 30 day TTL - String req = "UPDATE system.%s USING TTL 2592000 SET hints_dropped[ ? ] = ? 
WHERE peer = ?"; - executeInternal(String.format(req, PEER_EVENTS), timePeriod, value, ep); - } - - public static synchronized void updateSchemaVersion(UUID version) - { - String req = "INSERT INTO system.%s (key, schema_version) VALUES ('%s', ?)"; - executeInternal(String.format(req, LOCAL, LOCAL), version); - } - - private static Set tokensAsSet(Collection tokens) - { - Token.TokenFactory factory = StorageService.getPartitioner().getTokenFactory(); - Set s = new HashSet<>(tokens.size()); - for (Token tk : tokens) - s.add(factory.toString(tk)); - return s; - } - - private static Collection deserializeTokens(Collection tokensStrings) - { - Token.TokenFactory factory = StorageService.getPartitioner().getTokenFactory(); - List tokens = new ArrayList<>(tokensStrings.size()); - for (String tk : tokensStrings) - tokens.add(factory.fromString(tk)); - return tokens; - } - - /** - * Remove stored tokens being used by another node - */ - public static synchronized void removeEndpoint(InetAddress ep) - { - String req = "DELETE FROM system.%s WHERE peer = ?"; - executeInternal(String.format(req, PEERS), ep); - } - - /** - * This method is used to update the System Keyspace with the new tokens for this node - */ - public static synchronized void updateTokens(Collection tokens) - { - assert !tokens.isEmpty() : "removeEndpoint should be used instead"; - String req = "INSERT INTO system.%s (key, tokens) VALUES ('%s', ?)"; - executeInternal(String.format(req, LOCAL, LOCAL), tokensAsSet(tokens)); - forceBlockingFlush(LOCAL); - } -#endif - - /** - * Convenience method to update the list of tokens in the local system keyspace. - * - * @param addTokens tokens to add - * @param rmTokens tokens to remove - * @return the collection of persisted tokens - */ - future> update_local_tokens( - const std::unordered_set add_tokens, - const std::unordered_set rm_tokens); - - /** - * Return a map of stored tokens to IP addresses - * - */ - future>> load_tokens(); - - /** - * Return a map of store host_ids to IP addresses - * - */ - future> load_host_ids(); - - future> get_saved_tokens(); - - future> load_peer_features(); - -future increment_and_get_generation(); -bool bootstrap_complete(); -bool bootstrap_in_progress(); -bootstrap_state get_bootstrap_state(); -bool was_decommissioned(); -future<> set_bootstrap_state(bootstrap_state state); - -#if 0 - public static boolean isIndexBuilt(String keyspaceName, String indexName) - { - ColumnFamilyStore cfs = Keyspace.open(NAME).getColumnFamilyStore(BUILT_INDEXES); - QueryFilter filter = QueryFilter.getNamesFilter(decorate(ByteBufferUtil.bytes(keyspaceName)), - BUILT_INDEXES, - FBUtilities.singleton(cfs.getComparator().makeCellName(indexName), cfs.getComparator()), - System.currentTimeMillis()); - return ColumnFamilyStore.removeDeleted(cfs.getColumnFamily(filter), Integer.MAX_VALUE) != null; - } - - public static void setIndexBuilt(String keyspaceName, String indexName) - { - ColumnFamily cf = ArrayBackedSortedColumns.factory.create(NAME, BUILT_INDEXES); - cf.addColumn(new BufferCell(cf.getComparator().makeCellName(indexName), ByteBufferUtil.EMPTY_BYTE_BUFFER, FBUtilities.timestampMicros())); - new Mutation(NAME, ByteBufferUtil.bytes(keyspaceName), cf).apply(); - } - - public static void setIndexRemoved(String keyspaceName, String indexName) - { - Mutation mutation = new Mutation(NAME, ByteBufferUtil.bytes(keyspaceName)); - mutation.delete(BUILT_INDEXES, BuiltIndexes.comparator.makeCellName(indexName), FBUtilities.timestampMicros()); - mutation.apply(); - } -#endif - - /** - * 
Read the host ID from the system keyspace, creating (and storing) one if - * none exists. - */ - future<utils::UUID> get_local_host_id(); - - /** - * Sets the local host ID explicitly. Should only be called outside of SystemTable when replacing a node. - */ - future<utils::UUID> set_local_host_id(const utils::UUID& host_id); - -#if 0 - - public static PaxosState loadPaxosState(ByteBuffer key, CFMetaData metadata) - { - String req = "SELECT * FROM system.%s WHERE row_key = ? AND cf_id = ?"; - UntypedResultSet results = executeInternal(String.format(req, PAXOS), key, metadata.cfId); - if (results.isEmpty()) - return new PaxosState(key, metadata); - UntypedResultSet.Row row = results.one(); - Commit promised = row.has("in_progress_ballot") - ? new Commit(key, row.getUUID("in_progress_ballot"), ArrayBackedSortedColumns.factory.create(metadata)) - : Commit.emptyCommit(key, metadata); - // either we have both a recently accepted ballot and update or we have neither - Commit accepted = row.has("proposal") - ? new Commit(key, row.getUUID("proposal_ballot"), ColumnFamily.fromBytes(row.getBytes("proposal"))) - : Commit.emptyCommit(key, metadata); - // either most_recent_commit and most_recent_commit_at will both be set, or neither - Commit mostRecent = row.has("most_recent_commit") - ? new Commit(key, row.getUUID("most_recent_commit_at"), ColumnFamily.fromBytes(row.getBytes("most_recent_commit"))) - : Commit.emptyCommit(key, metadata); - return new PaxosState(promised, accepted, mostRecent); - } - - public static void savePaxosPromise(Commit promise) - { - String req = "UPDATE system.%s USING TIMESTAMP ? AND TTL ? SET in_progress_ballot = ? WHERE row_key = ? AND cf_id = ?"; - executeInternal(String.format(req, PAXOS), - UUIDGen.microsTimestamp(promise.ballot), - paxosTtl(promise.update.metadata), - promise.ballot, - promise.key, - promise.update.id()); - } - - public static void savePaxosProposal(Commit proposal) - { - executeInternal(String.format("UPDATE system.%s USING TIMESTAMP ? AND TTL ? SET proposal_ballot = ?, proposal = ? WHERE row_key = ? AND cf_id = ?", PAXOS), - UUIDGen.microsTimestamp(proposal.ballot), - paxosTtl(proposal.update.metadata), - proposal.ballot, - proposal.update.toBytes(), - proposal.key, - proposal.update.id()); - } - - private static int paxosTtl(CFMetaData metadata) - { - // keep paxos state around for at least 3h - return Math.max(3 * 3600, metadata.getGcGraceSeconds()); - } - - public static void savePaxosCommit(Commit commit) - { - // We always erase the last proposal (with the commit timestamp, to not erase a more recent proposal in case the commit is old) - // even though that's really just an optimization since SP.beginAndRepairPaxos will exclude accepted proposals older than the mrc. - String cql = "UPDATE system.%s USING TIMESTAMP ? AND TTL ? SET proposal_ballot = null, proposal = null, most_recent_commit_at = ?, most_recent_commit = ? WHERE row_key = ? AND cf_id = ?"; - executeInternal(String.format(cql, PAXOS), - UUIDGen.microsTimestamp(commit.ballot), - paxosTtl(commit.update.metadata), - commit.ballot, - commit.update.toBytes(), - commit.key, - commit.update.id()); - } - - /** - * Returns a RestorableMeter tracking the average read rate of a particular SSTable, restoring the last-seen rate - * from values in system.sstable_activity if present.
- * @param keyspace the keyspace the sstable belongs to - * @param table the table the sstable belongs to - * @param generation the generation number for the sstable - */ - public static RestorableMeter getSSTableReadMeter(String keyspace, String table, int generation) - { - String cql = "SELECT * FROM system.%s WHERE keyspace_name=? and columnfamily_name=? and generation=?"; - UntypedResultSet results = executeInternal(String.format(cql, SSTABLE_ACTIVITY), keyspace, table, generation); - - if (results.isEmpty()) - return new RestorableMeter(); - - UntypedResultSet.Row row = results.one(); - double m15rate = row.getDouble("rate_15m"); - double m120rate = row.getDouble("rate_120m"); - return new RestorableMeter(m15rate, m120rate); - } - - /** - * Writes the current read rates for a given SSTable to system.sstable_activity - */ - public static void persistSSTableReadMeter(String keyspace, String table, int generation, RestorableMeter meter) - { - // Store values with a ten-day TTL (864000 seconds) to handle corner cases where cleanup might not occur - String cql = "INSERT INTO system.%s (keyspace_name, columnfamily_name, generation, rate_15m, rate_120m) VALUES (?, ?, ?, ?, ?) USING TTL 864000"; - executeInternal(String.format(cql, SSTABLE_ACTIVITY), - keyspace, - table, - generation, - meter.fifteenMinuteRate(), - meter.twoHourRate()); - } - - /** - * Clears persisted read rates from system.sstable_activity for SSTables that have been deleted. - */ - public static void clearSSTableReadMeter(String keyspace, String table, int generation) - { - String cql = "DELETE FROM system.%s WHERE keyspace_name=? AND columnfamily_name=? and generation=?"; - executeInternal(String.format(cql, SSTABLE_ACTIVITY), keyspace, table, generation); - } -#endif - - api::timestamp_type schema_creation_timestamp(); - -/** - * Builds a mutation for SIZE_ESTIMATES_CF containing the specified estimates. - */ -mutation make_size_estimates_mutation(const sstring& ks, std::vector<range_estimates> estimates); - -} // namespace system_keyspace -} // namespace db diff --git a/scylla/db/view/view.cc b/scylla/db/view/view.cc deleted file mode 100644 index a44a5b1..0000000 --- a/scylla/db/view/view.cc +++ /dev/null @@ -1,900 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2017 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version.
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#include -#include - -#include -#include - -#include "clustering_bounds_comparator.hh" -#include "cql3/statements/select_statement.hh" -#include "cql3/util.hh" -#include "db/view/view.hh" -#include "gms/inet_address.hh" -#include "keys.hh" -#include "locator/network_topology_strategy.hh" -#include "service/storage_service.hh" -#include "view_info.hh" - -static logging::logger vlogger("view"); - -view_info::view_info(const schema& schema, const raw_view_info& raw_view_info) - : _schema(schema) - , _raw(raw_view_info) -{ } - -cql3::statements::select_statement& view_info::select_statement() const { - if (!_select_statement) { - std::vector<sstring> included; - if (!include_all_columns()) { - included.reserve(_schema.all_columns().size()); - boost::transform(_schema.all_columns(), std::back_inserter(included), std::mem_fn(&column_definition::name_as_text)); - } - auto raw = cql3::util::build_select_statement(base_name(), where_clause(), std::move(included)); - raw->prepare_keyspace(_schema.ks_name()); - raw->set_bound_variables({}); - cql3::cql_stats ignored; - auto prepared = raw->prepare(service::get_local_storage_proxy().get_db().local(), ignored, true); - _select_statement = static_pointer_cast<cql3::statements::select_statement>(prepared->statement); - } - return *_select_statement; -} - -const query::partition_slice& view_info::partition_slice() const { - if (!_partition_slice) { - _partition_slice = select_statement().make_partition_slice(cql3::query_options({ })); - } - return *_partition_slice; -} - -const dht::partition_range_vector& view_info::partition_ranges() const { - if (!_partition_ranges) { - _partition_ranges = select_statement().get_restrictions()->get_partition_key_ranges(cql3::query_options({ })); - } - return *_partition_ranges; -} - -const column_definition* view_info::view_column(const schema& base, column_id base_id) const { - // FIXME: Map base column_ids to view_column_ids, which can be something like - // a boost::small_vector where the position is the base column_id, and the - // value is either empty or the view's column_id.
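- // (Illustrative sketch, not part of the original source:) one possible shape for the mapping suggested by the FIXME above is a per-view vector built once per (base, view) schema pair and indexed by base column_id, e.g. - // std::vector<stdx::optional<column_id>> base_to_view_id; // disengaged when the base column is not in the view - // which would turn the name-based lookup below into a simple vector index.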
- return _schema.get_column_definition(base.regular_column_at(base_id).name()); -} - -stdx::optional<column_id> view_info::base_non_pk_column_in_view_pk(const schema& base) const { - if (!_base_non_pk_column_in_view_pk) { - _base_non_pk_column_in_view_pk.emplace(stdx::nullopt); - for (auto&& view_col : boost::range::join(_schema.partition_key_columns(), _schema.clustering_key_columns())) { - auto* base_col = base.get_column_definition(view_col.name()); - if (!base_col->is_primary_key()) { - _base_non_pk_column_in_view_pk.emplace(base_col->id); - break; - } - } - } - return *_base_non_pk_column_in_view_pk; -} - -namespace db { - -namespace view { - -bool partition_key_matches(const schema& base, const view_info& view, const dht::decorated_key& key) { - return view.select_statement().get_restrictions()->get_partition_key_restrictions()->is_satisfied_by( - base, key.key(), clustering_key_prefix::make_empty(), row(), cql3::query_options({ }), gc_clock::now()); -} - -bool clustering_prefix_matches(const schema& base, const view_info& view, const partition_key& key, const clustering_key_prefix& ck) { - return view.select_statement().get_restrictions()->get_clustering_columns_restrictions()->is_satisfied_by( - base, key, ck, row(), cql3::query_options({ }), gc_clock::now()); -} - -bool may_be_affected_by(const schema& base, const view_info& view, const dht::decorated_key& key, const rows_entry& update) { - // We can guarantee that the view won't be affected if: - // - the primary key is excluded by the view filter (note that this isn't true of the filter on regular columns: - // even if an update doesn't match a view condition on a regular column, that update can still invalidate a - // pre-existing entry) - note that the upper layers should already have checked the partition key; - // - the update doesn't modify any of the columns impacting the view (where "impacting" the view means that column - // is neither included in the view, nor used by the view filter). - if (!clustering_prefix_matches(base, view, key.key(), update.key())) { - return false; - } - - // We want to check if the update modifies any of the columns that are part of the view (in which case the view is - // affected). But if the view includes all the base table columns, or the update has either a row deletion or a - // row marker, we know the view is affected right away. - if (view.include_all_columns() || update.row().deleted_at() || update.row().marker().is_live()) { - return true; - } - - bool affected = false; - update.row().cells().for_each_cell_until([&] (column_id id, const atomic_cell_or_collection& cell) { - affected = view.view_column(base, id); - return stop_iteration(affected); - }); - return affected; -} - -static bool update_requires_read_before_write(const schema& base, - const std::vector<view_ptr>& views, - const dht::decorated_key& key, - const rows_entry& update) { - for (auto&& v : views) { - view_info& vf = *v->view_info(); - // A view whose primary key contains only the base's primary key columns doesn't require a read-before-write. - // However, if the view has restrictions on regular columns, then a write that doesn't match those filters - // needs to add a tombstone (assuming a previous update matched those filter and created a view entry); for - // now we just do a read-before-write in that case.
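- // For illustration (hypothetical schema, not from the original source): with - // CREATE TABLE t (p int, c int, v int, PRIMARY KEY (p, c)) - // CREATE MATERIALIZED VIEW mv AS SELECT * FROM t - // WHERE v IS NOT NULL AND p IS NOT NULL AND c IS NOT NULL PRIMARY KEY (v, p, c) - // an UPDATE that changes v moves the row to a different mv partition, so the old value - // of v has to be read first in order to delete the previous view entry.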
- if (!vf.base_non_pk_column_in_view_pk(base) - && vf.select_statement().get_restrictions()->get_non_pk_restriction().empty()) { - continue; - } - if (may_be_affected_by(base, vf, key, update)) { - return true; - } - } - return false; -} - -bool matches_view_filter(const schema& base, const view_info& view, const partition_key& key, const clustering_row& update, gc_clock::time_point now) { - return clustering_prefix_matches(base, view, key, update.key()) - && boost::algorithm::all_of( - view.select_statement().get_restrictions()->get_non_pk_restriction() | boost::adaptors::map_values, - [&] (auto&& r) { - return r->is_satisfied_by(base, key, update.key(), update.cells(), cql3::query_options({ }), now); - }); -} - -class view_updates final { - view_ptr _view; - const view_info& _view_info; - schema_ptr _base; - std::unordered_map<partition_key, mutation_partition, partition_key::hashing, partition_key::equality> _updates; -public: - explicit view_updates(view_ptr view, schema_ptr base) - : _view(std::move(view)) - , _view_info(*_view->view_info()) - , _base(std::move(base)) - , _updates(8, partition_key::hashing(*_base), partition_key::equality(*_base)) { - } - - void move_to(std::vector<mutation>& mutations) && { - auto& partitioner = dht::global_partitioner(); - std::transform(_updates.begin(), _updates.end(), std::back_inserter(mutations), [&, this] (auto&& m) { - return mutation(_view, partitioner.decorate_key(*_base, std::move(m.first)), std::move(m.second)); - }); - } - - void generate_update(const partition_key& base_key, const clustering_row& update, const stdx::optional<clustering_row>& existing, gc_clock::time_point now); -private: - mutation_partition& partition_for(partition_key&& key) { - auto it = _updates.find(key); - if (it != _updates.end()) { - return it->second; - } - return _updates.emplace(std::move(key), mutation_partition(_view)).first->second; - } - row_marker compute_row_marker(const clustering_row& base_row) const; - deletable_row& get_view_row(const partition_key& base_key, const clustering_row& update); - void create_entry(const partition_key& base_key, const clustering_row& update, gc_clock::time_point now); - void delete_old_entry(const partition_key& base_key, const clustering_row& existing, const row_tombstone& t, gc_clock::time_point now); - void do_delete_old_entry(const partition_key& base_key, const clustering_row& existing, const row_tombstone& t, gc_clock::time_point now); - void update_entry(const partition_key& base_key, const clustering_row& update, const clustering_row& existing, gc_clock::time_point now); - void replace_entry(const partition_key& base_key, const clustering_row& update, const clustering_row& existing, gc_clock::time_point now) { - create_entry(base_key, update, now); - delete_old_entry(base_key, existing, row_tombstone(), now); - } -}; - -row_marker view_updates::compute_row_marker(const clustering_row& base_row) const { - /* - * We need to compute both the timestamp and expiration. - * - * For the timestamp, it makes sense to use the bigger timestamp for all view PK columns. - * - * This is more complex for the expiration. We want to maintain consistency between the base and the view, so the - * entry should only exist as long as the base row exists _and_ has non-null values for all the columns that are part - * of the view PK. - * Which means we really have 2 cases: - * 1) There is a column that is not in the base PK but is in the view PK. In that case, as long as that column - * lives, the view entry does too, but as soon as it expires (or is deleted for that matter) the entry also - * should expire.
So the expiration for the view is the one of that column, regardless of any other expiration. - * To take an example of that case, if you have: - * CREATE TABLE t (a int, b int, c int, PRIMARY KEY (a, b)) - * CREATE MATERIALIZED VIEW mv AS SELECT * FROM t WHERE c IS NOT NULL AND a IS NOT NULL AND b IS NOT NULL PRIMARY KEY (c, a, b) - * INSERT INTO t(a, b) VALUES (0, 0) USING TTL 3; - * UPDATE t SET c = 0 WHERE a = 0 AND b = 0; - * then even after 3 seconds elapsed, the row will still exist (it just won't have a "row marker" anymore) and so - * the MV should still have a corresponding entry. - * 2) The columns for the base and view PKs are exactly the same. In that case, the view entry should live - * as long as the base row lives. This means the view entry should only expire once *everything* in the - * base row has expired. So, the row TTL should be the max of any other TTL. This is particularly important - * in the case where the base row has a TTL, but a column *absent* from the view holds a greater TTL. - */ - - auto marker = base_row.marker(); - auto col_id = _view_info.base_non_pk_column_in_view_pk(*_base); - if (col_id) { - // Note: multi-cell columns can't be part of the primary key. - auto cell = base_row.cells().cell_at(*col_id).as_atomic_cell(); - auto timestamp = std::max(marker.timestamp(), cell.timestamp()); - return cell.is_live_and_has_ttl() ? row_marker(timestamp, cell.ttl(), cell.expiry()) : row_marker(timestamp); - } - - if (!marker.is_expiring()) { - return marker; - } - - auto ttl = marker.ttl(); - auto expiry = marker.expiry(); - auto maybe_update_expiry_and_ttl = [&] (atomic_cell_view&& cell) { - // Note: Cassandra compares cell.ttl() here, but that seems very wrong. - // See CASSANDRA-13127. - if (cell.is_live_and_has_ttl() && cell.expiry() > expiry) { - expiry = cell.expiry(); - ttl = cell.ttl(); - } - }; - - base_row.cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& c) { - auto& def = _base->regular_column_at(id); - if (def.is_atomic()) { - maybe_update_expiry_and_ttl(c.as_atomic_cell()); - } else { - static_pointer_cast<const collection_type_impl>(def.type)->for_each_cell(c.as_collection_mutation(), maybe_update_expiry_and_ttl); - } - }); - - return row_marker(marker.timestamp(), ttl, expiry); -} - -deletable_row& view_updates::get_view_row(const partition_key& base_key, const clustering_row& update) { - auto get_value = boost::adaptors::transformed([&, this] (const column_definition& cdef) { - auto* base_col = _base->get_column_definition(cdef.name()); - assert(base_col); - switch (base_col->kind) { - case column_kind::partition_key: - return base_key.get_component(*_base, base_col->position()); - case column_kind::clustering_key: - return update.key().get_component(*_base, base_col->position()); - default: - auto& c = update.cells().cell_at(base_col->id); - if (base_col->is_atomic()) { - return c.as_atomic_cell().value(); - } - return c.as_collection_mutation().data; - } - }); - auto& partition = partition_for(partition_key::from_range(_view->partition_key_columns() | get_value)); - auto ckey = clustering_key::from_range(_view->clustering_key_columns() | get_value); - return partition.clustered_row(*_view, std::move(ckey)); -} - -static const column_definition* view_column(const schema& base, const schema& view, column_id base_id) { - // FIXME: Map base column_ids to view_column_ids, which can be something like - // a boost::small_vector where the position is the base column_id, and the - // value is either empty or the view's column_id.
- return view.get_column_definition(base.regular_column_at(base_id).name()); -} - -static void add_cells_to_view(const schema& base, const schema& view, const row& base_cells, row& view_cells) { - base_cells.for_each_cell([&] (column_id id, const atomic_cell_or_collection& c) { - auto* view_col = view_column(base, view, id); - if (view_col && !view_col->is_primary_key()) { - view_cells.append_cell(view_col->id, c); - } - }); -} - -/** - * Creates a view entry corresponding to the provided base row. - * This method checks that the base row does match the view filter before applying anything. - */ -void view_updates::create_entry(const partition_key& base_key, const clustering_row& update, gc_clock::time_point now) { - if (!matches_view_filter(*_base, _view_info, base_key, update, now)) { - return; - } - deletable_row& r = get_view_row(base_key, update); - r.apply(compute_row_marker(update)); - r.apply(update.tomb()); - add_cells_to_view(*_base, *_view, update.cells(), r.cells()); -} - -/** - * Deletes the view entry corresponding to the provided base row. - * This method checks that the base row does match the view filter before bothering. - */ -void view_updates::delete_old_entry(const partition_key& base_key, const clustering_row& existing, const row_tombstone& t, gc_clock::time_point now) { - // Before deleting an old entry, make sure it was matching the view filter - // (otherwise there is nothing to delete) - if (matches_view_filter(*_base, _view_info, base_key, existing, now)) { - do_delete_old_entry(base_key, existing, t, now); - } -} - -void view_updates::do_delete_old_entry(const partition_key& base_key, const clustering_row& existing, const row_tombstone& t, gc_clock::time_point now) { - if (t) { - get_view_row(base_key, existing).apply(t); - return; - } - // We delete the old row using a shadowable row tombstone, making sure that - // the tombstone deletes everything in the row (or it might still show up). - // FIXME: If the entry is "resurrected" by a later update, we would need to - // ensure that the timestamp for the entry then is bigger than the tombstone - // we're just inserting, which is not currently guaranteed. See CASSANDRA-11500 - // for details. - auto ts = existing.marker().timestamp(); - auto set_max_ts = [&ts] (atomic_cell_view&& cell) { - ts = std::max(ts, cell.timestamp()); - }; - existing.cells().for_each_cell([&, this] (column_id id, const atomic_cell_or_collection& cell) { - auto* def = _view_info.view_column(*_base, id); - if (!def) { - return; - } - if (def->is_atomic()) { - set_max_ts(cell.as_atomic_cell()); - } else { - static_pointer_cast<const collection_type_impl>(def->type)->for_each_cell(cell.as_collection_mutation(), set_max_ts); - } - }); - get_view_row(base_key, existing).apply(shadowable_tombstone(ts, now)); -} - -/** - * Creates the updates to apply to the existing view entry given the base table row before - * and after the update, assuming that the update hasn't changed to which view entry the - * row corresponds (that is, we know the columns composing the view PK haven't changed). - * - * This method checks that the base row (before and after) matches the view filter before - * applying anything. - */ -void view_updates::update_entry(const partition_key& base_key, const clustering_row& update, const clustering_row& existing, gc_clock::time_point now) { - // While we know update and existing correspond to the same view entry, - // they may not match the view filter.
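- // A summary of the cases handled below (a reading aid, not original code): - // existing matches filter | update matches filter | action - // no | (re-checked inside) | create_entry - // yes | no | do_delete_old_entry - // yes | yes | apply new marker/tombstone plus the cell diff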
- if (!matches_view_filter(*_base, _view_info, base_key, existing, now)) { - create_entry(base_key, update, now); - return; - } - if (!matches_view_filter(*_base, _view_info, base_key, update, now)) { - do_delete_old_entry(base_key, existing, row_tombstone(), now); - return; - } - - deletable_row& r = get_view_row(base_key, update); - r.apply(compute_row_marker(update)); - r.apply(update.tomb()); - - auto diff = update.cells().difference(*_base, column_kind::regular_column, existing.cells()); - add_cells_to_view(*_base, *_view, diff, r.cells()); -} - -void view_updates::generate_update( - const partition_key& base_key, - const clustering_row& update, - const stdx::optional<clustering_row>& existing, - gc_clock::time_point now) { - // Note that the base PK columns in update and existing are the same, since we're intrinsically dealing - // with the same base row. So we have to check 3 things: - // 1) that the clustering key doesn't have a null, which can happen for compact tables. If that's the case, - // there are no corresponding entries. - // 2) if there is a column not part of the base PK in the view PK, whether it is changed by the update. - // 3) whether the update actually matches the view SELECT filter - - if (!update.key().is_full(*_base)) { - return; - } - - auto col_id = _view_info.base_non_pk_column_in_view_pk(*_base); - if (!col_id) { - // The view key is necessarily the same pre and post update. - if (existing && !existing->empty()) { - if (update.empty()) { - delete_old_entry(base_key, *existing, update.tomb(), now); - } else { - update_entry(base_key, update, *existing, now); - } - } else if (!update.empty()) { - create_entry(base_key, update, now); - } - return; - } - - auto* after = update.cells().find_cell(*col_id); - // Note: multi-cell columns can't be part of the primary key.
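- // The branches below reduce to the following decision table for the view-PK cell - // (before = cell in existing, after = cell in update; a reading aid, not original code): - // before live | after live | same value | action - // yes | yes | yes | update_entry - // yes | yes | no | replace_entry - // yes | no | - | delete_old_entry - // no | yes | - | create_entry - // no | no | - | nothing to do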
- if (existing) { - auto* before = existing->cells().find_cell(*col_id); - if (before && before->as_atomic_cell().is_live()) { - if (after && after->as_atomic_cell().is_live()) { - auto cmp = compare_atomic_cell_for_merge(before->as_atomic_cell(), after->as_atomic_cell()); - if (cmp == 0) { - update_entry(base_key, update, *existing, now); - } else { - replace_entry(base_key, update, *existing, now); - } - } else { - delete_old_entry(base_key, *existing, update.tomb(), now); - } - return; - } - } - - // No existing row or the cell wasn't live - if (after && after->as_atomic_cell().is_live()) { - create_entry(base_key, update, now); - } -} - -class view_update_builder { - schema_ptr _schema; // The base schema - std::vector<view_updates> _view_updates; - streamed_mutation _updates; - streamed_mutation_opt _existings; - range_tombstone_accumulator _update_tombstone_tracker; - range_tombstone_accumulator _existing_tombstone_tracker; - mutation_fragment_opt _update; - mutation_fragment_opt _existing; - gc_clock::time_point _now; -public: - - view_update_builder(schema_ptr s, - std::vector<view_updates>&& views_to_update, - streamed_mutation&& updates, - streamed_mutation_opt&& existings) - : _schema(std::move(s)) - , _view_updates(std::move(views_to_update)) - , _updates(std::move(updates)) - , _existings(std::move(existings)) - , _update_tombstone_tracker(*_schema, false) - , _existing_tombstone_tracker(*_schema, false) - , _now(gc_clock::now()) { - _update_tombstone_tracker.set_partition_tombstone(_updates.partition_tombstone()); - if (_existings) { - _existing_tombstone_tracker.set_partition_tombstone(_existings->partition_tombstone()); - } - } - - future<std::vector<mutation>> build(); - -private: - void generate_update(clustering_row&& update, stdx::optional<clustering_row>&& existing); - future<stop_iteration> on_results(); - - future<stop_iteration> advance_all() { - auto existings_f = _existings ? (*_existings)() : make_ready_future<mutation_fragment_opt>(); - return when_all(_updates(), std::move(existings_f)).then([this] (auto&& fragments) mutable { - _update = std::move(std::get<0>(std::get<0>(fragments).get())); - _existing = std::move(std::get<0>(std::get<1>(fragments).get())); - return stop_iteration::no; - }); - } - - future<stop_iteration> advance_updates() { - return _updates().then([this] (auto&& update) mutable { - _update = std::move(update); - return stop_iteration::no; - }); - } - - future<stop_iteration> advance_existings() { - if (!_existings) { - return make_ready_future<stop_iteration>(stop_iteration::no); - } - return (*_existings)().then([this] (auto&& existing) mutable { - _existing = std::move(existing); - return stop_iteration::no; - }); - } - - future<stop_iteration> stop() const { - return make_ready_future<stop_iteration>(stop_iteration::yes); - } -}; - -future<std::vector<mutation>> view_update_builder::build() { - return advance_all().then([this] (auto&& ignored) { - return repeat([this] { - return this->on_results(); - }); - }).then([this] { - std::vector<mutation> mutations; - for (auto&& update : _view_updates) { - std::move(update).move_to(mutations); - } - return mutations; - }); -} - -void view_update_builder::generate_update(clustering_row&& update, stdx::optional<clustering_row>&& existing) { - // If we have no update at all, we shouldn't get there. - if (update.empty()) { - throw std::logic_error("Empty materialized view updated"); - } - - auto gc_before = _now - _schema->gc_grace_seconds(); - - // We allow existing to be disengaged, which we treat the same as an empty row.
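- // (Reading aid:) the compact_and_expire() calls below drop data that is already dead at gc_before (expired TTL'd cells and garbage-collectible tombstones), so view updates are generated only from what is still live in the base row.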
- if (existing) { - existing->marker().compact_and_expire(tombstone(), _now, always_gc, gc_before); - existing->cells().compact_and_expire(*_schema, column_kind::regular_column, row_tombstone(), _now, always_gc, gc_before); - update.apply(*_schema, *existing); - } - - update.marker().compact_and_expire(tombstone(), _now, always_gc, gc_before); - update.cells().compact_and_expire(*_schema, column_kind::regular_column, row_tombstone(), _now, always_gc, gc_before); - - for (auto&& v : _view_updates) { - v.generate_update(_updates.key(), update, existing, _now); - } -} - -static void apply_tracked_tombstones(range_tombstone_accumulator& tracker, clustering_row& row) { - for (auto&& rt : tracker.range_tombstones_for_row(row.key())) { - row.apply(rt.tomb); - } -} - -future<stop_iteration> view_update_builder::on_results() { - if (_update && _existing) { - int cmp = position_in_partition::tri_compare(*_schema)(_update->position(), _existing->position()); - if (cmp < 0) { - // We have an update where there was nothing before - if (_update->is_range_tombstone()) { - _update_tombstone_tracker.apply(std::move(_update->as_range_tombstone())); - } else if (_update->is_clustering_row()) { - auto& update = _update->as_mutable_clustering_row(); - apply_tracked_tombstones(_update_tombstone_tracker, update); - auto tombstone = _existing_tombstone_tracker.current_tombstone(); - auto existing = tombstone - ? stdx::optional<clustering_row>(stdx::in_place, update.key(), row_tombstone(std::move(tombstone)), row_marker(), ::row()) - : stdx::nullopt; - generate_update(std::move(update), std::move(existing)); - } - return advance_updates(); - } - if (cmp > 0) { - // We have something existing but no update (which will happen either because it's a range tombstone marker in - // existing, or because we've fetched the existing row due to some partition/range deletion in the updates) - if (_existing->is_range_tombstone()) { - _existing_tombstone_tracker.apply(std::move(_existing->as_range_tombstone())); - } else if (_existing->is_clustering_row()) { - auto& existing = _existing->as_mutable_clustering_row(); - apply_tracked_tombstones(_existing_tombstone_tracker, existing); - auto tombstone = _update_tombstone_tracker.current_tombstone(); - // The way we build the read command used for existing rows, we should always have a non-empty - // tombstone, since we wouldn't have read the existing row otherwise. We don't assert that in case the - // read method ever changes.
- if (tombstone) { - auto update = clustering_row(existing.key(), row_tombstone(std::move(tombstone)), row_marker(), ::row()); - generate_update(std::move(update), { std::move(existing) }); - } - } - return advance_existings(); - } - // We're updating a row that had pre-existing data - if (_update->is_range_tombstone()) { - assert(_existing->is_range_tombstone()); - _existing_tombstone_tracker.apply(std::move(*_existing).as_range_tombstone()); - _update_tombstone_tracker.apply(std::move(*_update).as_range_tombstone()); - } else if (_update->is_clustering_row()) { - assert(!_existing->is_range_tombstone()); - apply_tracked_tombstones(_update_tombstone_tracker, _update->as_mutable_clustering_row()); - apply_tracked_tombstones(_existing_tombstone_tracker, _existing->as_mutable_clustering_row()); - generate_update(std::move(*_update).as_clustering_row(), { std::move(*_existing).as_clustering_row() }); - } - return advance_all(); - } - - auto tombstone = _update_tombstone_tracker.current_tombstone(); - if (tombstone && _existing) { - // We don't care if it's a range tombstone, as we're only looking for existing entries that get deleted - if (_existing->is_clustering_row()) { - auto& existing = _existing->as_clustering_row(); - auto update = clustering_row(existing.key(), row_tombstone(std::move(tombstone)), row_marker(), ::row()); - generate_update(std::move(update), { std::move(existing) }); - } - return advance_existings(); - } - - // If we have updates and it's a range tombstone, it removes nothing pre-existing, so we can ignore it - if (_update && _update->is_clustering_row()) { - generate_update(std::move(*_update).as_clustering_row(), { }); - return advance_updates(); - } - - return stop(); -} - -future<std::vector<mutation>> generate_view_updates( - const schema_ptr& base, - std::vector<view_ptr>&& views_to_update, - streamed_mutation&& updates, - streamed_mutation_opt&& existings) { - auto vs = boost::copy_range<std::vector<view_updates>>(views_to_update | boost::adaptors::transformed([&] (auto&& v) { - return view_updates(std::move(v), base); - })); - auto builder = std::make_unique<view_update_builder>(base, std::move(vs), std::move(updates), std::move(existings)); - auto f = builder->build(); - return f.finally([builder = std::move(builder)] { }); -} - -query::clustering_row_ranges calculate_affected_clustering_ranges(const schema& base, - const dht::decorated_key& key, - const mutation_partition& mp, - const std::vector<view_ptr>& views) { - std::vector<nonwrapping_range<clustering_key_prefix_view>> row_ranges; - std::vector<nonwrapping_range<clustering_key_prefix_view>> view_row_ranges; - clustering_key_prefix_view::tri_compare cmp(base); - if (mp.partition_tombstone() || !mp.row_tombstones().empty()) { - for (auto&& v : views) { - // FIXME: #2371 - if (v->view_info()->select_statement().get_restrictions()->has_unrestricted_clustering_columns()) { - view_row_ranges.push_back(nonwrapping_range<clustering_key_prefix_view>::make_open_ended_both_sides()); - break; - } - for (auto&& r : v->view_info()->partition_slice().default_row_ranges()) { - view_row_ranges.push_back(r.transform(std::mem_fn(&clustering_key_prefix::view))); - } - } - } - if (mp.partition_tombstone()) { - std::swap(row_ranges, view_row_ranges); - } else { - // FIXME: Optimize, as more often than not clustering keys will not be restricted.
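- // For example (illustrative): a base row tombstone covering the clustering range - // [c=1, c=5], intersected with a view whose filter restricts c >= 3, yields [c=3, c=5]; - // only that slice of the existing rows needs to be considered.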
- for (auto&& rt : mp.row_tombstones()) { - nonwrapping_range<clustering_key_prefix_view> rtr( - bound_view::to_range_bound(rt.start_bound()), - bound_view::to_range_bound(rt.end_bound())); - for (auto&& vr : view_row_ranges) { - auto overlap = rtr.intersection(vr, cmp); - if (overlap) { - row_ranges.push_back(std::move(overlap).value()); - } - } - } - } - - for (auto&& row : mp.clustered_rows()) { - if (update_requires_read_before_write(base, views, key, row)) { - row_ranges.emplace_back(row.key()); - } - } - - // Note that the views could have restrictions on regular columns, - // but even if that's the case we shouldn't apply those when we read, - // because even if an existing row doesn't match the view filter, the - // update can change that in which case we'll need to know the existing - // content, in case the view includes a column that is not included in - // this mutation. - - //FIXME: Unfortunate copy. - return boost::copy_range<query::clustering_row_ranges>( - nonwrapping_range<clustering_key_prefix_view>::deoverlap(std::move(row_ranges), cmp) - | boost::adaptors::transformed([] (auto&& v) { - return std::move(v).transform([] (auto&& ckv) { return clustering_key_prefix(ckv); }); - })); - -} - -// Calculate the node ("natural endpoint") to which this node should send -// a view update. -// -// A materialized view table is in the same keyspace as its base table, -// and in particular both have the same replication factor. Therefore it -// is possible, for a particular base partition and related view partition -// to "pair" between the base replicas and view replicas holding those -// partitions. The first (in ring order) base replica is paired with the -// first view replica, the second with the second, and so on. The purpose -// of this function is to find, assuming that this node is one of the base -// replicas for a given partition, the paired view replica. -// -// If the keyspace's replication strategy is a NetworkTopologyStrategy, -// we pair only nodes in the same datacenter. -// If one of the base replicas also happens to be a view replica, it is -// paired with itself (with the other nodes paired by order in the list -// after taking this node out). -// -// If the assumption that the given base token belongs to this replica -// does not hold, we return an empty optional. -static stdx::optional<gms::inet_address> -get_view_natural_endpoint(const sstring& keyspace_name, - const dht::token& base_token, const dht::token& view_token) { - auto &db = service::get_local_storage_service().db().local(); - auto& rs = db.find_keyspace(keyspace_name).get_replication_strategy(); - auto my_address = utils::fb_utilities::get_broadcast_address(); - auto my_datacenter = locator::i_endpoint_snitch::get_local_snitch_ptr()->get_datacenter(my_address); - bool network_topology = dynamic_cast<const locator::network_topology_strategy*>(&rs); - std::vector<gms::inet_address> base_endpoints, view_endpoints; - for (auto&& base_endpoint : rs.get_natural_endpoints(base_token)) { - if (!network_topology || locator::i_endpoint_snitch::get_local_snitch_ptr()->get_datacenter(base_endpoint) == my_datacenter) { - base_endpoints.push_back(base_endpoint); - } - } - - for (auto&& view_endpoint : rs.get_natural_endpoints(view_token)) { - // If this base replica is also one of the view replicas, we use - // ourselves as the view replica. - if (view_endpoint == my_address) { - return view_endpoint; - } - // We have to remove any endpoint which is shared between the base - // and the view, as it will select itself and throw off the counts - // otherwise.
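- // For example (illustrative): with base replicas {A, B, C} and view replicas - // {B, D, E}, where this node is A: B is erased from the base list, leaving - // {A, C} paired in order with {D, E}, so A (this node) sends to D.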
- auto it = std::find(base_endpoints.begin(), base_endpoints.end(), - view_endpoint); - if (it != base_endpoints.end()) { - base_endpoints.erase(it); - } else if (!network_topology || locator::i_endpoint_snitch::get_local_snitch_ptr()->get_datacenter(view_endpoint) == my_datacenter) { - view_endpoints.push_back(view_endpoint); - } - } - - assert(base_endpoints.size() == view_endpoints.size()); - auto base_it = std::find(base_endpoints.begin(), base_endpoints.end(), my_address); - if (base_it == base_endpoints.end()) { - // This node is not a base replica of this key, so we return empty - return {}; - } - return view_endpoints[base_it - base_endpoints.begin()]; -} - -// Take the view mutations generated by generate_view_updates(), which pertain -// to a modification of a single base partition, and apply them to the -// appropriate paired replicas. This is done asynchronously - we do not wait -// for the writes to complete. -// FIXME: I dropped a lot of parameters the Cassandra version had, -// we may need them back: writeCommitLog, baseComplete, queryStartNanoTime. -void mutate_MV(const dht::token& base_token, - std::vector<mutation> mutations) -{ -#if 0 - Tracing.trace("Determining replicas for mutation"); - final String localDataCenter = DatabaseDescriptor.getEndpointSnitch().getDatacenter(FBUtilities.getBroadcastAddress()); - long startTime = System.nanoTime(); - - try - { - // if we haven't joined the ring, write everything to batchlog because paired replicas may be stale - final UUID batchUUID = UUIDGen.getTimeUUID(); - - if (StorageService.instance.isStarting() || StorageService.instance.isJoining() || StorageService.instance.isMoving()) - { - BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), - mutations), writeCommitLog); - } - else - { - List<WriteResponseHandlerWrapper> wrappers = new ArrayList<>(mutations.size()); - List<Mutation> nonPairedMutations = new LinkedList<>(); - Token baseToken = StorageService.instance.getTokenMetadata().partitioner.getToken(dataKey); - - ConsistencyLevel consistencyLevel = ConsistencyLevel.ONE; - - //Since the base -> view replication is 1:1 we only need to store the BL locally - final Collection<InetAddress> batchlogEndpoints = Collections.singleton(FBUtilities.getBroadcastAddress()); - BatchlogResponseHandler.BatchlogCleanup cleanup = new BatchlogResponseHandler.BatchlogCleanup(mutations.size(), - () -> asyncRemoveFromBatchlog(batchlogEndpoints, batchUUID)); - // add a handler for each mutation - includes checking availability, but doesn't initiate any writes, yet -#endif - for (auto& mut : mutations) { - auto view_token = mut.token(); - auto keyspace_name = mut.schema()->ks_name(); - auto paired_endpoint = get_view_natural_endpoint(keyspace_name, base_token, view_token); - auto pending_endpoints = service::get_local_storage_service().get_token_metadata().pending_endpoints_for(view_token, keyspace_name); - if (paired_endpoint) { - // When the local node is the endpoint and there are no pending nodes we can - // just apply the mutation locally. - auto my_address = utils::fb_utilities::get_broadcast_address(); - if (*paired_endpoint == my_address && pending_endpoints.empty() && - service::get_local_storage_service().is_joined()) { - // Note that we start here an asynchronous apply operation, and - // do not wait for it to complete. - // Note also that mutate_locally(mut) copies mut (in - // frozen form) so we don't need to increase its lifetime.
- service::get_local_storage_proxy().mutate_locally(mut).handle_exception([] (auto ep) { - vlogger.error("Error applying local view update: {}", ep); - }); - } else { -#if 0 - wrappers.add(wrapViewBatchResponseHandler(mutation, - consistencyLevel, - consistencyLevel, - Collections.singletonList(pairedEndpoint.get()), - baseComplete, - WriteType.BATCH, - cleanup, - queryStartNanoTime)); -#endif - // FIXME: Temporary hack: send the write directly to paired_endpoint, - // without a batchlog, and without checking for success - // Note we don't wait for the asynchronous operation to complete - // FIXME: need to extend mut's lifetime??? - service::get_local_storage_proxy().send_to_endpoint(mut, *paired_endpoint, db::write_type::VIEW).handle_exception([paired_endpoint] (auto ep) { - vlogger.error("Error applying view update to {}: {}", *paired_endpoint, ep); - }); - } - } else { -#if 0 - //if there are no paired endpoints there are probably range movements going on, - //so we write to the local batchlog to replay later - if (pendingEndpoints.isEmpty()) - vlogger.warn("Received base materialized view mutation for key {} that does not belong " + - "to this node. There is probably a range movement happening (move or decommission), " + - "but this node hasn't updated its ring metadata yet. Adding mutation to " + - "local batchlog to be replayed later.", - mutation.key()); - nonPairedMutations.add(mutation); - } -#endif - } - } -#if 0 - if (!wrappers.isEmpty()) - { - // Apply to local batchlog memtable in this thread - BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), Lists.transform(wrappers, w -> w.mutation)), - writeCommitLog); - - // now actually perform the writes and wait for them to complete - asyncWriteBatchedMutations(wrappers, localDataCenter, Stage.VIEW_MUTATION); - } -#endif -#if 0 - if (!nonPairedMutations.isEmpty()) - { - BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), nonPairedMutations), - writeCommitLog); - } - } -#endif -#if 0 - } - finally - { - viewWriteMetrics.addNano(System.nanoTime() - startTime); - } -#endif -} - -} // namespace view -} // namespace db - diff --git a/scylla/db/view/view.hh b/scylla/db/view/view.hh deleted file mode 100644 index f9b2f62..0000000 --- a/scylla/db/view/view.hh +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include "dht/i_partitioner.hh" -#include "gc_clock.hh" -#include "query-request.hh" -#include "schema.hh" -#include "streamed_mutation.hh" -#include "stdx.hh" - -namespace db { - -namespace view { - -/** - * Whether the view filter considers the specified partition key. - * - * @param base the base table schema. - * @param view_info the view info. - * @param key the partition key that is updated.
- * @return false if we can guarantee that inserting an update for specified key - * won't affect the view in any way, true otherwise. - */ -bool partition_key_matches(const schema& base, const view_info& view, const dht::decorated_key& key); - -/** - * Whether the view might be affected by the provided update. - * - * Note that having this method return true is not an absolute guarantee that the view will be - * updated, just that it most likely will, but a false return guarantees it won't be affected. - * - * @param base the base table schema. - * @param view_info the view info. - * @param key the partition key that is updated. - * @param update the base table update being applied. - * @return false if we can guarantee that inserting update for key - * won't affect the view in any way, true otherwise. - */ -bool may_be_affected_by(const schema& base, const view_info& view, const dht::decorated_key& key, const rows_entry& update); - -/** - * Whether a given base row matches the view filter (and thus if the view should have a corresponding entry). - * - * Note that this differs from may_be_affected_by in that the provided row must be the current - * state of the base row, not just some updates to it. This function also has no false positives: a base - * row either does or doesn't match the view filter. - * - * Also note that this function doesn't check the partition key, as it assumes the upper layers - * have already filtered out the views that are not affected. - * - * @param base the base table schema. - * @param view_info the view info. - * @param key the partition key that is updated. - * @param update the current state of a particular base row. - * @param now the current time in seconds (to decide what is live and what isn't). - * @return whether the base row matches the view filter. - */ -bool matches_view_filter(const schema& base, const view_info& view, const partition_key& key, const clustering_row& update, gc_clock::time_point now); - -bool clustering_prefix_matches(const schema& base, const view_info& view, const partition_key& key, const clustering_key_prefix& ck); - -future<std::vector<mutation>> generate_view_updates( - const schema_ptr& base, - std::vector<view_ptr>&& views_to_update, - streamed_mutation&& updates, - streamed_mutation_opt&& existings); - -query::clustering_row_ranges calculate_affected_clustering_ranges( - const schema& base, - const dht::decorated_key& key, - const mutation_partition& mp, - const std::vector<view_ptr>& views); - -void mutate_MV(const dht::token& base_token, - std::vector<mutation> mutations); - -} - -} diff --git a/scylla/db/write_type.hh b/scylla/db/write_type.hh deleted file mode 100644 index 6c343b2..0000000 --- a/scylla/db/write_type.hh +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include -#include - -namespace db { - -enum class write_type : uint8_t { - SIMPLE, - BATCH, - UNLOGGED_BATCH, - COUNTER, - BATCH_LOG, - CAS, - VIEW, -}; - -std::ostream& operator<<(std::ostream& os, const write_type& t); - -} - - diff --git a/scylla/db_clock.hh b/scylla/db_clock.hh deleted file mode 100644 index cf82c46..0000000 --- a/scylla/db_clock.hh +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (C) 2014 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include "clocks-impl.hh" -#include "gc_clock.hh" - -#include -#include -#include -#include - -// the database clock follows Java - 1ms granularity, 64-bit counter, 1970 epoch - -class db_clock final { -public: - using base = std::chrono::system_clock; - using rep = int64_t; - using period = std::ratio<1, 1000>; // milliseconds - using duration = std::chrono::duration<rep, period>; - using time_point = std::chrono::time_point<db_clock, duration>; - - static constexpr bool is_steady = base::is_steady; - static constexpr std::time_t to_time_t(time_point t) { - return std::chrono::duration_cast<std::chrono::seconds>(t.time_since_epoch()).count(); - } - static constexpr time_point from_time_t(std::time_t t) { - return time_point(std::chrono::duration_cast<duration>(std::chrono::seconds(t))); - } - static time_point now() { - return time_point(std::chrono::duration_cast<duration>(base::now().time_since_epoch())) + get_clocks_offset(); - } -}; - -static inline -gc_clock::time_point to_gc_clock(db_clock::time_point tp) { - // Converting time points through `std::time_t` means that we don't have to make any assumptions about the epochs - // of `gc_clock` and `db_clock`, though we require that the period of `gc_clock` is also 1 s like - // `std::time_t` to avoid loss of information. - { - using second = std::ratio<1, 1>; - static_assert( - std::is_same<gc_clock::period, second>::value, - "Conversion via std::time_t would lose information."); - } - - return gc_clock::from_time_t(db_clock::to_time_t(tp)); -} diff --git a/scylla/debug.hh b/scylla/debug.hh deleted file mode 100644 index 3d3a2f5..0000000 --- a/scylla/debug.hh +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla.
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include - -class database; - -namespace debug { - -extern seastar::sharded<database>* db; - - -} - diff --git a/scylla/dht/boot_strapper.cc b/scylla/dht/boot_strapper.cc deleted file mode 100644 index a15c440..0000000 --- a/scylla/dht/boot_strapper.cc +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#include "dht/boot_strapper.hh" -#include "service/storage_service.hh" -#include "dht/range_streamer.hh" -#include "gms/failure_detector.hh" -#include "log.hh" - -static logging::logger blogger("boot_strapper"); - -namespace dht { - -future<> boot_strapper::bootstrap() { - blogger.debug("Beginning bootstrap process: sorted_tokens={}", _token_metadata.sorted_tokens()); - - auto streamer = make_lw_shared<range_streamer>(_db, _token_metadata, _tokens, _address, "Bootstrap"); - streamer->add_source_filter(std::make_unique<range_streamer::failure_detector_source_filter>(gms::get_local_failure_detector())); - for (const auto& keyspace_name : _db.local().get_non_system_keyspaces()) { - auto& ks = _db.local().find_keyspace(keyspace_name); - auto& strategy = ks.get_replication_strategy(); - dht::token_range_vector ranges = strategy.get_pending_address_ranges(_token_metadata, _tokens, _address); - blogger.debug("Will stream keyspace={}, ranges={}", keyspace_name, ranges); - streamer->add_ranges(keyspace_name, ranges); - } - - return streamer->fetch_async().then_wrapped([streamer] (auto&& f) { - try { - auto state = f.get0(); - } catch (...)
{ - throw std::runtime_error(sprint("Error during boostrap: %s", std::current_exception())); - } - service::get_local_storage_service().finish_bootstrapping(); - return make_ready_future<>(); - }); -} - -std::unordered_set boot_strapper::get_bootstrap_tokens(token_metadata metadata, database& db) { - auto initial_tokens = db.get_initial_tokens(); - // if user specified tokens, use those - if (initial_tokens.size() > 0) { - blogger.debug("tokens manually specified as {}", initial_tokens); - std::unordered_set tokens; - for (auto& token_string : initial_tokens) { - auto token = dht::global_partitioner().from_sstring(token_string); - if (metadata.get_endpoint(token)) { - throw std::runtime_error(sprint("Bootstrapping to existing token %s is not allowed (decommission/removenode the old node first).", token_string)); - } - tokens.insert(token); - } - blogger.debug("Get manually specified bootstrap_tokens={}", tokens); - return tokens; - } - - size_t num_tokens = db.get_config().num_tokens(); - if (num_tokens < 1) { - throw std::runtime_error("num_tokens must be >= 1"); - } - - if (num_tokens == 1) { - blogger.warn("Picking random token for a single vnode. You should probably add more vnodes; failing that, you should probably specify the token manually"); - } - - auto tokens = get_random_tokens(metadata, num_tokens); - blogger.debug("Get random bootstrap_tokens={}", tokens); - return tokens; -} - -std::unordered_set boot_strapper::get_random_tokens(token_metadata metadata, size_t num_tokens) { - std::unordered_set tokens; - while (tokens.size() < num_tokens) { - auto token = global_partitioner().get_random_token(); - auto ep = metadata.get_endpoint(token); - if (!ep) { - tokens.emplace(token); - } - } - return tokens; -} - - -} // namespace dht diff --git a/scylla/dht/boot_strapper.hh b/scylla/dht/boot_strapper.hh deleted file mode 100644 index e734f72..0000000 --- a/scylla/dht/boot_strapper.hh +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ -#pragma once -#include "gms/inet_address.hh" -#include "locator/token_metadata.hh" -#include "dht/i_partitioner.hh" -#include -#include "database.hh" -#include "db/config.hh" - -namespace dht { - -class boot_strapper { - using inet_address = gms::inet_address; - using token_metadata = locator::token_metadata; - using token = dht::token; - distributed& _db; - /* endpoint that needs to be bootstrapped */ - inet_address _address; - /* token of the node being bootstrapped. */ - std::unordered_set _tokens; - token_metadata _token_metadata; -public: - boot_strapper(distributed& db, inet_address addr, std::unordered_set tokens, token_metadata tmd) - : _db(db) - , _address(addr) - , _tokens(tokens) - , _token_metadata(tmd) { - } - - future<> bootstrap(); - - /** - * if initialtoken was specified, use that (split on comma). - * otherwise, if num_tokens == 1, pick a token to assume half the load of the most-loaded node. - * else choose num_tokens tokens at random - */ - static std::unordered_set get_bootstrap_tokens(token_metadata metadata, database& db); - - static std::unordered_set get_random_tokens(token_metadata metadata, size_t num_tokens); -#if 0 - public static class StringSerializer implements IVersionedSerializer - { - public static final StringSerializer instance = new StringSerializer(); - - public void serialize(String s, DataOutputPlus out, int version) throws IOException - { - out.writeUTF(s); - } - - public String deserialize(DataInput in, int version) throws IOException - { - return in.readUTF(); - } - - public long serializedSize(String s, int version) - { - return TypeSizes.NATIVE.sizeof(s); - } - } -#endif -}; - -} // namespace dht diff --git a/scylla/dht/byte_ordered_partitioner.cc b/scylla/dht/byte_ordered_partitioner.cc deleted file mode 100644 index a5e34ef..0000000 --- a/scylla/dht/byte_ordered_partitioner.cc +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "byte_ordered_partitioner.hh" -#include "utils/class_registrator.hh" -#include "utils/div_ceil.hh" -#include -#include - -namespace dht { - -static const boost::multiprecision::cpp_int cppint_one{1}; - -token byte_ordered_partitioner::get_random_token() -{ - bytes b(bytes::initialized_later(), 16); - *unaligned_cast(b.begin()) = dht::get_random_number(); - *unaligned_cast(b.begin() + 8) = dht::get_random_number(); - return token(token::kind::key, std::move(b)); -} - -static float ratio_helper(boost::multiprecision::cpp_int a, boost::multiprecision::cpp_int b, unsigned sigbits) { - static boost::multiprecision::cpp_int cppint_max = cppint_one << sigbits; - boost::multiprecision::cpp_int val; - if (a >= b) { - val = a - b; - } else { - val = cppint_max - (b - a); - } - boost::multiprecision::cpp_dec_float_100 f1(val); - boost::multiprecision::cpp_dec_float_100 f2(cppint_max); - boost::multiprecision::cpp_dec_float_100 ratio = f1 / f2; - return ratio.convert_to(); -} - -boost::multiprecision::cpp_int cppint_token(const token& t) { - boost::multiprecision::cpp_int ret{0}; - - // If the token is minimum token, token._data will be empty, - // zero will be returned - for (uint8_t d : t._data) { - ret = (ret << 8) + d; - } - - return ret; -} - -std::map byte_ordered_partitioner::describe_ownership(const std::vector& sorted_tokens) { - std::map ownerships; - auto i = sorted_tokens.begin(); - - // 0-case - if (i == sorted_tokens.end()) { - throw runtime_exception("No nodes present in the cluster. Has this node finished starting up?"); - } - // 1-case - if (sorted_tokens.size() == 1) { - ownerships[sorted_tokens[0]] = 1.0; - // n-case - } else { - unsigned sigbits = 0; - for (auto const& t : sorted_tokens) { - sigbits = std::max(sigbits, t._data.size() * 8); - } - - const token& start = sorted_tokens[0]; - auto ti = cppint_token(start); // The first token and its value - auto cppint_start = ti; - auto tim1 = ti; // The last token and its value (after loop) - for (i++; i != sorted_tokens.end(); i++) { - ti = cppint_token(*i); // The next token and its value - ownerships[*i]= ratio_helper(ti, tim1, sigbits); // save (T(i) -> %age) - tim1 = ti; - } - - // The start token's range extends backward to the last token, which is why both were saved above. - ownerships[start] = ratio_helper(cppint_start, ti, sigbits); - } - - return ownerships; -} - -token byte_ordered_partitioner::midpoint(const token& t1, const token& t2) const { - unsigned sigbytes = std::max(t1._data.size(), t2._data.size()); - if (sigbytes == 0) { - // The midpoint of two minimum token is minimum token - return minimum_token(); - } - - auto l1 = cppint_token(t1); - auto l2 = cppint_token(t2); - auto sum = l1 + l2; - bool remainder = bit_test(sum, 0); - boost::multiprecision::cpp_int mid; - if (t1 <= t2) { - mid = sum / 2; - } else { - boost::multiprecision::cpp_int max = cppint_one << (sigbytes * 8); - mid = (sum / 2 + max / 2) % max; - } - - std::vector t; - t.reserve(sigbytes + (remainder ? 
1 : 0)); - // E.g., mid = 0x123456, sigbytes = 4, remainder = true - while (mid) { - t.push_back(mid.convert_to()); - mid >>= 8; - } - // now t = 0x56 0x34 0x12 - - // Make the midpoint token of the same length as t1 or t2 whichever is longer - while (t.size() < sigbytes) { - t.push_back(0x00); - } - // now t = 0x56 0x34 0x12 0x00 - - std::reverse(t.begin(), t.end()); - // now t = 0x00 0x12 0x34 0x56 - - // Add one byte with the value 0x80 to the end of the byte array to present - // the remainder - if (remainder) { - t.push_back(0x80); - } - // now t = 0x00 0x12 0x34 0x56 0x80 - - return token(token::kind::key, managed_bytes(t.data(), t.size())); -} - -unsigned -byte_ordered_partitioner::shard_of(const token& t) const { - switch (t._kind) { - case token::kind::before_all_keys: - return 0; - case token::kind::after_all_keys: - return _shard_count - 1; - case token::kind::key: - if (t._data.empty()) { - return 0; - } - // treat first byte as a fraction in the range [0, 1) and divide it evenly: - return (uint8_t(t._data[0]) * _shard_count) >> 8; - } - assert(0); -} - -token -byte_ordered_partitioner::token_for_next_shard(const token& t, shard_id shard, unsigned spans) const { - switch (t._kind) { - case token::kind::after_all_keys: - return maximum_token(); - case token::kind::before_all_keys: - case token::kind::key: - auto orig = shard_of(t); - if (shard <= orig || spans != 1) { - return maximum_token(); - } - auto e = div_ceil(shard << 8, _shard_count); - return token(token::kind::key, managed_bytes({int8_t(e)})); - } - assert(0); -} - - -using registry = class_registrator; -static registry registrator("org.apache.cassandra.dht.ByteOrderedPartitioner"); -static registry registrator_short_name("ByteOrderedPartitioner"); - -} diff --git a/scylla/dht/byte_ordered_partitioner.hh b/scylla/dht/byte_ordered_partitioner.hh deleted file mode 100644 index bd4cb0e..0000000 --- a/scylla/dht/byte_ordered_partitioner.hh +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "i_partitioner.hh" -#include "bytes.hh" - -#include "sstables/key.hh" - -namespace dht { - -class byte_ordered_partitioner final : public i_partitioner { -public: - byte_ordered_partitioner(unsigned shard_count = smp::count, unsigned ignore_msb = 0) : i_partitioner(shard_count) {} - virtual const sstring name() const { return "org.apache.cassandra.dht.ByteOrderedPartitioner"; } - virtual token get_token(const schema& s, partition_key_view key) override { - auto&& legacy = key.legacy_form(s); - return token(token::kind::key, bytes(legacy.begin(), legacy.end())); - } - virtual token get_token(const sstables::key_view& key) override { - auto v = bytes_view(key); - if (v.empty()) { - return minimum_token(); - } - return token(token::kind::key, bytes(v.begin(), v.end())); - } - virtual token get_token(const bytes& key) override { - auto v = bytes_view(key); - if (v.empty()) { - return minimum_token(); - } - return token(token::kind::key, bytes(v.begin(), v.end())); - } - virtual token get_random_token() override; - virtual bool preserves_order() override { return true; } - virtual std::map describe_ownership(const std::vector& sorted_tokens) override; - virtual data_type get_token_validator() override { return bytes_type; } - virtual int tri_compare(const token& t1, const token& t2) const override { - return compare_unsigned(t1._data, t2._data); - } - virtual token midpoint(const token& t1, const token& t2) const; - virtual sstring to_sstring(const dht::token& t) const override { - if (t._kind == dht::token::kind::before_all_keys) { - return sstring(); - } else { - return to_hex(t._data); - } - } - virtual dht::token from_sstring(const sstring& t) const override { - if (t.empty()) { - return minimum_token(); - } else { - auto data = from_hex(t); - return token(token::kind::key, bytes(data.begin(), data.end())); - } - } - virtual dht::token from_bytes(bytes_view bytes) const override { - if (bytes.empty()) { - return minimum_token(); - } else { - return token(token::kind::key, bytes); - } - } - virtual unsigned shard_of(const token& t) const override; - virtual token token_for_next_shard(const token& t, shard_id shard, unsigned spans) const override; -}; - -} diff --git a/scylla/dht/i_partitioner.cc b/scylla/dht/i_partitioner.cc deleted file mode 100644 index 3ed1a83..0000000 --- a/scylla/dht/i_partitioner.cc +++ /dev/null @@ -1,564 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "i_partitioner.hh" -#include "core/reactor.hh" -#include "murmur3_partitioner.hh" -#include "utils/class_registrator.hh" -#include "types.hh" -#include "utils/murmur_hash.hh" -#include "utils/div_ceil.hh" -#include -#include -#include -#include "sstables/key.hh" - -namespace dht { - -static const token min_token{ token::kind::before_all_keys, {} }; -static const token max_token{ token::kind::after_all_keys, {} }; - -const token& -minimum_token() { - return min_token; -} - -const token& -maximum_token() { - return max_token; -} - -// result + overflow bit -std::pair -add_bytes(bytes_view b1, bytes_view b2, bool carry = false) { - auto sz = std::max(b1.size(), b2.size()); - auto expand = [sz] (bytes_view b) { - bytes ret(bytes::initialized_later(), sz); - auto bsz = b.size(); - auto p = std::copy(b.begin(), b.end(), ret.begin()); - std::fill_n(p, sz - bsz, 0); - return ret; - }; - auto eb1 = expand(b1); - auto eb2 = expand(b2); - auto p1 = eb1.begin(); - auto p2 = eb2.begin(); - unsigned tmp = carry; - for (size_t idx = 0; idx < sz; ++idx) { - tmp += uint8_t(p1[sz - idx - 1]); - tmp += uint8_t(p2[sz - idx - 1]); - p1[sz - idx - 1] = tmp; - tmp >>= std::numeric_limits::digits; - } - return { std::move(eb1), bool(tmp) }; -} - -bytes -shift_right(bool carry, bytes b) { - unsigned tmp = carry; - auto sz = b.size(); - auto p = b.begin(); - for (size_t i = 0; i < sz; ++i) { - auto lsb = p[i] & 1; - p[i] = (tmp << std::numeric_limits::digits) | uint8_t(p[i]) >> 1; - tmp = lsb; - } - return b; -} - -token -midpoint_unsigned_tokens(const token& t1, const token& t2) { - // calculate the average of the two tokens. - // before_all_keys is implicit 0, after_all_keys is implicit 1. - bool c1 = t1._kind == token::kind::after_all_keys; - bool c2 = t1._kind == token::kind::after_all_keys; - if (c1 && c2) { - // both end-of-range tokens? - return t1; - } - // we can ignore beginning-of-range, since their representation is 0.0 - auto sum_carry = add_bytes(t1._data, t2._data); - auto& sum = sum_carry.first; - // if either was end-of-range, we added 0.0, so pretend we added 1.0 and - // and got a carry: - bool carry = sum_carry.second || c1 || c2; - auto avg = shift_right(carry, std::move(sum)); - if (t1 > t2) { - // wrap around the ring. We really want (t1 + (t2 + 1.0)) / 2, so add 0.5. 
- // example: midpoint(0.9, 0.2) == midpoint(0.9, 1.2) == 1.05 == 0.05 - // == (0.9 + 0.2) / 2 + 0.5 (mod 1) - if (avg.size() > 0) { - avg[0] ^= 0x80; - } - } - return token{token::kind::key, std::move(avg)}; -} - -int tri_compare(const token& t1, const token& t2) { - if (t1._kind == t2._kind) { - return global_partitioner().tri_compare(t1, t2); - } else if (t1._kind < t2._kind) { - return -1; - } - return 1; -} - -bool operator==(const token& t1, const token& t2) -{ - if (t1._kind != t2._kind) { - return false; - } else if (t1._kind == token::kind::key) { - return global_partitioner().is_equal(t1, t2); - } - return true; -} - -bool operator<(const token& t1, const token& t2) -{ - if (t1._kind < t2._kind) { - return true; - } else if (t1._kind == token::kind::key && t2._kind == token::kind::key) { - return global_partitioner().is_less(t1, t2); - } - return false; -} - -std::ostream& operator<<(std::ostream& out, const token& t) { - if (t._kind == token::kind::after_all_keys) { - out << "maximum token"; - } else if (t._kind == token::kind::before_all_keys) { - out << "minimum token"; - } else { - out << global_partitioner().to_sstring(t); - } - return out; -} - -std::ostream& operator<<(std::ostream& out, const decorated_key& dk) { - return out << "{key: " << dk._key << ", token:" << dk._token << "}"; -} - -// FIXME: make it per-keyspace -std::unique_ptr default_partitioner; - -void set_global_partitioner(const sstring& class_name, unsigned ignore_msb) -{ - try { - default_partitioner = create_object(class_name, smp::count, ignore_msb); - } catch (std::exception& e) { - auto supported_partitioners = ::join(", ", class_registry::classes() | - boost::adaptors::map_keys); - throw std::runtime_error(sprint("Partitioner %s is not supported, supported partitioners = { %s } : %s", - class_name, supported_partitioners, e.what())); - } -} - -i_partitioner& -global_partitioner() { - if (!default_partitioner) { - default_partitioner = std::make_unique(smp::count, 12); - } - return *default_partitioner; -} - -bool -decorated_key::equal(const schema& s, const decorated_key& other) const { - if (_token == other._token) { - return _key.legacy_equal(s, other._key); - } - return false; -} - -int -decorated_key::tri_compare(const schema& s, const decorated_key& other) const { - auto r = dht::tri_compare(_token, other._token); - if (r != 0) { - return r; - } else { - return _key.legacy_tri_compare(s, other._key); - } -} - -int -decorated_key::tri_compare(const schema& s, const ring_position& other) const { - auto r = dht::tri_compare(_token, other.token()); - if (r != 0) { - return r; - } else if (other.has_key()) { - return _key.legacy_tri_compare(s, *other.key()); - } - return -other.relation_to_keys(); -} - -bool -decorated_key::less_compare(const schema& s, const ring_position& other) const { - return tri_compare(s, other) < 0; -} - -bool -decorated_key::less_compare(const schema& s, const decorated_key& other) const { - return tri_compare(s, other) < 0; -} - -decorated_key::less_comparator::less_comparator(schema_ptr s) - : s(std::move(s)) -{ } - -bool -decorated_key::less_comparator::operator()(const decorated_key& lhs, const decorated_key& rhs) const { - return lhs.less_compare(*s, rhs); -} - -bool -decorated_key::less_comparator::operator()(const ring_position& lhs, const decorated_key& rhs) const { - return rhs.tri_compare(*s, lhs) > 0; -} - -bool -decorated_key::less_comparator::operator()(const decorated_key& lhs, const ring_position& rhs) const { - return lhs.tri_compare(*s, rhs) < 0; -} - 
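
As a reading aid for this hunk (not part of the patch): add_bytes() and shift_right() above do midpoint arithmetic on tokens viewed as big-endian binary fractions in [0.0, 1.0). A minimal standalone sketch of the same idea, using std::vector<uint8_t> in place of scylla's bytes type and placing the carry in the top bit:

    // Illustrative sketch only; mirrors add_bytes()/shift_right() above with
    // std::vector<uint8_t> standing in for scylla's bytes.
    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <utility>
    #include <vector>

    // Add two binary fractions; returns the sum and the carry out of the top byte.
    static std::pair<std::vector<uint8_t>, bool>
    add_fractions(std::vector<uint8_t> a, std::vector<uint8_t> b) {
        size_t sz = std::max(a.size(), b.size());
        a.resize(sz);
        b.resize(sz);                       // missing low-order bytes are implicit zeros
        unsigned carry = 0;
        for (size_t i = sz; i-- > 0; ) {    // least significant byte first
            unsigned s = a[i] + b[i] + carry;
            a[i] = s & 0xff;
            carry = s >> 8;
        }
        return { a, carry != 0 };
    }

    // Divide by two: shift right one bit, shifting the carry into the top bit.
    static std::vector<uint8_t>
    halve(bool carry, std::vector<uint8_t> v) {
        unsigned in = carry;
        for (auto& byte : v) {              // most significant byte first
            unsigned lsb = byte & 1;
            byte = (in << 7) | (byte >> 1);
            in = lsb;
        }
        return v;
    }

    int main() {
        // 0x80... = 0.5 and 0x40... = 0.25; the midpoint should be 0.375 = 0x60...
        auto [sum, carry] = add_fractions({0x80}, {0x40});
        auto mid = halve(carry, sum);
        std::printf("midpoint byte = 0x%02x\n", mid[0]); // prints 0x60
    }
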
-std::ostream& operator<<(std::ostream& out, const ring_position& pos) { - out << "{" << pos.token(); - if (pos.has_key()) { - out << ", " << *pos.key(); - } else { - out << ", " << ((pos.relation_to_keys() < 0) ? "start" : "end"); - } - return out << "}"; -} - -std::ostream& operator<<(std::ostream& out, ring_position_view pos) { - out << "{" << pos._token; - if (pos._key) { - out << ", " << *pos._key; - } - out << ", w=" << static_cast(pos._weight); - return out << "}"; -} - -unsigned shard_of(const token& t) { - return global_partitioner().shard_of(t); -} - -stdx::optional -selective_token_range_sharder::next() { - if (_done) { - return {}; - } - while (_range.overlaps(dht::token_range(_start_boundary, {}), dht::token_comparator()) - && !(_start_boundary && _start_boundary->value() == maximum_token())) { - auto end_token = _partitioner.token_for_next_shard(_start_token, _next_shard); - auto candidate = dht::token_range(std::move(_start_boundary), range_bound(end_token, false)); - auto intersection = _range.intersection(std::move(candidate), dht::token_comparator()); - _start_token = _partitioner.token_for_next_shard(end_token, _shard); - _start_boundary = range_bound(_start_token); - if (intersection) { - return *intersection; - } - } - - _done = true; - return {}; -} - -stdx::optional -ring_position_range_sharder::next(const schema& s) { - if (_done) { - return {}; - } - auto shard = _range.start() ? _partitioner.shard_of(_range.start()->value().token()) : _partitioner.shard_of_minimum_token(); - auto next_shard = shard + 1 < _partitioner.shard_count() ? shard + 1 : 0; - auto shard_boundary_token = _partitioner.token_for_next_shard(_range.start() ? _range.start()->value().token() : minimum_token(), next_shard); - auto shard_boundary = ring_position::starting_at(shard_boundary_token); - if ((!_range.end() || shard_boundary.less_compare(s, _range.end()->value())) - && shard_boundary_token != maximum_token()) { - // split the range at end_of_shard - auto start = _range.start(); - auto end = range_bound(shard_boundary, false); - _range = dht::partition_range( - range_bound(std::move(shard_boundary), true), - std::move(_range.end())); - return ring_position_range_and_shard{dht::partition_range(std::move(start), std::move(end)), shard}; - } - _done = true; - return ring_position_range_and_shard{std::move(_range), shard}; -} - - -ring_position_exponential_sharder::ring_position_exponential_sharder(const i_partitioner& partitioner, partition_range pr) - : _partitioner(partitioner) - , _range(std::move(pr)) - , _last_ends(_partitioner.shard_count()) { - if (_range.start()) { - _first_shard = _next_shard = _partitioner.shard_of(_range.start()->value().token()); - } -} - -ring_position_exponential_sharder::ring_position_exponential_sharder(partition_range pr) - : ring_position_exponential_sharder(global_partitioner(), std::move(pr)) { -} - -stdx::optional -ring_position_exponential_sharder::next(const schema& s) { - auto ret = ring_position_exponential_sharder_result{}; - ret.per_shard_ranges.reserve(std::min(_spans_per_iteration, _partitioner.shard_count())); - ret.inorder = _spans_per_iteration <= _partitioner.shard_count(); - unsigned spans_to_go = _spans_per_iteration; - auto cmp = ring_position_comparator(s); - auto spans_per_shard = _spans_per_iteration / _partitioner.shard_count(); - auto shards_with_extra_span = _spans_per_iteration % _partitioner.shard_count(); - auto first_shard = _next_shard; - _next_shard = (_next_shard + _spans_per_iteration) % _partitioner.shard_count(); - for (auto 
i : boost::irange(0u, std::min(_partitioner.shard_count(), _spans_per_iteration))) { - auto shard = (first_shard + i) % _partitioner.shard_count(); - if (_last_ends[shard] && *_last_ends[shard] == maximum_token()) { - continue; - } - range_bound this_shard_start = [&] { - if (_last_ends[shard]) { - return range_bound(ring_position::starting_at(*_last_ends[shard])); - } else { - return _range.start().value_or(range_bound(ring_position::starting_at(minimum_token()))); - } - }(); - // token_for_next_span() may give us the wrong boundary on the first pass, so add an extra span: - auto extra_span = !_last_ends[shard] && shard != _first_shard; - auto spans = spans_per_shard + unsigned(i < shards_with_extra_span); - auto boundary = _partitioner.token_for_next_shard(this_shard_start.value().token(), shard, spans + extra_span); - auto proposed_range = partition_range(this_shard_start, range_bound(ring_position::starting_at(boundary), false)); - auto intersection = _range.intersection(proposed_range, cmp); - if (!intersection) { - continue; - } - spans_to_go -= spans; - auto this_shard_result = ring_position_range_and_shard{std::move(*intersection), shard}; - _last_ends[shard] = boundary; - ret.per_shard_ranges.push_back(std::move(this_shard_result)); - } - if (ret.per_shard_ranges.empty()) { - return stdx::nullopt; - } - _spans_per_iteration *= 2; - return stdx::make_optional(std::move(ret)); -} - - -ring_position_exponential_vector_sharder::ring_position_exponential_vector_sharder(const std::vector>& ranges) - : _ranges(std::begin(ranges), std::end(ranges)) { - if (!_ranges.empty()) { - _current_sharder.emplace(_ranges.front()); - _ranges.pop_front(); - ++_element; - } -} - -stdx::optional -ring_position_exponential_vector_sharder::next(const schema& s) { - if (!_current_sharder) { - return stdx::nullopt; - } - while (true) { // yuch - auto ret = _current_sharder->next(s); - if (ret) { - auto augmented = ring_position_exponential_vector_sharder_result{std::move(*ret), _element}; - return stdx::make_optional(std::move(augmented)); - } - if (_ranges.empty()) { - _current_sharder = stdx::nullopt; - return stdx::nullopt; - } - _current_sharder.emplace(_ranges.front()); - _ranges.pop_front(); - ++_element; - } -} - - -ring_position_range_vector_sharder::ring_position_range_vector_sharder(dht::partition_range_vector ranges) - : _ranges(std::move(ranges)) - , _current_range(_ranges.begin()) { - next_range(); -} - -stdx::optional -ring_position_range_vector_sharder::next(const schema& s) { - if (!_current_sharder) { - return stdx::nullopt; - } - auto range_and_shard = _current_sharder->next(s); - while (!range_and_shard && _current_range != _ranges.end()) { - next_range(); - range_and_shard = _current_sharder->next(s); - } - auto ret = stdx::optional(); - if (range_and_shard) { - ret.emplace(std::move(*range_and_shard), _current_range - _ranges.begin() - 1); - } - return ret; -} - - -std::vector -split_range_to_single_shard(const i_partitioner& partitioner, const schema& s, const partition_range& pr, shard_id shard) { - auto cmp = ring_position_comparator(s); - auto ret = std::vector(); - auto next_shard = shard + 1 == partitioner.shard_count() ? 0 : shard + 1; - auto start_token = pr.start() ? pr.start()->value().token() : minimum_token(); - auto start_shard = partitioner.shard_of(start_token); - auto start_boundary = start_shard == shard ? 
pr.start() : range_bound(ring_position::starting_at(partitioner.token_for_next_shard(start_token, shard))); - while (pr.overlaps(partition_range(start_boundary, {}), cmp) - && !(start_boundary && start_boundary->value().token() == maximum_token())) { - auto end_token = partitioner.token_for_next_shard(start_token, next_shard); - auto candidate = partition_range(std::move(start_boundary), range_bound(ring_position::starting_at(end_token), false)); - auto intersection = pr.intersection(std::move(candidate), cmp); - if (intersection) { - ret.push_back(std::move(*intersection)); - } - start_token = partitioner.token_for_next_shard(end_token, shard); - start_boundary = range_bound(ring_position::starting_at(start_token)); - } - return ret; -} - -std::vector -split_range_to_single_shard(const schema& s, const partition_range& pr, shard_id shard) { - return split_range_to_single_shard(global_partitioner(), s, pr, shard); -} - - -int ring_position::tri_compare(const schema& s, const ring_position& o) const { - return ring_position_comparator(s)(*this, o); -} - -int token_comparator::operator()(const token& t1, const token& t2) const { - return tri_compare(t1, t2); -} - -bool ring_position::equal(const schema& s, const ring_position& other) const { - return tri_compare(s, other) == 0; -} - -bool ring_position::less_compare(const schema& s, const ring_position& other) const { - return tri_compare(s, other) < 0; -} - -int ring_position_comparator::operator()(ring_position_view lh, ring_position_view rh) const { - auto token_cmp = tri_compare(*lh._token, *rh._token); - if (token_cmp) { - return token_cmp; - } - if (lh._key && rh._key) { - auto c = lh._key->legacy_tri_compare(s, *rh._key); - if (c) { - return c; - } - return lh._weight - rh._weight; - } - if (!lh._key && !rh._key) { - return lh._weight - rh._weight; - } else if (!lh._key) { - return lh._weight > 0 ? 1 : -1; - } else { - return rh._weight > 0 ? -1 : 1; - } -} - -int ring_position_comparator::operator()(ring_position_view lh, sstables::decorated_key_view rh) const { - auto token_cmp = tri_compare(*lh._token, rh.token()); - if (token_cmp) { - return token_cmp; - } - if (lh._key) { - auto rel = rh.key().tri_compare(s, *lh._key); - if (rel) { - return -rel; - } - } - return lh._weight; -} - -int ring_position_comparator::operator()(sstables::decorated_key_view a, ring_position_view b) const { - return -(*this)(b, a); -} - -dht::partition_range -to_partition_range(dht::token_range r) { - using bound_opt = std::experimental::optional; - auto start = r.start() - ? bound_opt(dht::ring_position(r.start()->value(), - r.start()->is_inclusive() - ? dht::ring_position::token_bound::start - : dht::ring_position::token_bound::end)) - : bound_opt(); - - auto end = r.end() - ? bound_opt(dht::ring_position(r.end()->value(), - r.end()->is_inclusive() - ? 
dht::ring_position::token_bound::end - : dht::ring_position::token_bound::start)) - : bound_opt(); - - return { std::move(start), std::move(end) }; -} - -std::map -split_range_to_shards(dht::partition_range pr, const schema& s) { - std::map ret; - auto sharder = dht::ring_position_range_sharder(std::move(pr)); - auto rprs = sharder.next(s); - while (rprs) { - ret[rprs->shard].emplace_back(rprs->ring_range); - rprs = sharder.next(s); - } - return ret; -} - -std::map -split_ranges_to_shards(const dht::token_range_vector& ranges, const schema& s) { - std::map ret; - for (const auto& range : ranges) { - auto pr = dht::to_partition_range(range); - auto sharder = dht::ring_position_range_sharder(std::move(pr)); - auto rprs = sharder.next(s); - while (rprs) { - ret[rprs->shard].emplace_back(rprs->ring_range); - rprs = sharder.next(s); - } - } - return ret; -} - -} - -namespace std { - -size_t -hash::hash_large_token(const managed_bytes& b) const { - auto read_bytes = boost::irange(0, b.size()) - | boost::adaptors::transformed([&b] (size_t idx) { return b[idx]; }); - std::array result; - utils::murmur_hash::hash3_x64_128(read_bytes.begin(), b.size(), 0, result); - return result[0]; -} - -} diff --git a/scylla/dht/i_partitioner.hh b/scylla/dht/i_partitioner.hh deleted file mode 100644 index a09377f..0000000 --- a/scylla/dht/i_partitioner.hh +++ /dev/null @@ -1,729 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "core/shared_ptr.hh" -#include "core/sstring.hh" -#include "types.hh" -#include "keys.hh" -#include "utils/managed_bytes.hh" -#include "stdx.hh" -#include -#include -#include -#include -#include - -namespace sstables { - -class key_view; -class decorated_key_view; - -} - -namespace dht { - -// -// Origin uses a complex class hierarchy where Token is an abstract class, -// and various subclasses use different implementations (LongToken vs. -// BigIntegerToken vs. StringToken), plus other variants to to signify the -// the beginning of the token space etc. 
-// -// We'll fold all of that into the token class and push all of the variations -// into its users. - -class decorated_key; -class token; -class ring_position; - -using partition_range = nonwrapping_range; -using token_range = nonwrapping_range; - -using partition_range_vector = std::vector; -using token_range_vector = std::vector; - -class token { -public: - enum class kind { - before_all_keys, - key, - after_all_keys, - }; - kind _kind; - // _data can be interpreted as a big endian binary fraction - // in the range [0.0, 1.0). - // - // So, [] == 0.0 - // [0x00] == 0.0 - // [0x80] == 0.5 - // [0x00, 0x80] == 1/512 - // [0xff, 0x80] == 1 - 1/512 - managed_bytes _data; - - token() : _kind(kind::before_all_keys) { - } - - token(kind k, managed_bytes d) : _kind(std::move(k)), _data(std::move(d)) { - } - - bool is_minimum() const { - return _kind == kind::before_all_keys; - } - - bool is_maximum() const { - return _kind == kind::after_all_keys; - } -}; - -token midpoint_unsigned(const token& t1, const token& t2); -const token& minimum_token(); -const token& maximum_token(); -bool operator==(const token& t1, const token& t2); -bool operator<(const token& t1, const token& t2); -int tri_compare(const token& t1, const token& t2); -inline bool operator!=(const token& t1, const token& t2) { return std::rel_ops::operator!=(t1, t2); } -inline bool operator>(const token& t1, const token& t2) { return std::rel_ops::operator>(t1, t2); } -inline bool operator<=(const token& t1, const token& t2) { return std::rel_ops::operator<=(t1, t2); } -inline bool operator>=(const token& t1, const token& t2) { return std::rel_ops::operator>=(t1, t2); } -std::ostream& operator<<(std::ostream& out, const token& t); - -template -inline auto get_random_number() { - static thread_local std::random_device rd; - static thread_local std::default_random_engine re(rd()); - static thread_local std::uniform_int_distribution dist{}; - return dist(re); -} - -// Wraps partition_key with its corresponding token. -// -// Total ordering defined by comparators is compatible with Origin's ordering. -class decorated_key { -public: - dht::token _token; - partition_key _key; - - struct less_comparator { - schema_ptr s; - less_comparator(schema_ptr s); - bool operator()(const decorated_key& k1, const decorated_key& k2) const; - bool operator()(const decorated_key& k1, const ring_position& k2) const; - bool operator()(const ring_position& k1, const decorated_key& k2) const; - }; - - bool equal(const schema& s, const decorated_key& other) const; - - bool less_compare(const schema& s, const decorated_key& other) const; - bool less_compare(const schema& s, const ring_position& other) const; - - // Trichotomic comparators defining total ordering on the union of - // decorated_key and ring_position objects. 
- int tri_compare(const schema& s, const decorated_key& other) const; - int tri_compare(const schema& s, const ring_position& other) const; - - const dht::token& token() const { - return _token; - } - - const partition_key& key() const { - return _key; - } -}; - - -class decorated_key_equals_comparator { - const schema& _schema; -public: - explicit decorated_key_equals_comparator(const schema& schema) : _schema(schema) {} - bool operator()(const dht::decorated_key& k1, const dht::decorated_key& k2) const { - return k1.equal(_schema, k2); - } -}; - -using decorated_key_opt = std::experimental::optional; - -class i_partitioner { -protected: - unsigned _shard_count; -public: - explicit i_partitioner(unsigned shard_count) : _shard_count(shard_count) {} - virtual ~i_partitioner() {} - - /** - * Transform key to object representation of the on-disk format. - * - * @param key the raw, client-facing key - * @return decorated version of key - */ - decorated_key decorate_key(const schema& s, const partition_key& key) { - return { get_token(s, key), key }; - } - - /** - * Transform key to object representation of the on-disk format. - * - * @param key the raw, client-facing key - * @return decorated version of key - */ - decorated_key decorate_key(const schema& s, partition_key&& key) { - auto token = get_token(s, key); - return { std::move(token), std::move(key) }; - } - - /** - * Calculate a token representing the approximate "middle" of the given - * range. - * - * @return The approximate midpoint between left and right. - */ - virtual token midpoint(const token& left, const token& right) const = 0; - - /** - * @return A token smaller than all others in the range that is being partitioned. - * Not legal to assign to a node or key. (But legal to use in range scans.) - */ - token get_minimum_token() { - return dht::minimum_token(); - } - - /** - * @return a token that can be used to route a given key - * (This is NOT a method to create a token from its string representation; - * for that, use tokenFactory.fromString.) - */ - virtual token get_token(const schema& s, partition_key_view key) = 0; - virtual token get_token(const sstables::key_view& key) = 0; - virtual token get_token(const bytes& key) = 0; - - - /** - * @return a partitioner-specific string representation of this token - */ - virtual sstring to_sstring(const dht::token& t) const = 0; - - /** - * @return a token from its partitioner-specific string representation - */ - virtual dht::token from_sstring(const sstring& t) const = 0; - - /** - * @return a token from its partitioner-specific byte representation - */ - virtual dht::token from_bytes(bytes_view bytes) const = 0; - - /** - * @return a randomly generated token - */ - virtual token get_random_token() = 0; - - // FIXME: token.tokenFactory - //virtual token.tokenFactory gettokenFactory() = 0; - - /** - * @return True if the implementing class preserves key order in the tokens - * it generates. - */ - virtual bool preserves_order() = 0; - - /** - * Calculate the deltas between tokens in the ring in order to compare - * relative sizes. - * - * @param sortedtokens a sorted List of tokens - * @return the mapping from 'token' to 'percentage of the ring owned by that token'. - */ - virtual std::map describe_ownership(const std::vector& sorted_tokens) = 0; - - virtual data_type get_token_validator() = 0; - - /** - * @return name of partitioner. - */ - virtual const sstring name() const = 0; - - /** - * Calculates the shard that handles a particular token. 
- */
-    virtual unsigned shard_of(const token& t) const = 0;
-
-    /**
-     * Gets the first token greater than `t` that is in shard `shard`, and is a shard boundary (its first token).
-     *
-     * If the `spans` parameter is greater than zero, the result is the same as if the function
-     * is called `spans` times, each time applied to its return value, but efficiently. This allows
-     * selecting ranges that include multiple round trips around the 0..smp::count-1 shard span:
-     *
-     *     token_for_next_shard(t, shard, spans) == token_for_next_shard(token_for_next_shard(t, shard, 1), shard, spans - 1)
-     *
-     * On overflow, maximum_token() is returned.
-     */
-    virtual token token_for_next_shard(const token& t, shard_id shard, unsigned spans = 1) const = 0;
-
-    /**
-     * Gets the first shard of the minimum token.
-     */
-    unsigned shard_of_minimum_token() const {
-        return 0;  // hardcoded for now; unlikely to change
-    }
-
-    /**
-     * @return bytes that represent the token as required by get_token_validator().
-     */
-    virtual bytes token_to_bytes(const token& t) const {
-        return bytes(t._data.begin(), t._data.end());
-    }
-
-    /**
-     * @return < 0 if t1's _data array is less than t2's, 0 if they are equal, and > 0 otherwise. _kind comparison should be done separately.
-     */
-    virtual int tri_compare(const token& t1, const token& t2) const = 0;
-    /**
-     * @return true if t1's _data array is equal to t2's. _kind comparison should be done separately.
-     */
-    bool is_equal(const token& t1, const token& t2) const {
-        return tri_compare(t1, t2) == 0;
-    }
-    /**
-     * @return true if t1's _data array is less than t2's. _kind comparison should be done separately.
-     */
-    bool is_less(const token& t1, const token& t2) const {
-        return tri_compare(t1, t2) < 0;
-    }
-
-    /**
-     * @return number of shards configured for this partitioner
-     */
-    unsigned shard_count() const {
-        return _shard_count;
-    }
-
-    friend bool operator==(const token& t1, const token& t2);
-    friend bool operator<(const token& t1, const token& t2);
-    friend int tri_compare(const token& t1, const token& t2);
-};
-
-//
-// Represents position in the ring of partitions, where partitions are ordered
-// according to decorated_key ordering (first by token, then by key value).
-// Intended to be used for defining partition ranges.
-//
-// The 'key' part is optional. When it's absent, this object represents a position
-// which is either before or after all keys sharing given token. That's determined
-// by relation_to_keys().
-//
-// For example for the following data:
-//
-//   tokens: |    t1   | t2 |
-//           +----+----+----+
-//   keys:   | k1 | k2 | k3 |
-//
-// The ordering is:
-//
-//   ring_position(t1, token_bound::start) < ring_position(k1)
-//   ring_position(k1) < ring_position(k2)
-//   ring_position(k1) == decorated_key(k1)
-//   ring_position(k2) == decorated_key(k2)
-//   ring_position(k2) < ring_position(t1, token_bound::end)
-//   ring_position(k2) < ring_position(k3)
-//   ring_position(t1, token_bound::end) < ring_position(t2, token_bound::start)
-//
-// Maps to org.apache.cassandra.db.RowPosition and its derivatives in Origin.
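
To make the ordering above concrete (illustrative only, not part of the patch): the sketch below reduces ring_position to integer tokens and lexicographic string keys, and checks the relations listed in the comment. relation_to_keys() is modeled by the bound field; real scylla compares keys in legacy order via the schema, not lexicographically.

    // Illustrative sketch only; a simplified model of ring_position ordering.
    #include <cassert>
    #include <optional>
    #include <string>

    struct pos {
        int token;
        std::optional<std::string> key; // absent for pure token bounds
        int bound;                      // -1 = start, +1 = end, 0 = has key
    };

    // Compare by token first; a start bound sorts before all keys with the
    // same token, an end bound after them, mirroring tri_compare().
    static bool less(const pos& a, const pos& b) {
        if (a.token != b.token) {
            return a.token < b.token;
        }
        if (a.key && b.key) {
            return *a.key < *b.key;
        }
        int ra = a.key ? 0 : a.bound;   // relation_to_keys()
        int rb = b.key ? 0 : b.bound;
        return ra < rb;
    }

    int main() {
        pos t1_start{1, {}, -1}, k1{1, "k1", 0}, k2{1, "k2", 0}, t1_end{1, {}, +1};
        pos t2_start{2, {}, -1};
        assert(less(t1_start, k1));   // start bound precedes keys of its token
        assert(less(k1, k2));         // keys ordered by key value within a token
        assert(less(k2, t1_end));     // end bound follows keys of its token
        assert(less(t1_end, t2_start));
    }
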
-// -class ring_position { -public: - enum class token_bound : int8_t { start = -1, end = 1 }; -private: - friend class ring_position_comparator; - dht::token _token; - token_bound _token_bound; // valid when !_key - std::experimental::optional _key; -public: - static ring_position min() { - return { minimum_token(), token_bound::start }; - } - - static ring_position max() { - return { maximum_token(), token_bound::end }; - } - - static ring_position starting_at(dht::token token) { - return { std::move(token), token_bound::start }; - } - - static ring_position ending_at(dht::token token) { - return { std::move(token), token_bound::end }; - } - - ring_position(dht::token token, token_bound bound) - : _token(std::move(token)) - , _token_bound(bound) - { } - - ring_position(dht::token token, partition_key key) - : _token(std::move(token)) - , _key(std::experimental::make_optional(std::move(key))) - { } - - ring_position(dht::token token, token_bound bound, std::experimental::optional key) - : _token(std::move(token)) - , _token_bound(bound) - , _key(std::move(key)) - { } - - ring_position(const dht::decorated_key& dk) - : _token(dk._token) - , _key(std::experimental::make_optional(dk._key)) - { } - - const dht::token& token() const { - return _token; - } - - // Valid when !has_key() - token_bound bound() const { - return _token_bound; - } - - // Returns -1 if smaller than keys with the same token, +1 if greater. - int relation_to_keys() const { - return _key ? 0 : static_cast(_token_bound); - } - - const std::experimental::optional& key() const { - return _key; - } - - bool has_key() const { - return bool(_key); - } - - // Call only when has_key() - dht::decorated_key as_decorated_key() const { - return { _token, *_key }; - } - - bool equal(const schema&, const ring_position&) const; - - // Trichotomic comparator defining a total ordering on ring_position objects - int tri_compare(const schema&, const ring_position&) const; - - // "less" comparator corresponding to tri_compare() - bool less_compare(const schema&, const ring_position&) const; - - friend std::ostream& operator<<(std::ostream&, const ring_position&); -}; - -// Non-owning version of ring_position. -// -// Unlike ring_position, it can express positions which are right after and right before the keys. -// ring_position still can not because it is sent between nodes and such a position -// would not be (yet) properly interpreted by old nodes. That's why any ring_position -// can be converted to ring_position_view, but not the other way. -// -// It is possible to express a partition_range using a pair of two ring_position_views v1 and v2, -// where v1 = ring_position_view::for_range_start(r) and v2 = ring_position_view::for_range_end(r). -// Such range includes all keys k such that v1 <= k < v2, with order defined by ring_position_comparator. -// -class ring_position_view { - friend class ring_position_comparator; - - // Order is lexicographical on (_token, _key) tuples, where _key part may be missing, and - // _weight affecting order between tuples if one is a prefix of the other (including being equal). - // A positive weight puts the position after all strictly prefixed by it, while a non-positive - // weight puts it before them. If tuples are equal, the order is further determined by _weight. - // - // For example {_token=t1, _key=nullptr, _weight=1} is ordered after {_token=t1, _key=k1, _weight=0}, - // but {_token=t1, _key=nullptr, _weight=-1} is ordered before it. 
- // - const dht::token* _token; // always not nullptr - const partition_key* _key; // Can be nullptr - int8_t _weight; -public: - struct after_key_tag {}; - using after_key = bool_class; - - static ring_position_view min() { - return { minimum_token(), nullptr, -1 }; - } - - static ring_position_view max() { - return { maximum_token(), nullptr, 1 }; - } - - bool is_min() const { - return _token->is_minimum(); - } - - bool is_max() const { - return _token->is_maximum(); - } - - static ring_position_view for_range_start(const partition_range& r) { - return r.start() ? ring_position_view(r.start()->value(), after_key(!r.start()->is_inclusive())) : min(); - } - - static ring_position_view for_range_end(const partition_range& r) { - return r.end() ? ring_position_view(r.end()->value(), after_key(r.end()->is_inclusive())) : max(); - } - - static ring_position_view for_after_key(const dht::decorated_key& dk) { - return ring_position_view(dk, after_key::yes); - } - - static ring_position_view for_after_key(dht::ring_position_view view) { - return ring_position_view(after_key_tag(), view); - } - - ring_position_view(const dht::ring_position& pos, after_key after = after_key::no) - : _token(&pos.token()) - , _key(pos.has_key() ? &*pos.key() : nullptr) - , _weight(pos.has_key() ? bool(after) : pos.relation_to_keys()) - { } - - ring_position_view(const ring_position_view& pos) = default; - ring_position_view& operator=(const ring_position_view& other) = default; - - ring_position_view(after_key_tag, const ring_position_view& v) - : _token(v._token) - , _key(v._key) - , _weight(v._key ? 1 : v._weight) - { } - - ring_position_view(const dht::decorated_key& key, after_key after_key = after_key::no) - : _token(&key.token()) - , _key(&key.key()) - , _weight(bool(after_key)) - { } - - ring_position_view(const dht::token& token, partition_key* key, int8_t weight) - : _token(&token) - , _key(key) - , _weight(weight) - { } - - const partition_key* key() const { return _key; } - - friend std::ostream& operator<<(std::ostream&, ring_position_view); -}; - -// Trichotomic comparator for ring order -struct ring_position_comparator { - const schema& s; - ring_position_comparator(const schema& s_) : s(s_) {} - int operator()(ring_position_view, ring_position_view) const; - int operator()(ring_position_view, sstables::decorated_key_view) const; - int operator()(sstables::decorated_key_view, ring_position_view) const; -}; - -// "less" comparator giving the same order as ring_position_comparator -struct ring_position_less_comparator { - ring_position_comparator tri; - - ring_position_less_comparator(const schema& s) : tri(s) {} - - template - bool operator()(const T& lh, const U& rh) const { - return tri(lh, rh) < 0; - } -}; - -struct token_comparator { - // Return values are those of a trichotomic comparison. 
- int operator()(const token& t1, const token& t2) const; -}; - -std::ostream& operator<<(std::ostream& out, const token& t); - -std::ostream& operator<<(std::ostream& out, const decorated_key& t); - -void set_global_partitioner(const sstring& class_name, unsigned ignore_msb = 0); -i_partitioner& global_partitioner(); - -unsigned shard_of(const token&); - -struct ring_position_range_and_shard { - dht::partition_range ring_range; - unsigned shard; -}; - -class ring_position_range_sharder { - const i_partitioner& _partitioner; - dht::partition_range _range; - bool _done = false; -public: - explicit ring_position_range_sharder(nonwrapping_range rrp) - : ring_position_range_sharder(global_partitioner(), std::move(rrp)) {} - ring_position_range_sharder(const i_partitioner& partitioner, nonwrapping_range rrp) - : _partitioner(partitioner), _range(std::move(rrp)) {} - stdx::optional next(const schema& s); -}; - -struct ring_position_range_and_shard_and_element : ring_position_range_and_shard { - ring_position_range_and_shard_and_element(ring_position_range_and_shard&& rpras, unsigned element) - : ring_position_range_and_shard(std::move(rpras)), element(element) { - } - unsigned element; -}; - -struct ring_position_exponential_sharder_result { - std::vector per_shard_ranges; - bool inorder = true; -}; - -// given a ring_position range, generates exponentially increasing -// sets per-shard sub-ranges -class ring_position_exponential_sharder { - const i_partitioner& _partitioner; - partition_range _range; - unsigned _spans_per_iteration = 1; - unsigned _first_shard = 0; - unsigned _next_shard = 0; - std::vector> _last_ends; // index = shard -public: - explicit ring_position_exponential_sharder(partition_range pr); - explicit ring_position_exponential_sharder(const i_partitioner& partitioner, partition_range pr); - stdx::optional next(const schema& s); -}; - -struct ring_position_exponential_vector_sharder_result : ring_position_exponential_sharder_result { - ring_position_exponential_vector_sharder_result(ring_position_exponential_sharder_result rpesr, unsigned element) - : ring_position_exponential_sharder_result(std::move(rpesr)), element(element) {} - unsigned element; // range within vector from which this result came -}; - - -// given a vector of sorted, disjoint ring_position ranges, generates exponentially increasing -// sets per-shard sub-ranges. May be non-exponential when moving from one ring position range to another. 
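
For intuition about how these sharders slice a range (illustrative only, not part of the patch): with the byte_ordered-style mapping from earlier in this patch, shard_of(t) = (first_byte * shards) >> 8, each shard owns a contiguous slice of the byte space whose lower boundary is div_ceil(shard << 8, shards), which is what that partitioner's token_for_next_shard() computes. A toy walk over the first token byte:

    // Illustrative sketch only; assumes the byte_ordered-style shard mapping.
    #include <cstdio>

    static unsigned shard_of(unsigned first_byte, unsigned shards) {
        return (first_byte * shards) >> 8;
    }

    // First byte value owned by `shard`: div_ceil(shard << 8, shards).
    static unsigned shard_start(unsigned shard, unsigned shards) {
        return (shard * 256 + shards - 1) / shards;
    }

    int main() {
        const unsigned shards = 4;
        // Print each shard's [start, end) slice of the byte space; these are
        // the per-shard subranges a sharder would emit for the full range.
        for (unsigned s = 0; s < shards; ++s) {
            unsigned start = shard_start(s, shards);
            unsigned end = (s + 1 < shards) ? shard_start(s + 1, shards) : 256;
            std::printf("shard %u owns first bytes [0x%02x, 0x%02x)\n", s, start, end);
            if (shard_of(start, shards) != s) {     // sanity check the boundary
                std::printf("  boundary mismatch!\n");
            }
        }
    }
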
-class ring_position_exponential_vector_sharder { - std::deque> _ranges; - stdx::optional _current_sharder; - unsigned _element = 0; -public: - explicit ring_position_exponential_vector_sharder(const std::vector>& ranges); - stdx::optional next(const schema& s); -}; - -class ring_position_range_vector_sharder { - using vec_type = dht::partition_range_vector; - vec_type _ranges; - vec_type::iterator _current_range; - stdx::optional _current_sharder; -private: - void next_range() { - if (_current_range != _ranges.end()) { - _current_sharder.emplace(std::move(*_current_range++)); - } - } -public: - explicit ring_position_range_vector_sharder(dht::partition_range_vector ranges); - // results are returned sorted by index within the vector first, then within each vector item - stdx::optional next(const schema& s); -}; - -dht::partition_range to_partition_range(dht::token_range); - -// Each shard gets a sorted, disjoint vector of ranges -std::map -split_range_to_shards(dht::partition_range pr, const schema& s); - -// If input ranges are sorted and disjoint then the ranges for each shard -// are also sorted and disjoint. -std::map -split_ranges_to_shards(const dht::token_range_vector& ranges, const schema& s); - -// Intersect a partition_range with a shard and return the the resulting sub-ranges, in sorted order -std::vector split_range_to_single_shard(const schema& s, const dht::partition_range& pr, shard_id shard); -std::vector split_range_to_single_shard(const i_partitioner& partitioner, const schema& s, const dht::partition_range& pr, shard_id shard); - -class selective_token_range_sharder { - const i_partitioner& _partitioner; - dht::token_range _range; - shard_id _shard; - bool _done = false; - shard_id _next_shard; - dht::token _start_token; - stdx::optional> _start_boundary; -public: - explicit selective_token_range_sharder(dht::token_range range, shard_id shard) - : selective_token_range_sharder(global_partitioner(), std::move(range), shard) {} - selective_token_range_sharder(const i_partitioner& partitioner, dht::token_range range, shard_id shard) - : _partitioner(partitioner) - , _range(std::move(range)) - , _shard(shard) - , _next_shard(_shard + 1 == _partitioner.shard_count() ? 0 : _shard + 1) - , _start_token(_range.start() ? _range.start()->value() : minimum_token()) - , _start_boundary(_partitioner.shard_of(_start_token) == shard ? - _range.start() : range_bound(_partitioner.token_for_next_shard(_start_token, shard))) { - } - stdx::optional next(); -}; - -} // dht - -namespace std { -template<> -struct hash { - size_t operator()(const dht::token& t) const { - size_t ret = 0; - const auto& b = t._data; - if (b.size() <= sizeof(ret)) { // practically always - std::copy_n(b.data(), b.size(), reinterpret_cast(&ret)); - } else { - ret = hash_large_token(b); - } - return ret; - } -private: - size_t hash_large_token(const managed_bytes& b) const; -}; - -template <> -struct hash { - size_t operator()(const dht::decorated_key& k) const { - auto h_token = hash(); - return h_token(k.token()); - } -}; - - -} - - diff --git a/scylla/dht/murmur3_partitioner.hh b/scylla/dht/murmur3_partitioner.hh deleted file mode 100644 index 6702e8e..0000000 --- a/scylla/dht/murmur3_partitioner.hh +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. 
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "i_partitioner.hh"
-#include "bytes.hh"
-#include
-
-namespace dht {
-
-class murmur3_partitioner final : public i_partitioner {
-    unsigned _sharding_ignore_msb_bits;
-    std::vector<uint64_t> _shard_start = init_zero_based_shard_start(_shard_count, _sharding_ignore_msb_bits);
-public:
-    murmur3_partitioner(unsigned shard_count = smp::count, unsigned sharding_ignore_msb_bits = 0)
-            : i_partitioner(shard_count)
-            // if one shard, ignore sharding_ignore_msb_bits as they will just cause needless
-            // range breaks
-            , _sharding_ignore_msb_bits(shard_count > 1 ? sharding_ignore_msb_bits : 0) {
-    }
-    virtual const sstring name() const { return "org.apache.cassandra.dht.Murmur3Partitioner"; }
-    virtual token get_token(const schema& s, partition_key_view key) override;
-    virtual token get_token(const sstables::key_view& key) override;
-    virtual token get_token(const bytes& key) override;
-    virtual token get_random_token() override;
-    virtual bool preserves_order() override { return false; }
-    virtual std::map<token, float> describe_ownership(const std::vector<token>& sorted_tokens) override;
-    virtual data_type get_token_validator() override;
-    virtual int tri_compare(const token& t1, const token& t2) const override;
-    virtual token midpoint(const token& t1, const token& t2) const override;
-    virtual sstring to_sstring(const dht::token& t) const override;
-    virtual dht::token from_sstring(const sstring& t) const override;
-    virtual dht::token from_bytes(bytes_view bytes) const override;
-
-    virtual unsigned shard_of(const token& t) const override;
-    virtual token token_for_next_shard(const token& t, shard_id shard, unsigned spans) const override;
-private:
-    using uint128_t = unsigned __int128;
-    static int64_t normalize(int64_t in);
-    token get_token(bytes_view key);
-    token get_token(uint64_t value) const;
-    token bias(uint64_t value) const; // translate from a zero-based range
-    uint64_t unbias(const token& t) const; // translate to a zero-based range
-    static unsigned zero_based_shard_of(uint64_t zero_based_token, unsigned shards, unsigned sharding_ignore_msb_bits);
-    static std::vector<uint64_t> init_zero_based_shard_start(unsigned shards, unsigned sharding_ignore_msb_bits);
-};
-
-
-}
-
diff --git a/scylla/dht/random_partitioner.cc b/scylla/dht/random_partitioner.cc
deleted file mode 100644
index 0bcf128..0000000
--- a/scylla/dht/random_partitioner.cc
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- * Copyright (C) 2016 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "md5_hasher.hh" -#include "random_partitioner.hh" -#include "utils/class_registrator.hh" -#include "utils/div_ceil.hh" -#include - -namespace dht { - -static const boost::multiprecision::uint128_t cppint_one{1}; -static const boost::multiprecision::uint128_t cppint127_max = cppint_one << 127; - -// Convert token's byte array to integer value. -static boost::multiprecision::uint128_t token_to_cppint(const token& t) { - boost::multiprecision::uint128_t ret{0}; - // If the token is minimum token, token._data will be empty, - // zero will be returned - for (uint8_t d : t._data) { - ret = (ret << 8) + d; - } - return ret; -} - -// Store integer value for the token into token's byte array. The value must be within [0, 2 ^ 127]. -static token cppint_to_token(boost::multiprecision::uint128_t i) { - if (i == 0) { - return minimum_token(); - } - if (i > cppint127_max) { - throw std::runtime_error(sprint("RandomPartitioner value %s must be within [0, 2 ^ 127]", i)); - } - std::vector t; - while (i) { - static boost::multiprecision::uint128_t byte_mask = 0xFF; - auto data = (i & byte_mask).convert_to(); - t.push_back(data); - i >>= 8; - } - std::reverse(t.begin(), t.end()); - return token(token::kind::key, managed_bytes(t.data(), t.size())); -} - -// Convert a 16 bytes long raw byte array to token. Byte 0 is the most significant byte. -static token bytes_to_token(bytes digest) { - if (digest.size() != 16) { - throw std::runtime_error(sprint("RandomPartitioner digest should be 16 bytes, it is %d", digest.size())); - } - // Translates the bytes array to signed integer i, - // abs(i) is stored in token's _data array. - if (digest[0] & 0x80) { - boost::multiprecision::uint128_t i = 0; - for (uint8_t d : digest) { - i = (i << 8) + d; - } - // i = abs(i) = ~i + 1 - i = ~i + 1; - return cppint_to_token(i); - } else { - return token(token::kind::key, std::move(digest)); - } -} - -static float ratio_helper(boost::multiprecision::uint128_t a, boost::multiprecision::uint128_t b) { - boost::multiprecision::uint128_t val; - if (a >= b) { - val = a - b; - } else { - val = cppint127_max - (b - a); - } - return static_cast(val.convert_to() * 0x1p-127); -} - -token random_partitioner::get_token_impl(bytes data) { - md5_hasher h; - h.update(reinterpret_cast(data.c_str()), data.size()); - return bytes_to_token(h.finalize()); -} - -token random_partitioner::get_token(const schema& s, partition_key_view key) { - auto&& legacy = key.legacy_form(s); - return get_token_impl(bytes(legacy.begin(), legacy.end())); -} - -token random_partitioner::get_token(const sstables::key_view& key) { - auto v = bytes_view(key); - if (v.empty()) { - return minimum_token(); - } - return get_token_impl(bytes(v.begin(), v.end())); -} - -token random_partitioner::get_token(const bytes& key) { - auto v = bytes_view(key); - if (v.empty()) { - return minimum_token(); - } - return get_token(bytes(v.begin(), v.end())); -} - -int random_partitioner::tri_compare(const token& t1, const token& t2) const { - auto l1 = token_to_cppint(t1); - auto l2 = token_to_cppint(t2); - - if (l1 == l2) { - return 0; - } else { - return l1 < l2 ? 
-1 : 1; - } -} - -token random_partitioner::get_random_token() { - boost::multiprecision::uint128_t i = dht::get_random_number(); - i = (i << 64) + dht::get_random_number(); - if (i > cppint127_max) { - i = ~i + 1; - } - return cppint_to_token(i); -} - -std::map<token, float> random_partitioner::describe_ownership(const std::vector<token>& sorted_tokens) { - std::map<token, float> ownerships; - auto i = sorted_tokens.begin(); - - // 0-case - if (i == sorted_tokens.end()) { - throw runtime_exception("No nodes present in the cluster. Has this node finished starting up?"); - } - // 1-case - if (sorted_tokens.size() == 1) { - ownerships[sorted_tokens[0]] = 1.0; - // n-case - } else { - const token& start = sorted_tokens[0]; - auto ti = token_to_cppint(start); // The first token and its value - auto cppint_start = ti; - auto tim1 = ti; // The last token and its value (after loop) - for (i++; i != sorted_tokens.end(); i++) { - ti = token_to_cppint(*i); // The next token and its value - ownerships[*i] = ratio_helper(ti, tim1); // save (T(i) -> %age) - tim1 = ti; - } - - // The start token's range extends backward to the last token, which is why both were saved above. - ownerships[start] = ratio_helper(cppint_start, ti); - } - - return ownerships; -} - -token random_partitioner::midpoint(const token& t1, const token& t2) const { - unsigned sigbytes = std::max(t1._data.size(), t2._data.size()); - if (sigbytes == 0) { - // The midpoint of two minimum tokens is the minimum token - return minimum_token(); - } - static boost::multiprecision::uint128_t max = cppint_one << 127; - auto l1 = token_to_cppint(t1); - auto l2 = token_to_cppint(t2); - auto sum = l1 + l2; - boost::multiprecision::uint128_t mid; - // t1 <= t2 is the same as l1 <= l2 - if (l1 <= l2) { - mid = sum / 2; - } else { - mid = (sum / 2 + max / 2) % max; - } - return cppint_to_token(mid); -} - -sstring random_partitioner::to_sstring(const dht::token& t) const { - if (t._kind == dht::token::kind::before_all_keys) { - return sstring(); - } else { - return token_to_cppint(t).str(); - } -} - -dht::token random_partitioner::from_sstring(const sstring& t) const { - if (t.empty()) { - return minimum_token(); - } else { - boost::multiprecision::uint128_t x(t.c_str()); - return cppint_to_token(x); - } -} - -dht::token random_partitioner::from_bytes(bytes_view bytes) const { - if (bytes.empty()) { - return minimum_token(); - } else { - return dht::token(dht::token::kind::key, bytes); - } -} - -unsigned random_partitioner::shard_of(const token& t) const { - switch (t._kind) { - case token::kind::before_all_keys: - return 0; - case token::kind::after_all_keys: - return _shard_count - 1; - case token::kind::key: - auto i = (boost::multiprecision::uint256_t(token_to_cppint(t)) * _shard_count) >> 127; - // token can be [0, 2^127], make sure smp is in [0, _shard_count) - auto smp = i.convert_to<unsigned>(); - if (smp >= _shard_count) { - return _shard_count - 1; - } - return smp; - } - assert(0); -} - -token -random_partitioner::token_for_next_shard(const token& t, shard_id shard, unsigned spans) const { - if (_shard_count == 1) { - return maximum_token(); - } - switch (t._kind) { - case token::kind::after_all_keys: - return maximum_token(); - case token::kind::before_all_keys: - case token::kind::key: - auto orig = shard_of(t); - if (shard <= orig || spans != 1) { - return maximum_token(); - } - auto next = div_ceil(boost::multiprecision::uint256_t(shard) << 127, _shard_count); - return cppint_to_token(next.convert_to<boost::multiprecision::uint128_t>()); - } - assert(0); -} - - -bytes random_partitioner::token_to_bytes(const token& t) const { - 
static const bytes zero_byte(1, int8_t(0x00)); - if (t.is_minimum() || t._data.empty()) { - return zero_byte; - } - auto data = bytes(t._data.begin(), t._data.end()); - if (t._data[0] & 0x80) { - // Prepend 0x00 to the byte array to mimic BigInteger.toByteArray's - // byte array representation which has a sign bit. - return zero_byte + data; - } - return data; -} - -using registry = class_registrator; -static registry registrator("org.apache.cassandra.dht.RandomPartitioner"); -static registry registrator_short_name("RandomPartitioner"); - -} diff --git a/scylla/dht/random_partitioner.hh b/scylla/dht/random_partitioner.hh deleted file mode 100644 index bb21c13..0000000 --- a/scylla/dht/random_partitioner.hh +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "i_partitioner.hh" -#include "bytes.hh" - -#include "sstables/key.hh" - -namespace dht { - -class random_partitioner final : public i_partitioner { -public: - random_partitioner(unsigned shard_count = smp::count, unsigned ignore_msb = 0) : i_partitioner(shard_count) {} - virtual const sstring name() const { return "org.apache.cassandra.dht.RandomPartitioner"; } - virtual token get_token(const schema& s, partition_key_view key) override; - virtual token get_token(const sstables::key_view& key) override; - virtual token get_token(const bytes& key) override; - virtual token get_random_token() override; - virtual bool preserves_order() override { return false; } - virtual std::map describe_ownership(const std::vector& sorted_tokens) override; - virtual data_type get_token_validator() override { return varint_type; } - virtual bytes token_to_bytes(const token& t) const override; - virtual int tri_compare(const token& t1, const token& t2) const override; - virtual token midpoint(const token& t1, const token& t2) const; - virtual sstring to_sstring(const dht::token& t) const override; - virtual dht::token from_sstring(const sstring& t) const override; - virtual dht::token from_bytes(bytes_view bytes) const override; - virtual unsigned shard_of(const token& t) const override; - virtual token token_for_next_shard(const token& t, shard_id shard, unsigned spans) const override; -private: - token get_token_impl(bytes data); -}; - -} diff --git a/scylla/dht/range_streamer.cc b/scylla/dht/range_streamer.cc deleted file mode 100644 index 6d3a44d..0000000 --- a/scylla/dht/range_streamer.cc +++ /dev/null @@ -1,268 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
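random_partitioner works on a [0, 2^127] ring, so tokens move back and forth between big-endian byte arrays and 128-bit integers, as token_to_cppint/cppint_to_token do above. A standalone sketch of that round trip using the compiler's unsigned __int128 instead of boost::multiprecision; the helper names are hypothetical:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

using uint128_t = unsigned __int128;

// Big-endian byte array -> integer, mirroring token_to_cppint above;
// an empty array (the minimum token) yields zero.
static uint128_t bytes_to_int(const std::vector<uint8_t>& data) {
    uint128_t ret = 0;
    for (uint8_t d : data) {
        ret = (ret << 8) | d;
    }
    return ret;
}

// Integer -> minimal big-endian byte array, mirroring cppint_to_token.
static std::vector<uint8_t> int_to_bytes(uint128_t i) {
    std::vector<uint8_t> out;
    while (i) {
        out.push_back(static_cast<uint8_t>(i & 0xff));
        i >>= 8;
    }
    std::reverse(out.begin(), out.end());
    return out;
}

int main() {
    uint128_t v = (uint128_t(0x0123456789abcdefull) << 16) | 0xbeef;
    assert(bytes_to_int(int_to_bytes(v)) == v); // the round trip is lossless
    assert(bytes_to_int({}) == 0);              // empty array == minimum token
}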
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "dht/range_streamer.hh" -#include "utils/fb_utilities.hh" -#include "locator/snitch_base.hh" -#include "database.hh" -#include "gms/gossiper.hh" -#include "gms/failure_detector.hh" -#include "log.hh" -#include "streaming/stream_plan.hh" -#include "streaming/stream_state.hh" -#include "service/storage_service.hh" - -namespace dht { - -logging::logger logger("range_streamer"); - -using inet_address = gms::inet_address; - -static std::unordered_map> -unordered_multimap_to_unordered_map(const std::unordered_multimap& multimap) { - std::unordered_map> ret; - for (auto x : multimap) { - ret[x.first].emplace(x.second); - } - return ret; -} - -std::unordered_multimap -range_streamer::get_range_fetch_map(const std::unordered_multimap& ranges_with_sources, - const std::unordered_set>& source_filters, - const sstring& keyspace) { - std::unordered_multimap range_fetch_map_map; - for (auto x : unordered_multimap_to_unordered_map(ranges_with_sources)) { - const dht::token_range& range_ = x.first; - const std::unordered_set& addresses = x.second; - bool found_source = false; - for (auto address : addresses) { - if (address == utils::fb_utilities::get_broadcast_address()) { - // If localhost is a source, we have found one, but we don't add it to the map to avoid streaming locally - found_source = true; - continue; - } - - auto filtered = false; - for (const auto& filter : source_filters) { - if (!filter->should_include(address)) { - filtered = true; - break; - } - } - - if (filtered) { - continue; - } - - range_fetch_map_map.emplace(address, range_); - found_source = true; - break; // ensure we only stream from one other node for each range - } - - if (!found_source) { - throw std::runtime_error(sprint("unable to find sufficient sources for streaming range %s in keyspace %s", range_, keyspace)); - } - } - - return range_fetch_map_map; -} - -std::unordered_multimap -range_streamer::get_all_ranges_with_sources_for(const sstring& keyspace_name, dht::token_range_vector desired_ranges) { - logger.debug("{} ks={}", __func__, keyspace_name); - - auto& ks = _db.local().find_keyspace(keyspace_name); - auto& strat = ks.get_replication_strategy(); - - auto tm = _metadata.clone_only_token_map(); - auto range_addresses = unordered_multimap_to_unordered_map(strat.get_range_addresses(tm)); - - std::unordered_multimap range_sources; - auto& snitch = 
locator::i_endpoint_snitch::get_local_snitch_ptr(); - for (auto& desired_range : desired_ranges) { - auto found = false; - for (auto& x : range_addresses) { - const dht::token_range& src_range = x.first; - if (src_range.contains(desired_range, dht::tri_compare)) { - std::unordered_set<inet_address>& addresses = x.second; - auto preferred = snitch->get_sorted_list_by_proximity(_address, addresses); - for (inet_address& p : preferred) { - range_sources.emplace(desired_range, p); - } - found = true; - } - } - - if (!found) { - throw std::runtime_error(sprint("No sources found for %s", desired_range)); - } - } - - return range_sources; -} - -std::unordered_multimap<dht::token_range, gms::inet_address> -range_streamer::get_all_ranges_with_strict_sources_for(const sstring& keyspace_name, dht::token_range_vector desired_ranges) { - logger.debug("{} ks={}", __func__, keyspace_name); - assert (_tokens.empty() == false); - - auto& ks = _db.local().find_keyspace(keyspace_name); - auto& strat = ks.get_replication_strategy(); - - //Active ranges - auto metadata_clone = _metadata.clone_only_token_map(); - auto range_addresses = unordered_multimap_to_unordered_map(strat.get_range_addresses(metadata_clone)); - - //Pending ranges - metadata_clone.update_normal_tokens(_tokens, _address); - auto pending_range_addresses = unordered_multimap_to_unordered_map(strat.get_range_addresses(metadata_clone)); - - //Collects the source that will have its range moved to the new node - std::unordered_multimap<dht::token_range, inet_address> range_sources; - - for (auto& desired_range : desired_ranges) { - for (auto& x : range_addresses) { - const dht::token_range& src_range = x.first; - if (src_range.contains(desired_range, dht::tri_compare)) { - std::vector<inet_address> old_endpoints(x.second.begin(), x.second.end()); - auto it = pending_range_addresses.find(desired_range); - if (it == pending_range_addresses.end()) { - throw std::runtime_error(sprint("Cannot find desired_range = %s in pending_range_addresses", desired_range)); - } - std::unordered_set<inet_address> new_endpoints = it->second; - - //Due to CASSANDRA-5953 we can have a higher RF than we have endpoints. - //So we need to be careful to only be strict when endpoints == RF - if (old_endpoints.size() == strat.get_replication_factor()) { - auto it = std::remove_if(old_endpoints.begin(), old_endpoints.end(), - [&new_endpoints] (inet_address ep) { return new_endpoints.count(ep); }); - old_endpoints.erase(it, old_endpoints.end()); - if (old_endpoints.size() != 1) { - throw std::runtime_error(sprint("Expected 1 endpoint but found %d", old_endpoints.size())); - } - } - range_sources.emplace(desired_range, old_endpoints.front()); - } - } - - //Validate - auto nr = range_sources.count(desired_range); - if (nr < 1) { - throw std::runtime_error(sprint("No sources found for %s", desired_range)); - } - - if (nr > 1) { - throw std::runtime_error(sprint("Multiple endpoints found for %s", desired_range)); - } - - inet_address source_ip = range_sources.find(desired_range)->second; - auto& gossiper = gms::get_local_gossiper(); - auto source_state = gossiper.get_endpoint_state_for_endpoint(source_ip); - if (gossiper.is_enabled() && source_state && !source_state->is_alive()) { - throw std::runtime_error(sprint("A node required to move the data consistently is down (%s). 
If you wish to move the data from a potentially inconsistent replica, restart the node with consistent_rangemovement=false", source_ip)); - } - } - - return range_sources; -} - -bool range_streamer::use_strict_sources_for_ranges(const sstring& keyspace_name) { - auto& ks = _db.local().find_keyspace(keyspace_name); - auto& strat = ks.get_replication_strategy(); - return !_db.local().is_replacing() - && use_strict_consistency() - && !_tokens.empty() - && _metadata.get_all_endpoints().size() != strat.get_replication_factor(); -} - -void range_streamer::add_ranges(const sstring& keyspace_name, dht::token_range_vector ranges) { - auto ranges_for_keyspace = use_strict_sources_for_ranges(keyspace_name) - ? get_all_ranges_with_strict_sources_for(keyspace_name, ranges) - : get_all_ranges_with_sources_for(keyspace_name, ranges); - - if (logger.is_enabled(logging::log_level::debug)) { - for (auto& x : ranges_for_keyspace) { - logger.debug("{} : range {} exists on {}", _description, x.first, x.second); - } - } - - std::unordered_map range_fetch_map; - for (auto& x : get_range_fetch_map(ranges_for_keyspace, _source_filters, keyspace_name)) { - range_fetch_map[x.first].emplace_back(x.second); - } - - if (logger.is_enabled(logging::log_level::debug)) { - for (auto& x : range_fetch_map) { - logger.debug("{} : range {} from source {} for keyspace {}", _description, x.second, x.first, keyspace_name); - } - } - _to_fetch.emplace(keyspace_name, std::move(range_fetch_map)); -} - -future range_streamer::fetch_async() { - for (auto& fetch : _to_fetch) { - const auto& keyspace = fetch.first; - for (auto& x : fetch.second) { - auto& source = x.first; - auto& ranges = x.second; - /* Send messages to respective folks to stream data over to me */ - if (logger.is_enabled(logging::log_level::debug)) { - logger.debug("{}ing from {} ranges {}", _description, source, ranges); - } - _stream_plan.request_ranges(source, keyspace, ranges); - } - } - - return _stream_plan.execute(); -} - -std::unordered_multimap -range_streamer::get_work_map(const std::unordered_multimap& ranges_with_source_target, - const sstring& keyspace) { - auto filter = std::make_unique(gms::get_local_failure_detector()); - std::unordered_set> source_filters; - source_filters.emplace(std::move(filter)); - return get_range_fetch_map(ranges_with_source_target, source_filters, keyspace); -} - -bool range_streamer::use_strict_consistency() { - return service::get_local_storage_service().db().local().get_config().consistent_rangemovement(); -} - -} // dht diff --git a/scylla/dht/range_streamer.hh b/scylla/dht/range_streamer.hh deleted file mode 100644 index 79f69ed..0000000 --- a/scylla/dht/range_streamer.hh +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
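get_range_fetch_map, used by add_ranges and get_work_map above, picks for every range the first candidate source that is not the local node and passes all filters, and fails if any range ends up with no source at all. A simplified sketch of that selection with plain STL types standing in for dht::token_range and gms::inet_address; all names here are illustrative:

#include <algorithm>
#include <functional>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

using range_id = std::string;
using endpoint = std::string;
using source_filter = std::function<bool(const endpoint&)>;

std::multimap<endpoint, range_id>
make_fetch_map(const std::map<range_id, std::vector<endpoint>>& sources,
               const std::vector<source_filter>& filters,
               const endpoint& self) {
    std::multimap<endpoint, range_id> fetch_map;
    for (const auto& [range, candidates] : sources) {
        bool found = false;
        for (const auto& ep : candidates) {
            if (ep == self) {   // never stream from ourselves...
                found = true;   // ...but the range still counts as sourced
                continue;
            }
            bool excluded = std::any_of(filters.begin(), filters.end(),
                [&](const source_filter& f) { return !f(ep); });
            if (!excluded) {
                fetch_map.emplace(ep, range); // stream each range from one peer
                found = true;
                break;
            }
        }
        if (!found) {
            throw std::runtime_error("unable to find sufficient sources for range " + range);
        }
    }
    return fetch_map;
}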
- * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "locator/token_metadata.hh" -#include "locator/snitch_base.hh" -#include "streaming/stream_plan.hh" -#include "streaming/stream_state.hh" -#include "gms/inet_address.hh" -#include "gms/i_failure_detector.hh" -#include "range.hh" -#include -#include -#include - -class database; - -namespace dht { -/** - * Assists in streaming ranges to a node. - */ -class range_streamer { -public: - using inet_address = gms::inet_address; - using token_metadata = locator::token_metadata; - using stream_plan = streaming::stream_plan; - using stream_state = streaming::stream_state; - using i_failure_detector = gms::i_failure_detector; - static bool use_strict_consistency(); -public: - /** - * A filter applied to sources to stream from when constructing a fetch map. - */ - class i_source_filter { - public: - virtual bool should_include(inet_address endpoint) = 0; - virtual ~i_source_filter() {} - }; - - /** - * Source filter which excludes any endpoints that are not alive according to a - * failure detector. - */ - class failure_detector_source_filter : public i_source_filter { - private: - gms::i_failure_detector& _fd; - public: - failure_detector_source_filter(i_failure_detector& fd) : _fd(fd) { } - virtual bool should_include(inet_address endpoint) override { return _fd.is_alive(endpoint); } - }; - - /** - * Source filter which excludes any endpoints that are not in a specific data center. - */ - class single_datacenter_filter : public i_source_filter { - private: - sstring _source_dc; - public: - single_datacenter_filter(const sstring& source_dc) - : _source_dc(source_dc) { - } - virtual bool should_include(inet_address endpoint) override { - auto& snitch_ptr = locator::i_endpoint_snitch::get_local_snitch_ptr(); - return snitch_ptr->get_datacenter(endpoint) == _source_dc; - } - }; - - range_streamer(distributed& db, token_metadata& tm, std::unordered_set tokens, inet_address address, sstring description) - : _db(db) - , _metadata(tm) - , _tokens(std::move(tokens)) - , _address(address) - , _description(std::move(description)) - , _stream_plan(_description) { - } - - range_streamer(distributed& db, token_metadata& tm, inet_address address, sstring description) - : range_streamer(db, tm, std::unordered_set(), address, description) { - } - - void add_source_filter(std::unique_ptr filter) { - _source_filters.emplace(std::move(filter)); - } - - void add_ranges(const sstring& keyspace_name, dht::token_range_vector ranges); -private: - bool use_strict_sources_for_ranges(const sstring& keyspace_name); - /** - * Get a map of all ranges and their respective sources that are candidates for streaming the given ranges - * to us. For each range, the list of sources is sorted by proximity relative to the given destAddress. 
- */ - std::unordered_multimap<dht::token_range, inet_address> - get_all_ranges_with_sources_for(const sstring& keyspace_name, dht::token_range_vector desired_ranges); - /** - * Get a map of all ranges and the source that will be cleaned up once this bootstrapped node is added for the given ranges. - * For each range, the list should only contain a single source. This allows us to consistently migrate data without violating - * consistency. - */ - std::unordered_multimap<dht::token_range, inet_address> - get_all_ranges_with_strict_sources_for(const sstring& keyspace_name, dht::token_range_vector desired_ranges); -private: - /** - * @param rangesWithSources The ranges we want to fetch (key) and their potential sources (value) - * @param sourceFilters A (possibly empty) collection of source filters to apply. In addition to any filters given - * here, we always exclude ourselves. - * @return - */ - static std::unordered_multimap<inet_address, dht::token_range> - get_range_fetch_map(const std::unordered_multimap<dht::token_range, inet_address>& ranges_with_sources, - const std::unordered_set<std::unique_ptr<i_source_filter>>& source_filters, - const sstring& keyspace); - -public: - static std::unordered_multimap<inet_address, dht::token_range> - get_work_map(const std::unordered_multimap<dht::token_range, inet_address>& ranges_with_source_target, - const sstring& keyspace); -#if 0 - - // For testing purposes - Multimap<String, Map.Entry<InetAddress, Collection<Range<Token>>>> toFetch() - { - return toFetch; - } -#endif -public: - future<stream_state> fetch_async(); -private: - distributed<database>& _db; - token_metadata& _metadata; - std::unordered_set<token> _tokens; - inet_address _address; - sstring _description; - std::unordered_multimap<sstring, std::unordered_map<inet_address, dht::token_range_vector>> _to_fetch; - std::unordered_set<std::unique_ptr<i_source_filter>> _source_filters; - stream_plan _stream_plan; -}; - -} // dht diff --git a/scylla/dht/token_range_endpoints.hh b/scylla/dht/token_range_endpoints.hh deleted file mode 100644 index acc1bc2..0000000 --- a/scylla/dht/token_range_endpoints.hh +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB. - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once -#include <vector> - -namespace dht { -struct endpoint_details { - sstring _host; - sstring _datacenter; - sstring _rack; -}; - -struct token_range_endpoints { - sstring _start_token; - sstring _end_token; - std::vector<sstring> _endpoints; - std::vector<sstring> _rpc_endpoints; - std::vector<endpoint_details> _endpoint_details; -}; - -} diff --git a/scylla/digest_algorithm.hh b/scylla/digest_algorithm.hh deleted file mode 100644 index 966f18d..0000000 --- a/scylla/digest_algorithm.hh +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -namespace query { - -enum class digest_algorithm : uint8_t { - none = 0, // digest not required - MD5 = 1, // default algorithm -}; - -} diff --git a/scylla/disk-error-handler.cc b/scylla/disk-error-handler.cc deleted file mode 100644 index 40f7245..0000000 --- a/scylla/disk-error-handler.cc +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - **/ - -/* This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "disk-error-handler.hh" - -thread_local io_error_handler commit_error_handler = default_io_error_handler(commit_error); -thread_local io_error_handler general_disk_error_handler = default_io_error_handler(general_disk_error); -thread_local io_error_handler sstable_write_error_handler = default_io_error_handler(sstable_write_error); - -io_error_handler default_io_error_handler(disk_error_signal_type& signal) { - return [&signal] (std::exception_ptr eptr) { - try { - std::rethrow_exception(eptr); - } catch(std::system_error& e) { - if (should_stop_on_system_error(e)) { - signal(); - throw storage_io_error(e); - } - } - }; -} - -io_error_handler_gen default_io_error_handler_gen() { - return [] (disk_error_signal_type& signal) { - return default_io_error_handler(signal); - }; -} diff --git a/scylla/disk-error-handler.hh b/scylla/disk-error-handler.hh deleted file mode 100644 index 3ff1c99..0000000 --- a/scylla/disk-error-handler.hh +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - **/ - -/* This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include -#include - -#include "utils/exceptions.hh" -#include - -#include "seastarx.hh" - -namespace bs2 = boost::signals2; - -using disk_error_signal_type = bs2::signal_type>::type; - -extern thread_local disk_error_signal_type commit_error; -extern thread_local disk_error_signal_type sstable_read_error; -extern thread_local disk_error_signal_type sstable_write_error; -extern thread_local disk_error_signal_type general_disk_error; - -bool should_stop_on_system_error(const std::system_error& e); - -using io_error_handler = std::function; -// stores a function that generates a io handler for a given signal. 
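default_io_error_handler above inspects the in-flight exception, fires the matching disk-error signal, and wraps qualifying std::system_errors; do_io_check (in the header that follows) then funnels both synchronous and future-based I/O through such a handler. A stripped-down, futures-free sketch of the same wrap-and-rethrow pattern, with a plain logging handler standing in for the signal (names are illustrative):

#include <exception>
#include <functional>
#include <iostream>
#include <system_error>

using io_error_handler = std::function<void(std::exception_ptr)>;

// Run op, routing any exception through the handler before rethrowing,
// in the spirit of the synchronous branch of do_io_check.
template <typename Func>
auto with_io_check(const io_error_handler& handler, Func&& op) {
    try {
        return op();
    } catch (...) {
        handler(std::current_exception());
        throw;
    }
}

int main() {
    // A logging handler standing in for the disk-error signal.
    io_error_handler handler = [](std::exception_ptr ep) {
        try {
            std::rethrow_exception(ep);
        } catch (const std::system_error& e) {
            std::cerr << "disk error: " << e.what() << '\n';
        } catch (...) {
            // not a disk error: leave it for the caller
        }
    };
    try {
        with_io_check(handler, []() -> int {
            throw std::system_error(std::make_error_code(std::errc::io_error));
        });
    } catch (const std::system_error&) {
        // rethrown to the caller, just as do_io_check rethrows
    }
}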
-using io_error_handler_gen = std::function; - -io_error_handler default_io_error_handler(disk_error_signal_type& signal); -// generates handler that handles exception for a given signal -io_error_handler_gen default_io_error_handler_gen(); - -extern thread_local io_error_handler commit_error_handler; -extern thread_local io_error_handler sstable_write_error_handler; -extern thread_local io_error_handler general_disk_error_handler; - -template -std::enable_if_t>::value, - std::result_of_t> -do_io_check(const io_error_handler& error_handler, Func&& func, Args&&... args) { - try { - // calling function - return func(std::forward(args)...); - } catch (...) { - error_handler(std::current_exception()); - throw; - } -} - -template>::value>::type> -auto do_io_check(const io_error_handler& error_handler, Func&& func, Args&&... args) { - try { - // calling function - auto fut = func(std::forward(args)...); - return fut.handle_exception([&] (auto ep) { - error_handler(ep); - return futurize>::make_exception_future(ep); - }); - } catch (...) { - error_handler(std::current_exception()); - throw; - } -} - -template -auto commit_io_check(Func&& func, Args&&... args) { - return do_io_check(commit_error_handler, func, std::forward(args)...); -} - -template -auto sstable_io_check(const io_error_handler& error_handler, Func&& func, Args&&... args) { - return do_io_check(error_handler, func, std::forward(args)...); -} - -template -auto io_check(const io_error_handler& error_handler, Func&& func, Args&&... args) { - return do_io_check(error_handler, general_disk_error, func, std::forward(args)...); -} - -template -auto io_check(Func&& func, Args&&... args) { - return do_io_check(general_disk_error_handler, func, std::forward(args)...); -} diff --git a/scylla/enum_set.hh b/scylla/enum_set.hh deleted file mode 100644 index b41ac59..0000000 --- a/scylla/enum_set.hh +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include -#include - -/** - * - * Allows to take full advantage of compile-time information when operating - * on a set of enum values. - * - * Examples: - * - * enum class x { A, B, C }; - * using my_enum = super_enum; - * using my_enumset = enum_set; - * - * static_assert(my_enumset::frozen::contains(), "it should..."); - * - * assert(my_enumset::frozen::contains(my_enumset::prepare())); - * - * assert(my_enumset::frozen::contains(x::A)); - * - */ - - -template -struct super_enum { - using enum_type = EnumType; - - template - struct max { - static constexpr enum_type max_of(enum_type a, enum_type b) { - return a > b ? 
a : b; - } - - template - static constexpr enum_type get() { - return max_of(first, get()); - } - - template - static constexpr enum_type get() { return first; } - - static constexpr enum_type value = get(); - }; - - template - struct min { - static constexpr enum_type min_of(enum_type a, enum_type b) { - return a < b ? a : b; - } - - template - static constexpr enum_type get() { - return min_of(first, get()); - } - - template - static constexpr enum_type get() { return first; } - - static constexpr enum_type value = get(); - }; - - using sequence_type = typename std::underlying_type::type; - - template - static constexpr sequence_type sequence_for() { - return static_cast(Elem); - } - - static sequence_type sequence_for(enum_type elem) { - return static_cast(elem); - } - - static constexpr sequence_type max_sequence = sequence_for::value>(); - static constexpr sequence_type min_sequence = sequence_for::value>(); - - static_assert(min_sequence >= 0, "negative enum values unsupported"); -}; - -template -class enum_set { -public: - using mask_type = size_t; // TODO: use the smallest sufficient type - using enum_type = typename Enum::enum_type; -private: - mask_type _mask; - constexpr enum_set(mask_type mask) : _mask(mask) {} - - template - static constexpr unsigned shift_for() { - return Enum::template sequence_for(); - } -public: - constexpr enum_set() : _mask(0) {} - - static constexpr enum_set from_mask(mask_type mask) { - return enum_set(mask); - } - - static inline mask_type mask_for(enum_type e) { - return mask_type(1) << Enum::sequence_for(e); - } - - template - static constexpr mask_type mask_for() { - return mask_type(1) << shift_for(); - } - - struct prepared { - mask_type mask; - bool operator==(const prepared& o) const { - return mask == o.mask; - } - }; - - static prepared prepare(enum_type e) { - return {mask_for(e)}; - } - - template - static constexpr prepared prepare() { - return {mask_for()}; - } - - static_assert(std::numeric_limits::max() >= ((size_t)1 << Enum::max_sequence), "mask type too small"); - - template - bool contains() const { - return bool(_mask & mask_for()); - } - - bool contains(enum_type e) const { - return bool(_mask & mask_for(e)); - } - - template - void remove() { - _mask &= ~mask_for(); - } - - void remove(enum_type e) { - _mask &= ~mask_for(e); - } - - template - void set() { - _mask |= mask_for(); - } - - template - void set_if(bool condition) { - _mask |= mask_type(condition) << shift_for(); - } - - void set(enum_type e) { - _mask |= mask_for(e); - } - - explicit operator bool() const { - return bool(_mask); - } - - mask_type mask() const { - return _mask; - } - - template - struct frozen { - template - static constexpr mask_type make_mask() { - return mask_for(); - } - - static constexpr mask_type make_mask() { - return 0; - } - - template - static constexpr mask_type make_mask() { - return mask_for() | make_mask(); - } - - static constexpr mask_type mask = make_mask(); - - template - static constexpr bool contains() { - return mask & mask_for(); - } - - static bool contains(enum_type e) { - return mask & mask_for(e); - } - - static bool contains(prepared e) { - return mask & e.mask; - } - - static enum_set unfreeze() { - return enum_set(mask); - } - }; - - template - static enum_set of() { - return frozen::unfreeze(); - } -}; diff --git a/scylla/exceptions/exceptions.cc b/scylla/exceptions/exceptions.cc deleted file mode 100644 index 2cfc657..0000000 --- a/scylla/exceptions/exceptions.cc +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the 
Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include - -#include "exceptions.hh" -#include "log.hh" - -exceptions::truncate_exception::truncate_exception(std::exception_ptr ep) - : request_execution_exception(exceptions::exception_code::PROTOCOL_ERROR, sprint("Error during truncate: %s", ep)) -{} diff --git a/scylla/exceptions/exceptions.hh b/scylla/exceptions/exceptions.hh deleted file mode 100644 index 7e34bc1..0000000 --- a/scylla/exceptions/exceptions.hh +++ /dev/null @@ -1,268 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "db/consistency_level_type.hh" -#include "db/write_type.hh" -#include -#include "core/sstring.hh" -#include "core/print.hh" -#include "bytes.hh" - -namespace exceptions { - -enum class exception_code : int32_t { - SERVER_ERROR = 0x0000, - PROTOCOL_ERROR = 0x000A, - - BAD_CREDENTIALS = 0x0100, - - // 1xx: problem during request execution - UNAVAILABLE = 0x1000, - OVERLOADED = 0x1001, - IS_BOOTSTRAPPING= 0x1002, - TRUNCATE_ERROR = 0x1003, - WRITE_TIMEOUT = 0x1100, - READ_TIMEOUT = 0x1200, - - // 2xx: problem validating the request - SYNTAX_ERROR = 0x2000, - UNAUTHORIZED = 0x2100, - INVALID = 0x2200, - CONFIG_ERROR = 0x2300, - ALREADY_EXISTS = 0x2400, - UNPREPARED = 0x2500 -}; - -class cassandra_exception : public std::exception { -private: - exception_code _code; - sstring _msg; -protected: - template - static inline sstring prepare_message(const char* fmt, Args&&... args) noexcept { - try { - return sprint(fmt, std::forward(args)...); - } catch (...) { - return sstring(); - } - } -public: - cassandra_exception(exception_code code, sstring msg) noexcept - : _code(code) - , _msg(std::move(msg)) - { } - virtual const char* what() const noexcept override { return _msg.begin(); } - exception_code code() const { return _code; } - sstring get_message() const { return what(); } -}; - -class protocol_exception : public cassandra_exception { -public: - protocol_exception(sstring msg) noexcept - : exceptions::cassandra_exception{exceptions::exception_code::PROTOCOL_ERROR, std::move(msg)} - { } -}; - -struct unavailable_exception : cassandra_exception { - db::consistency_level consistency; - int32_t required; - int32_t alive; - - unavailable_exception(db::consistency_level cl, int32_t required, int32_t alive) noexcept - : exceptions::cassandra_exception(exceptions::exception_code::UNAVAILABLE, prepare_message("Cannot achieve consistency level for cl %s. 
Requires %ld, alive %ld", cl, required, alive)) - , consistency(cl) - , required(required) - , alive(alive) - {} -}; - -class request_execution_exception : public cassandra_exception { -public: - request_execution_exception(exception_code code, sstring msg) noexcept - : cassandra_exception(code, std::move(msg)) - { } -}; - -class truncate_exception : public request_execution_exception -{ -public: - truncate_exception(std::exception_ptr ep); -}; - -class request_timeout_exception : public cassandra_exception { -public: - db::consistency_level consistency; - int32_t received; - int32_t block_for; - - request_timeout_exception(exception_code code, const sstring& ks, const sstring& cf, db::consistency_level consistency, int32_t received, int32_t block_for) noexcept - : cassandra_exception{code, prepare_message("Operation timed out for %s.%s - received only %d responses from %d CL=%s.", ks, cf, received, block_for, consistency)} - , consistency{consistency} - , received{received} - , block_for{block_for} - { } -}; - -class read_timeout_exception : public request_timeout_exception { -public: - bool data_present; - - read_timeout_exception(const sstring& ks, const sstring& cf, db::consistency_level consistency, int32_t received, int32_t block_for, bool data_present) noexcept - : request_timeout_exception{exception_code::READ_TIMEOUT, ks, cf, consistency, received, block_for} - , data_present{data_present} - { } -}; - -struct mutation_write_timeout_exception : public request_timeout_exception { - db::write_type type; - mutation_write_timeout_exception(const sstring& ks, const sstring& cf, db::consistency_level consistency, int32_t received, int32_t block_for, db::write_type type) noexcept : - request_timeout_exception(exception_code::WRITE_TIMEOUT, ks, cf, consistency, received, block_for) - , type{std::move(type)} - { } -}; - -struct overloaded_exception : public cassandra_exception { - overloaded_exception(size_t c) noexcept : - cassandra_exception(exception_code::OVERLOADED, prepare_message("Too many in flight hints: %lu", c)) {} -}; - -class request_validation_exception : public cassandra_exception { -public: - using cassandra_exception::cassandra_exception; -}; - -class invalidated_prepared_usage_attempt_exception : public exceptions::request_validation_exception { -public: - invalidated_prepared_usage_attempt_exception() : request_validation_exception{exception_code::UNPREPARED, "Attempt to execute the invalidated prepared statement."} {} -}; - -class unauthorized_exception: public request_validation_exception { -public: - unauthorized_exception(sstring msg) noexcept - : request_validation_exception(exception_code::UNAUTHORIZED, - std::move(msg)) { - } -}; - -class authentication_exception: public request_validation_exception { -public: - authentication_exception(sstring msg) noexcept - : request_validation_exception(exception_code::BAD_CREDENTIALS, - std::move(msg)) { - } -}; - -class invalid_request_exception : public request_validation_exception { -public: - invalid_request_exception(sstring cause) noexcept - : request_validation_exception(exception_code::INVALID, std::move(cause)) - { } -}; - -class keyspace_not_defined_exception : public invalid_request_exception { -public: - keyspace_not_defined_exception(std::string cause) noexcept - : invalid_request_exception(std::move(cause)) - { } -}; - -class prepared_query_not_found_exception : public request_validation_exception { -public: - bytes id; - - prepared_query_not_found_exception(bytes id) noexcept - : 
request_validation_exception{exception_code::UNPREPARED, prepare_message("No prepared statement with ID %s found.", id)} - , id{id} - { } -}; - -class syntax_exception : public request_validation_exception { -public: - syntax_exception(sstring msg) noexcept - : request_validation_exception(exception_code::SYNTAX_ERROR, std::move(msg)) - { } -}; - -class configuration_exception : public request_validation_exception { -public: - configuration_exception(sstring msg) noexcept - : request_validation_exception{exception_code::CONFIG_ERROR, std::move(msg)} - { } - - configuration_exception(exception_code code, sstring msg) noexcept - : request_validation_exception{code, std::move(msg)} - { } -}; - -class already_exists_exception : public configuration_exception { -public: - const sstring ks_name; - const sstring cf_name; -private: - already_exists_exception(sstring ks_name_, sstring cf_name_, sstring msg) - : configuration_exception{exception_code::ALREADY_EXISTS, msg} - , ks_name{ks_name_} - , cf_name{cf_name_} - { } -public: - already_exists_exception(sstring ks_name_, sstring cf_name_) - : already_exists_exception{ks_name_, cf_name_, sprint("Cannot add already existing table \"%s\" to keyspace \"%s\"", cf_name_, ks_name_)} - { } - - already_exists_exception(sstring ks_name_) - : already_exists_exception{ks_name_, "", sprint("Cannot add existing keyspace \"%s\"", ks_name_)} - { } -}; - -class recognition_exception : public std::runtime_error { -public: - recognition_exception(const std::string& msg) : std::runtime_error(msg) {}; -}; - -class unsupported_operation_exception : public std::runtime_error { -public: - unsupported_operation_exception() : std::runtime_error("unsupported operation") {} - unsupported_operation_exception(const sstring& msg) : std::runtime_error("unsupported operation: " + msg) {} -}; - -} diff --git a/scylla/exceptions/unrecognized_entity_exception.hh b/scylla/exceptions/unrecognized_entity_exception.hh deleted file mode 100644 index 4293372..0000000 --- a/scylla/exceptions/unrecognized_entity_exception.hh +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
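Every exception in this hierarchy carries an exception_code, which is what the protocol layer serializes into the error frame; the message text is only for humans. A minimal stand-in showing that catch-and-map pattern (the types below are simplified sketches, not the classes above):

#include <cstdint>
#include <exception>
#include <iostream>
#include <string>

// Simplified stand-ins for exception_code / cassandra_exception above.
enum class error_code : int32_t { server_error = 0x0000, syntax_error = 0x2000 };

class server_exception : public std::exception {
    error_code _code;
    std::string _msg;
public:
    server_exception(error_code code, std::string msg)
        : _code(code), _msg(std::move(msg)) {}
    const char* what() const noexcept override { return _msg.c_str(); }
    error_code code() const { return _code; }
};

int main() {
    try {
        throw server_exception(error_code::syntax_error, "line 1: no viable alternative");
    } catch (const server_exception& e) {
        // A CQL frontend would serialize e.code() into the error frame
        // and send what() as the accompanying message string.
        std::cerr << std::hex << static_cast<int32_t>(e.code()) << ' ' << e.what() << '\n';
    }
}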
- * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "exceptions.hh" -#include "core/shared_ptr.hh" -#include "cql3/column_identifier.hh" -#include "cql3/relation.hh" - -namespace exceptions { - -/** - * Exception thrown when an entity is not recognized within a relation. - */ -class unrecognized_entity_exception : public invalid_request_exception { -public: - /** - * The unrecognized entity. - */ - ::shared_ptr entity; - - /** - * The entity relation. - */ - cql3::relation_ptr relation; - - /** - * Creates a new UnrecognizedEntityException. - * @param entity the unrecognized entity - * @param relation the entity relation - */ - unrecognized_entity_exception(::shared_ptr entity, cql3::relation_ptr relation) - : invalid_request_exception(sprint("Undefined name %s in where clause ('%s')", *entity, relation->to_string())) - , entity(entity) - , relation(relation) - { } -}; - -} diff --git a/scylla/fix_system_distributed_tables.py b/scylla/fix_system_distributed_tables.py deleted file mode 100755 index 0ca1f9a..0000000 --- a/scylla/fix_system_distributed_tables.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2017 ScyllaDB -# -# -# This file is part of Scylla. -# -# Scylla is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Scylla is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Scylla. If not, see . 
-# -import argparse -import sys - -from cassandra.cluster import Cluster -from cassandra.auth import PlainTextAuthProvider - -events_cols = { - 'session_id' : 'uuid', - 'event_id' : 'timeuuid', - 'activity' : 'text', - 'source' : 'inet', - 'source_elapsed': 'int', - 'thread' : 'text', - 'scylla_span_id' : 'bigint', - 'scylla_parent_id' : 'bigint' -} - -sessions_cols = { - 'session_id' : 'uuid', - 'command' : 'text', - 'client' : 'inet', - 'coordinator' : 'inet', - 'duration' : 'int', - 'parameters' : 'map<text, text>', - 'request' : 'text', - 'started_at' : 'timestamp' -} - -slow_query_log_cols = { - 'node_ip' : 'inet', - 'shard' : 'int', - 'session_id' : 'uuid', - 'date' : 'timestamp', - 'start_time' : 'timeuuid', - 'command' : 'text', - 'duration' : 'int', - 'parameters' : 'map<text, text>', - 'source_ip' : 'inet', - 'table_names' : 'set<text>', - 'username' : 'text' -} - -traces_tables_defs = { - 'events' : events_cols, - 'sessions' : sessions_cols, - 'node_slow_log': slow_query_log_cols -} -################################################################################ -credentials_cols = { - 'username' : 'text', - 'options' : 'map<text, text>', - 'salted_hash' : 'text' -} - -permissions_cols = { - 'username' : 'text', - 'resource' : 'text', - 'permissions' : 'set<text>' -} - -users_cols = { - 'name' : 'text', - 'super' : 'boolean' -} - -auth_tables_defs = { - 'credentials' : credentials_cols, - 'permissions' : permissions_cols, - 'users' : users_cols -} -################################################################################ -ks_defs = { - 'system_traces' : traces_tables_defs, - 'system_auth' : auth_tables_defs } -################################################################################ -def validate_and_fix(args): - res = True - if args.user: - auth_provider = PlainTextAuthProvider(username=args.user, password=args.password) - cluster = Cluster(auth_provider=auth_provider, contact_points=[ args.node ], port=args.port) - else: - cluster = Cluster(contact_points=[ args.node ], port=args.port) - - try: - session = cluster.connect() - cluster_meta = session.cluster.metadata - for ks, tables_defs in ks_defs.items(): - if ks not in cluster_meta.keyspaces: - print("keyspace {} doesn't exist - skipping".format(ks)) - continue - - ks_meta = cluster_meta.keyspaces[ks] - for table_name, table_cols in tables_defs.items(): - - if table_name not in ks_meta.tables: - print("{}.{} doesn't exist - skipping".format(ks, table_name)) - continue - - print("Adjusting {}.{}".format(ks, table_name)) - - table_meta = ks_meta.tables[table_name] - for column_name, column_type in table_cols.items(): - if column_name in table_meta.columns: - column_meta = table_meta.columns[column_name] - if column_meta.cql_type != column_type: - print("ERROR: {}.{}::{} column has an unexpected column type: expected '{}' found '{}'".format(ks, table_name, column_name, column_type, column_meta.cql_type)) - res = False - else: - try: - session.execute("ALTER TABLE {}.{} ADD {} {}".format(ks, table_name, column_name, column_type)) - print("{}.{}: added column '{}' of the type '{}'".format(ks, table_name, column_name, column_type)) - except: - print("ERROR: {}.{}: failed to add column '{}' with type '{}': {}".format(ks, table_name, column_name, column_type, sys.exc_info())) - res = False - except: - print("ERROR: {}".format(sys.exc_info())) - res = False - - return res -################################################################################ -if __name__ == '__main__': - argp = argparse.ArgumentParser(description = 'Validate distributed system keyspaces') - 
argp.add_argument('--user', '-u') - argp.add_argument('--password', '-p', default='none') - argp.add_argument('--node', default='127.0.0.1', help='Node to connect to.') - argp.add_argument('--port', default='9042', help='Port to connect to.') - - args = argp.parse_args() - res = validate_and_fix(args) - if res: - sys.exit(0) - else: - sys.exit(1) - - - - diff --git a/scylla/fnv1a_hasher.hh b/scylla/fnv1a_hasher.hh deleted file mode 100644 index b6e33de..0000000 --- a/scylla/fnv1a_hasher.hh +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -// FIXME: FNV-1a is quite slow, consider something faster, CityHash seems to be -// a good choice. - -template<size_t size> -struct fnv1a_constants { }; - -template<> -struct fnv1a_constants<8> { - enum : uint64_t { - offset = 0xcbf29ce484222325ull, - prime = 0x100000001b3ull, - }; -}; - -class fnv1a_hasher { - using constants = fnv1a_constants<sizeof(size_t)>; - size_t _hash = constants::offset; -public: - void update(const char* ptr, size_t length) { - auto end = ptr + length; - while (ptr != end) { - _hash ^= *ptr; - _hash *= constants::prime; - ++ptr; - } - } - - size_t finalize() const { - return _hash; - } -}; diff --git a/scylla/frozen_mutation.cc b/scylla/frozen_mutation.cc deleted file mode 100644 index 58a954e..0000000 --- a/scylla/frozen_mutation.cc +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. 
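fnv1a_hasher above folds one byte at a time: XOR the byte into the state, then multiply by the FNV prime, starting from the offset basis. (Note that `_hash ^= *ptr` XORs a possibly sign-extended char; the reference algorithm folds unsigned bytes, as the sketch below does.) A free-standing version of the same fold:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Free-standing FNV-1a (64-bit): XOR each byte into the state, then
// multiply by the FNV prime, starting from the offset basis.
static uint64_t fnv1a(const unsigned char* p, size_t len) {
    uint64_t hash = 0xcbf29ce484222325ull; // offset basis
    for (size_t i = 0; i < len; ++i) {
        hash ^= p[i];
        hash *= 0x100000001b3ull; // FNV prime
    }
    return hash;
}

int main() {
    // By construction, the empty input hashes to the offset basis.
    assert(fnv1a(nullptr, 0) == 0xcbf29ce484222325ull);
    const unsigned char key[] = { 's', 'c', 'y', 'l', 'l', 'a' };
    std::printf("%016llx\n", (unsigned long long)fnv1a(key, sizeof(key)));
}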
- */ - -#include "frozen_mutation.hh" -#include "mutation_partition.hh" -#include "mutation.hh" -#include "counters.hh" -#include "partition_builder.hh" -#include "mutation_partition_serializer.hh" -#include "utils/UUID.hh" -#include "utils/data_input.hh" -#include "query-result-set.hh" -#include "utils/UUID.hh" -#include "serializer.hh" -#include "idl/uuid.dist.hh" -#include "idl/keys.dist.hh" -#include "idl/mutation.dist.hh" -#include "serializer_impl.hh" -#include "serialization_visitors.hh" -#include "idl/uuid.dist.impl.hh" -#include "idl/keys.dist.impl.hh" -#include "idl/mutation.dist.impl.hh" - -// -// Representation layout: -// -// ::= -// - -using namespace db; - -ser::mutation_view frozen_mutation::mutation_view() const { - auto in = ser::as_input_stream(_bytes); - return ser::deserialize(in, boost::type()); -} - -utils::UUID -frozen_mutation::column_family_id() const { - return mutation_view().table_id(); -} - -utils::UUID -frozen_mutation::schema_version() const { - return mutation_view().schema_version(); -} - -partition_key_view -frozen_mutation::key(const schema& s) const { - return _pk; -} - -dht::decorated_key -frozen_mutation::decorated_key(const schema& s) const { - return dht::global_partitioner().decorate_key(s, key(s)); -} - -partition_key frozen_mutation::deserialize_key() const { - return mutation_view().key(); -} - -frozen_mutation::frozen_mutation(bytes_ostream&& b) - : _bytes(std::move(b)) - , _pk(deserialize_key()) -{ - _bytes.reduce_chunk_count(); -} - -frozen_mutation::frozen_mutation(bytes_ostream&& b, partition_key pk) - : _bytes(std::move(b)) - , _pk(std::move(pk)) -{ - _bytes.reduce_chunk_count(); -} - -frozen_mutation::frozen_mutation(const mutation& m) - : _pk(m.key()) -{ - mutation_partition_serializer part_ser(*m.schema(), m.partition()); - - ser::writer_of_mutation wom(_bytes); - std::move(wom).write_table_id(m.schema()->id()) - .write_schema_version(m.schema()->version()) - .write_key(m.key()) - .partition([&] (auto wr) { - part_ser.write(std::move(wr)); - }).end_mutation(); - _bytes.reduce_chunk_count(); -} - -mutation -frozen_mutation::unfreeze(schema_ptr schema) const { - mutation m(key(*schema), schema); - partition_builder b(*schema, m.partition()); - partition().accept(*schema, b); - return m; -} - -frozen_mutation freeze(const mutation& m) { - return { m }; -} - -mutation_partition_view frozen_mutation::partition() const { - return mutation_partition_view::from_view(mutation_view().partition()); -} - -std::ostream& operator<<(std::ostream& out, const frozen_mutation::printer& pr) { - return out << pr.self.unfreeze(pr.schema); -} - -frozen_mutation::printer frozen_mutation::pretty_printer(schema_ptr s) const { - return { *this, std::move(s) }; -} - -stop_iteration streamed_mutation_freezer::consume(tombstone pt) { - _partition_tombstone = pt; - return stop_iteration::no; -} - -stop_iteration streamed_mutation_freezer::consume(static_row&& sr) { - _sr = std::move(sr); - return stop_iteration::no; -} - -stop_iteration streamed_mutation_freezer::consume(clustering_row&& cr) { - if (_reversed) { - _crs.emplace_front(std::move(cr)); - } else { - _crs.emplace_back(std::move(cr)); - } - return stop_iteration::no; -} - -stop_iteration streamed_mutation_freezer::consume(range_tombstone&& rt) { - if (_reversed) { - rt.flip(); - } - _rts.apply(_schema, std::move(rt)); - return stop_iteration::no; -} - -frozen_mutation streamed_mutation_freezer::consume_end_of_stream() { - bytes_ostream out; - ser::writer_of_mutation wom(out); - 
std::move(wom).write_table_id(_schema.id()) - .write_schema_version(_schema.version()) - .write_key(_key) - .partition([&] (auto wr) { - serialize_mutation_fragments(_schema, _partition_tombstone, - std::move(_sr), std::move(_rts), - std::move(_crs), std::move(wr)); - }).end_mutation(); - return frozen_mutation(std::move(out), std::move(_key)); -} - -future<frozen_mutation> freeze(streamed_mutation sm) { - return do_with(streamed_mutation(std::move(sm)), [] (auto& sm) mutable { - return consume(sm, streamed_mutation_freezer(*sm.schema(), sm.key())); - }); -} - -class fragmenting_mutation_freezer { - const schema& _schema; - partition_key _key; - - tombstone _partition_tombstone; - stdx::optional<static_row> _sr; - std::deque<clustering_row> _crs; - range_tombstone_list _rts; - - frozen_mutation_consumer_fn _consumer; - - bool _fragmented = false; - size_t _dirty_size = 0; - size_t _fragment_size; -private: - future<> flush() { - bytes_ostream out; - ser::writer_of_mutation wom(out); - std::move(wom).write_table_id(_schema.id()) - .write_schema_version(_schema.version()) - .write_key(_key) - .partition([&] (auto wr) { - serialize_mutation_fragments(_schema, _partition_tombstone, - std::move(_sr), std::move(_rts), - std::move(_crs), std::move(wr)); - }).end_mutation(); - - _sr = { }; - _rts.clear(); - _crs.clear(); - _dirty_size = 0; - return _consumer(frozen_mutation(std::move(out), _key), _fragmented); - } - - future<stop_iteration> maybe_flush() { - if (_dirty_size >= _fragment_size) { - _fragmented = true; - return flush().then([] { return stop_iteration::no; }); - } - return make_ready_future<stop_iteration>(stop_iteration::no); - } -public: - fragmenting_mutation_freezer(const schema& s, const partition_key& key, frozen_mutation_consumer_fn c, size_t fragment_size) - : _schema(s), _key(key), _rts(s), _consumer(c), _fragment_size(fragment_size) { } - - void consume(tombstone pt) { - _dirty_size += sizeof(tombstone); - _partition_tombstone = pt; - } - - future<stop_iteration> consume(static_row&& sr) { - _sr = std::move(sr); - _dirty_size += _sr->memory_usage(); - return maybe_flush(); - } - - future<stop_iteration> consume(clustering_row&& cr) { - _dirty_size += cr.memory_usage(); - _crs.emplace_back(std::move(cr)); - return maybe_flush(); - } - - future<stop_iteration> consume(range_tombstone&& rt) { - _dirty_size += rt.memory_usage(); - _rts.apply(_schema, std::move(rt)); - return maybe_flush(); - } - - future<stop_iteration> consume_end_of_stream() { - if (_dirty_size) { - return flush().then([] { return stop_iteration::yes; }); - } - return make_ready_future<stop_iteration>(stop_iteration::yes); - } -}; - -future<> fragment_and_freeze(streamed_mutation sm, frozen_mutation_consumer_fn c, size_t fragment_size) -{ - fragmenting_mutation_freezer freezer(*sm.schema(), sm.key(), c, fragment_size); - return do_with(std::move(sm), std::move(freezer), [] (auto& sm, auto& freezer) { - freezer.consume(sm.partition_tombstone()); - return repeat([&] { - return sm().then([&] (auto mfopt) { - if (!mfopt) { - return freezer.consume_end_of_stream(); - } - return std::move(*mfopt).consume(freezer); - }); - }); - }); -} diff --git a/scylla/frozen_mutation.hh b/scylla/frozen_mutation.hh deleted file mode 100644 index 72844d9..0000000 --- a/scylla/frozen_mutation.hh +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version.
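
freeze() and unfreeze() above are intended as inverses; a sketch of the round trip, assuming an existing schema_ptr s and mutation m (the assert is illustrative):

    frozen_mutation fm = freeze(m);   // compact, schema-free form
    // ... ship fm.representation() over the wire ...
    mutation m2 = fm.unfreeze(s);     // the schema is needed only to unpack
    assert(m == m2);                  // the round trip preserves the mutation
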
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "dht/i_partitioner.hh" -#include "atomic_cell.hh" -#include "database_fwd.hh" -#include "mutation_partition_view.hh" -#include "streamed_mutation.hh" -class mutation; -class streamed_mutation; - -namespace ser { -class mutation_view; -} - -// Immutable, compact form of mutation. -// -// This form is primarily destined to be sent over the network channel. -// Regular mutation can't be deserialized because its complex data structures -// need schema reference at the time object is constructed. We can't lookup -// schema before we deserialize column family ID. Another problem is that even -// if we had the ID somehow, low level RPC layer doesn't know how to lookup -// the schema. Data can be wrapped in frozen_mutation without schema -// information, the schema is only needed to access some of the fields. -// -class frozen_mutation final { -private: - bytes_ostream _bytes; - partition_key _pk; -private: - partition_key deserialize_key() const; - ser::mutation_view mutation_view() const; -public: - frozen_mutation(const mutation& m); - explicit frozen_mutation(bytes_ostream&& b); - frozen_mutation(bytes_ostream&& b, partition_key key); - frozen_mutation(frozen_mutation&& m) = default; - frozen_mutation(const frozen_mutation& m) = default; - frozen_mutation& operator=(frozen_mutation&&) = default; - frozen_mutation& operator=(const frozen_mutation&) = default; - const bytes_ostream& representation() const { return _bytes; } - utils::UUID column_family_id() const; - utils::UUID schema_version() const; // FIXME: Should replace column_family_id() - partition_key_view key(const schema& s) const; - dht::decorated_key decorated_key(const schema& s) const; - mutation_partition_view partition() const; - mutation unfreeze(schema_ptr s) const; - - struct printer { - const frozen_mutation& self; - schema_ptr schema; - friend std::ostream& operator<<(std::ostream&, const printer&); - }; - - printer pretty_printer(schema_ptr) const; -}; - -frozen_mutation freeze(const mutation& m); - -// Can receive streamed_mutation in reversed order. 
-class streamed_mutation_freezer { - const schema& _schema; - partition_key _key; - bool _reversed; - - tombstone _partition_tombstone; - stdx::optional<static_row> _sr; - std::deque<clustering_row> _crs; - range_tombstone_list _rts; -public: - streamed_mutation_freezer(const schema& s, const partition_key& key, bool reversed = false) - : _schema(s), _key(key), _reversed(reversed), _rts(s) { } - - stop_iteration consume(tombstone pt); - - stop_iteration consume(static_row&& sr); - stop_iteration consume(clustering_row&& cr); - - stop_iteration consume(range_tombstone&& rt); - - frozen_mutation consume_end_of_stream(); -}; - -future<frozen_mutation> freeze(streamed_mutation sm); - -static constexpr size_t default_frozen_fragment_size = 128 * 1024; - -using frozen_mutation_consumer_fn = std::function<future<stop_iteration>(frozen_mutation, bool)>; -future<> fragment_and_freeze(streamed_mutation sm, frozen_mutation_consumer_fn c, - size_t fragment_size = default_frozen_fragment_size); - diff --git a/scylla/frozen_schema.cc b/scylla/frozen_schema.cc deleted file mode 100644 index 9104464..0000000 --- a/scylla/frozen_schema.cc +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#include "frozen_schema.hh" -#include "db/schema_tables.hh" -#include "canonical_mutation.hh" -#include "schema_mutations.hh" -#include "idl/uuid.dist.hh" -#include "idl/frozen_schema.dist.hh" -#include "serializer_impl.hh" -#include "serialization_visitors.hh" -#include "idl/uuid.dist.impl.hh" -#include "idl/frozen_schema.dist.impl.hh" - -frozen_schema::frozen_schema(const schema_ptr& s) - : _data([&s] { - schema_mutations sm = db::schema_tables::make_schema_mutations(s, api::new_timestamp(), true); - bytes_ostream out; - ser::writer_of_schema wr(out); - std::move(wr).write_version(s->version()) - .write_mutations(sm) - .end_schema(); - return to_bytes(out.linearize()); - }()) -{ } - -schema_ptr frozen_schema::unfreeze() const { - auto in = ser::as_input_stream(_data); - auto sv = ser::deserialize(in, boost::type<ser::schema_view>()); - return sv.mutations().is_view() - ? db::schema_tables::create_view_from_mutations(sv.mutations(), sv.version()) - : db::schema_tables::create_table_from_mutations(sv.mutations(), sv.version()); -} - -frozen_schema::frozen_schema(bytes b) - : _data(std::move(b)) -{ } - -bytes_view frozen_schema::representation() const -{ - return _data; -} diff --git a/scylla/frozen_schema.hh b/scylla/frozen_schema.hh deleted file mode 100644 index 04837c4..0000000 --- a/scylla/frozen_schema.hh +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version.
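
A sketch of driving fragment_and_freeze() through the frozen_mutation_consumer_fn shape declared above; send_fragment is a hypothetical sink, not part of this patch:

    future<> ship(streamed_mutation sm) {
        return fragment_and_freeze(std::move(sm),
            [] (frozen_mutation fm, bool fragmented) {
                // `fragmented` turns true once the partition has been split
                return send_fragment(std::move(fm), fragmented).then([] {
                    return stop_iteration::no;   // keep consuming fragments
                });
            });   // default_frozen_fragment_size = 128 KiB applies
    }
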
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include "query-result.hh" -#include "schema.hh" -#include "frozen_mutation.hh" - -// Transport for schema_ptr across shards/nodes. -// It's safe to access from another shard by const&. -class frozen_schema { - bytes _data; -public: - explicit frozen_schema(bytes); - frozen_schema(const schema_ptr&); - frozen_schema(frozen_schema&&) = default; - frozen_schema(const frozen_schema&) = default; - frozen_schema& operator=(const frozen_schema&) = default; - frozen_schema& operator=(frozen_schema&&) = default; - schema_ptr unfreeze() const; - bytes_view representation() const; -}; diff --git a/scylla/gc_clock.hh b/scylla/gc_clock.hh deleted file mode 100644 index 04eaf03..0000000 --- a/scylla/gc_clock.hh +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include "clocks-impl.hh" - -#include - -#include -#include - -// FIXME: wraps around in 2038 -class gc_clock final { -public: - using base = seastar::lowres_system_clock; - using rep = int32_t; - using period = std::ratio<1, 1>; // seconds - using duration = std::chrono::duration<rep, period>; - using time_point = std::chrono::time_point<gc_clock, duration>; - - static constexpr auto is_steady = base::is_steady; - - static constexpr std::time_t to_time_t(time_point t) { - return std::chrono::duration_cast<std::chrono::seconds>(t.time_since_epoch()).count(); - } - - static constexpr time_point from_time_t(std::time_t t) { - return time_point(std::chrono::duration_cast<duration>(std::chrono::seconds(t))); - } - - static time_point now() { - return time_point(std::chrono::duration_cast<duration>(base::now().time_since_epoch())) + get_clocks_offset(); - } -}; - -using expiry_opt = std::experimental::optional<gc_clock::time_point>; -using ttl_opt = std::experimental::optional<gc_clock::duration>; - -// 20 years in seconds -static constexpr gc_clock::duration max_ttl = gc_clock::duration{20 * 365 * 24 * 60 * 60}; diff --git a/scylla/hashing.hh b/scylla/hashing.hh deleted file mode 100644 index 424f3cc..0000000 --- a/scylla/hashing.hh +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
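
Since gc_clock above counts whole seconds in 32 bits, TTL handling is plain chrono arithmetic; the values here are illustrative:

    auto ttl = std::chrono::duration_cast<gc_clock::duration>(
            std::chrono::hours(24));               // 86400 s
    gc_clock::time_point expiry = gc_clock::now() + ttl;
    bool expired = gc_clock::now() >= expiry;      // eligible for GC?
    static_assert(max_ttl.count() == 20 * 365 * 24 * 60 * 60, "20 years in seconds");
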
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include "seastarx.hh" - -// -// This hashing differs from std::hash<> in that it decouples knowledge about -// type structure from the way the hash value is calculated: -// * appending_hash<T> instantiation knows about what data should be included in the hash for type T. -// * Hasher object knows how to combine the data into the final hash. -// -// The appending_hash should always feed some data into the hasher, regardless of the state the object is in, -// in order for the hash to be highly sensitive for value changes. For example, vector<optional<bool>> should -// ideally feed different values for empty vector and a vector with a single empty optional. -// -// appending_hash is machine-independent. -// - -// The Hasher concept -struct Hasher { - void update(const char* ptr, size_t size); -}; - -template<typename T, typename Enable = void> -struct appending_hash; - -template<typename Hasher, typename T, typename... Args> -inline -void feed_hash(Hasher& h, const T& value, Args&&... args) { - appending_hash<T>()(h, value, std::forward<Args>(args)...); -}; - -template<typename T> -struct appending_hash<T, std::enable_if_t<std::is_arithmetic<T>::value>> { - template<typename Hasher> - void operator()(Hasher& h, T value) const { - auto value_le = cpu_to_le(value); - h.update(reinterpret_cast<const char*>(&value_le), sizeof(T)); - } -}; - -template<> -struct appending_hash<bool> { - template<typename Hasher> - void operator()(Hasher& h, bool value) const { - feed_hash(h, static_cast<uint8_t>(value)); - } -}; - -template<typename T> -struct appending_hash<T, std::enable_if_t<std::is_enum<T>::value>> { - template<typename Hasher> - void operator()(Hasher& h, const T& value) const { - feed_hash(h, static_cast<std::underlying_type_t<T>>(value)); - } -}; - -template<typename T> -struct appending_hash<std::experimental::optional<T>> { - template<typename Hasher> - void operator()(Hasher& h, const std::experimental::optional<T>& value) const { - if (value) { - feed_hash(h, true); - feed_hash(h, *value); - } else { - feed_hash(h, false); - } - } -}; - -template<size_t N> -struct appending_hash<char[N]> { - template<typename Hasher> - void operator()(Hasher& h, const char (&value) [N]) const { - feed_hash(h, N); - h.update(value, N); - } -}; - -template<typename T> -struct appending_hash<std::vector<T>> { - template<typename Hasher> - void operator()(Hasher& h, const std::vector<T>& value) const { - feed_hash(h, value.size()); - for (auto&& v : value) { - appending_hash<T>()(h, v); - } - } -}; - -template<typename K, typename V> -struct appending_hash<std::map<K, V>> { - template<typename Hasher> - void operator()(Hasher& h, const std::map<K, V>& value) const { - feed_hash(h, value.size()); - for (auto&& e : value) { - appending_hash<K>()(h, e.first); - appending_hash<V>()(h, e.second); - } - } -}; - -template<> -struct appending_hash<sstring> { - template<typename Hasher> - void operator()(Hasher& h, const sstring& v) const { - feed_hash(h, v.size()); - h.update(reinterpret_cast<const char*>(v.cbegin()), v.size() * sizeof(sstring::value_type)); - } -}; - -template<> -struct appending_hash<std::string> { - template<typename Hasher> - void operator()(Hasher& h, const std::string& v) const { - feed_hash(h, v.size()); - h.update(reinterpret_cast<const char*>(v.data()), v.size() * sizeof(std::string::value_type)); - } -}; - -template<typename Rep, typename Period> -struct appending_hash<std::chrono::duration<Rep, Period>> { - template<typename Hasher> - void operator()(Hasher& h, std::chrono::duration<Rep, Period> v) const { - feed_hash(h, v.count()); - } -}; - -template<typename Clock, typename Duration> -struct appending_hash<std::chrono::time_point<Clock, Duration>> { - template<typename Hasher> - void operator()(Hasher& h, std::chrono::time_point<Clock, Duration> v) const { - feed_hash(h, v.time_since_epoch().count()); - } -}; diff --git a/scylla/hashing_partition_visitor.hh b/scylla/hashing_partition_visitor.hh deleted file mode 100644 index 83c5bb7..0000000 --- a/scylla/hashing_partition_visitor.hh +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */
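
Because the hashing above is structural, any type with a matching update() plugs in; the toy hasher below is not part of the patch, but shows how feed_hash() composes over containers:

    struct xor_hasher {                        // satisfies the Hasher concept
        uint64_t h = 0;
        void update(const char* p, size_t n) {
            while (n--) { h = (h << 1) ^ static_cast<unsigned char>(*p++); }
        }
    };

    xor_hasher h;
    std::vector<std::experimental::optional<int>> v = {1, {}, 3};
    feed_hash(h, v);   // feeds the size, then a presence flag (+ value) per element
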
- -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "mutation_partition_visitor.hh" -#include "hashing.hh" -#include "schema.hh" -#include "atomic_cell_hash.hh" -#include "keys.hh" -#include "counters.hh" - -// Calculates a hash of a mutation_partition which is consistent with -// mutation equality. For any equal mutations, no matter which schema -// version they were generated under, the hash fed will be the same for both of them. -template -class hashing_partition_visitor : public mutation_partition_visitor { - Hasher& _h; - const schema& _s; -public: - hashing_partition_visitor(Hasher& h, const schema& s) - : _h(h) - , _s(s) - { } - - virtual void accept_partition_tombstone(tombstone t) { - feed_hash(_h, t); - } - - virtual void accept_static_cell(column_id id, atomic_cell_view cell) { - auto&& col = _s.static_column_at(id); - feed_hash(_h, col.name()); - feed_hash(_h, col.type->name()); - feed_hash(_h, cell, col); - } - - virtual void accept_static_cell(column_id id, collection_mutation_view cell) { - auto&& col = _s.static_column_at(id); - feed_hash(_h, col.name()); - feed_hash(_h, col.type->name()); - feed_hash(_h, cell, col); - } - - virtual void accept_row_tombstone(const range_tombstone& rt) { - rt.feed_hash(_h, _s); - } - - virtual void accept_row(position_in_partition_view pos, const row_tombstone& deleted_at, const row_marker& rm, is_dummy dummy, is_continuous continuous) override { - if (dummy) { - return; - } - pos.key().feed_hash(_h, _s); - feed_hash(_h, deleted_at); - feed_hash(_h, rm); - } - - virtual void accept_row_cell(column_id id, atomic_cell_view cell) { - auto&& col = _s.regular_column_at(id); - feed_hash(_h, col.name()); - feed_hash(_h, col.type->name()); - feed_hash(_h, cell, col); - } - - virtual void accept_row_cell(column_id id, collection_mutation_view cell) { - auto&& col = _s.regular_column_at(id); - feed_hash(_h, col.name()); - feed_hash(_h, col.type->name()); - feed_hash(_h, cell, col); - } -}; diff --git a/scylla/idl/cache_temperature.idl.hh b/scylla/idl/cache_temperature.idl.hh deleted file mode 100644 index 9eda527..0000000 --- a/scylla/idl/cache_temperature.idl.hh +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
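
Putting the visitor together with the fnv1a_hasher deleted earlier in this patch; m is an existing mutation, and accept() is assumed to walk the partition in order:

    fnv1a_hasher h;
    hashing_partition_visitor<fnv1a_hasher> v(h, *m.schema());
    m.partition().accept(*m.schema(), v);
    size_t digest = h.finalize();   // equal mutations yield equal digests,
                                    // regardless of schema version
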
- */ - -class cache_temperature final { - uint8_t get_serialized_temperature(); -}; diff --git a/scylla/idl/commitlog.idl.hh b/scylla/idl/commitlog.idl.hh deleted file mode 100644 index a2dc60c..0000000 --- a/scylla/idl/commitlog.idl.hh +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -class commitlog_entry [[writable]] { - std::experimental::optional mapping(); - frozen_mutation mutation(); -}; diff --git a/scylla/idl/consistency_level.idl.hh b/scylla/idl/consistency_level.idl.hh deleted file mode 100644 index 3cd80ed..0000000 --- a/scylla/idl/consistency_level.idl.hh +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -namespace db { - -enum class consistency_level : uint8_t { - ANY, - ONE, - TWO, - THREE, - QUORUM, - ALL, - LOCAL_QUORUM, - EACH_QUORUM, - SERIAL, - LOCAL_SERIAL, - LOCAL_ONE -}; - -} diff --git a/scylla/idl/frozen_mutation.idl.hh b/scylla/idl/frozen_mutation.idl.hh deleted file mode 100644 index 82dd419..0000000 --- a/scylla/idl/frozen_mutation.idl.hh +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -class frozen_mutation final { - bytes representation(); -}; diff --git a/scylla/idl/frozen_schema.idl.hh b/scylla/idl/frozen_schema.idl.hh deleted file mode 100644 index 01e6e8b..0000000 --- a/scylla/idl/frozen_schema.idl.hh +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. 
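
The consistency_level enum above maps onto replica counts in the usual Cassandra fashion; a simplified sketch of that mapping (rf is the replication factor; this is not code from the patch):

    int block_for(int rf, db::consistency_level cl) {
        switch (cl) {
        case db::consistency_level::ONE:
        case db::consistency_level::LOCAL_ONE:    return 1;
        case db::consistency_level::TWO:          return 2;
        case db::consistency_level::THREE:        return 3;
        case db::consistency_level::QUORUM:
        case db::consistency_level::LOCAL_QUORUM:
        case db::consistency_level::SERIAL:
        case db::consistency_level::LOCAL_SERIAL: return rf / 2 + 1;
        case db::consistency_level::ALL:          return rf;
        default:                                  return 1;  // ANY, EACH_QUORUM: simplified
        }
    }
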
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -class canonical_mutation final { - bytes representation(); -}; - -class schema_mutations { - canonical_mutation columnfamilies_canonical_mutation(); - canonical_mutation columns_canonical_mutation(); - bool is_view()[[version 1.6]]; - std::experimental::optional indices_canonical_mutation()[[version 2.0]]; - std::experimental::optional dropped_columns_canonical_mutation()[[version 2.0]]; - std::experimental::optional scylla_tables_canonical_mutation()[[version 2.0]]; -}; - -class schema stub [[writable]] { - utils::UUID version; - schema_mutations mutations; -}; - -class frozen_schema final { - bytes representation(); -}; diff --git a/scylla/idl/mutation.idl.hh b/scylla/idl/mutation.idl.hh deleted file mode 100644 index 88b5437..0000000 --- a/scylla/idl/mutation.idl.hh +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -class counter_id final { - utils::UUID to_uuid(); -}; - -class counter_shard final { - counter_id id(); - int64_t value(); - int64_t logical_clock(); -}; - -class counter_cell_full final stub [[writable]] { - std::vector shards; -}; - -class counter_cell_update final stub [[writable]] { - int64_t delta; -}; - -class counter_cell stub [[writable]] { - api::timestamp_type created_at; - boost::variant value; -}; - -class tombstone [[writable]] { - api::timestamp_type timestamp; - gc_clock::time_point deletion_time; -}; - -class live_cell stub [[writable]] { - api::timestamp_type created_at; - bytes value; -}; - -class expiring_cell stub [[writable]] { - gc_clock::duration ttl; - gc_clock::time_point expiry; - live_cell c; -}; - -class dead_cell final stub [[writable]] { - tombstone tomb; -}; - -class collection_element stub [[writable]] { - // key's format depends on its CQL type as defined in the schema and is specified in CQL binary protocol. 
- bytes key; - boost::variant value; -}; - -class collection_cell stub [[writable]] { - tombstone tomb; - std::vector elements; // sorted by key -}; - -class column stub [[writable]] { - uint32_t id; - boost::variant, collection_cell> c; -}; - -class row stub [[writable]] { - std::vector columns; // sorted by id -}; - -class no_marker final stub [[writable]] {}; - -class live_marker stub [[writable]] { - api::timestamp_type created_at; -}; - -class expiring_marker stub [[writable]] { - live_marker lm; - gc_clock::duration ttl; - gc_clock::time_point expiry; -}; - -class dead_marker final stub [[writable]] { - tombstone tomb; -}; - -class deletable_row stub [[writable]] { - clustering_key key; - boost::variant marker; - tombstone deleted_at; - row cells; - tombstone shadowable_deleted_at [[version 1.8]] = deleted_at; -}; - -enum class bound_kind : uint8_t { - excl_end, - incl_start, - incl_end, - excl_start, -}; - -class range_tombstone [[writable]] { - clustering_key_prefix start; - tombstone tomb; - bound_kind start_kind [[version 1.3]] = bound_kind::incl_start; - clustering_key_prefix end [[version 1.3]] = start; - bound_kind end_kind [[version 1.3]] = bound_kind::incl_end; -}; - -class mutation_partition stub [[writable]] { - tombstone tomb; - row static_row; - std::vector range_tombstones; // sorted by key - std::vector rows; // sorted by key - -}; - -class mutation stub [[writable]] { - utils::UUID table_id; - utils::UUID schema_version; - partition_key key; - mutation_partition partition; -}; - -class column_mapping_entry { - bytes name(); - sstring type_name(); -}; - -class column_mapping { - std::vector columns(); - uint32_t n_static(); -}; - -class canonical_mutation stub [[writable]] { - utils::UUID table_id; - utils::UUID schema_version; - partition_key key; - column_mapping mapping; - mutation_partition partition; -} diff --git a/scylla/idl/paging_state.idl.hh b/scylla/idl/paging_state.idl.hh deleted file mode 100644 index 9c6e6df..0000000 --- a/scylla/idl/paging_state.idl.hh +++ /dev/null @@ -1,9 +0,0 @@ -namespace service { -namespace pager { -class paging_state { - partition_key get_partition_key(); - std::experimental::optional get_clustering_key(); - uint32_t get_remaining(); -}; -} -} diff --git a/scylla/idl/partition_checksum.idl.hh b/scylla/idl/partition_checksum.idl.hh deleted file mode 100644 index b89e57a..0000000 --- a/scylla/idl/partition_checksum.idl.hh +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
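
The [[version x.y]] = default annotations above are what keep the wire format backward compatible: a reader handed a pre-1.3 range_tombstone sees no end bound and substitutes the declared default. Conceptually (the deserializer here is hypothetical):

    auto rt = read_range_tombstone(in);   // hypothetical deserializer
    // an old peer sent only {start, tomb}, so the defaults kick in:
    //   end == start, start_kind == incl_start, end_kind == incl_end,
    // i.e. the tombstone degenerates to a single-row deletion.
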
- */ - -enum class repair_checksum : uint8_t { - legacy = 0, - streamed = 1, -}; - -class partition_checksum { - std::array digest(); -}; diff --git a/scylla/idl/query.idl.hh b/scylla/idl/query.idl.hh deleted file mode 100644 index b2aa14d..0000000 --- a/scylla/idl/query.idl.hh +++ /dev/null @@ -1,37 +0,0 @@ -namespace query { - -class qr_cell stub [[writable]] { - std::experimental::optional timestamp; // present when send_timestamp option set in partition_slice - std::experimental::optional expiry; // present when send_expiry option set in partition_slice - - // Specified by CQL binary protocol, according to cql_serialization_format in read_command. - bytes value; - - std::experimental::optional ttl [[version 1.3]]; // present when send_ttl option set in partition_slice -}; - -class qr_row stub [[writable]] { - std::vector> cells; // ordered as requested in partition_slice -}; - -class qr_clustered_row stub [[writable]] { - std::experimental::optional key; // present when send_clustering_key option set in partition_slice - qr_row cells; // ordered as requested in partition_slice -}; - -class qr_partition stub [[writable]] { - std::experimental::optional key; // present when send_partition_key option set in partition_slice - qr_row static_row; - std::vector rows; // ordered by key -}; - -class query_result stub [[writable]] { - std::vector partitions; // in ring order -}; - -enum class digest_algorithm : uint8_t { - none = 0, // digest not required - MD5 = 1, // default algorithm -}; - -} diff --git a/scylla/idl/range.idl.hh b/scylla/idl/range.idl.hh deleted file mode 100644 index da1a297..0000000 --- a/scylla/idl/range.idl.hh +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -template -class range_bound { - T value(); - bool is_inclusive(); -}; - -template -class range { - std::experimental::optional> start(); - std::experimental::optional> end(); - bool is_singular(); -}; - -template -class nonwrapping_range { - std::experimental::optional> start(); - std::experimental::optional> end(); - bool is_singular(); -}; diff --git a/scylla/idl/read_command.idl.hh b/scylla/idl/read_command.idl.hh deleted file mode 100644 index 192a806..0000000 --- a/scylla/idl/read_command.idl.hh +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
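
The range template above may wrap (end < start runs through the top of the ring) while nonwrapping_range may not; membership under wrapping semantics, with ints standing in for tokens (not patch code):

    bool contains(int start, int end, int t) {
        if (start <= end) {
            return start <= t && t <= end;   // ordinary range, bounds inclusive
        }
        return t >= start || t <= end;       // wrapped: covers the ring seam
    }
    // contains(10, 3, 12) == true: the range 10..3 passes the highest token
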
- * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -class cql_serialization_format final { - uint8_t protocol_version(); -}; - -namespace query { - -class specific_ranges { - partition_key pk(); - std::vector> ranges(); -}; - -class partition_slice { - std::vector> default_row_ranges(); - std::vector static_columns; - std::vector regular_columns; - query::partition_slice::option_set options; - std::unique_ptr get_specific_ranges(); - cql_serialization_format cql_format(); - uint32_t partition_row_limit() [[version 1.3]] = std::numeric_limits::max(); -}; - -class read_command { - utils::UUID cf_id; - utils::UUID schema_version; - query::partition_slice slice; - uint32_t row_limit; - std::chrono::time_point timestamp; - std::experimental::optional trace_info [[version 1.3]]; - uint32_t partition_limit [[version 1.3]] = std::numeric_limits::max(); -}; - -} diff --git a/scylla/idl/reconcilable_result.idl.hh b/scylla/idl/reconcilable_result.idl.hh deleted file mode 100644 index a562213..0000000 --- a/scylla/idl/reconcilable_result.idl.hh +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -class partition { - uint32_t row_count(); - frozen_mutation mut(); -}; - -class reconcilable_result { - uint32_t row_count(); - std::vector partitions(); - query::short_read is_short_read() [[version 1.6]] = query::short_read::no; -}; diff --git a/scylla/idl/replay_position.idl.hh b/scylla/idl/replay_position.idl.hh deleted file mode 100644 index f1d1dc8..0000000 --- a/scylla/idl/replay_position.idl.hh +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -namespace db { -struct replay_position { - uint64_t id; - uint32_t pos; -}; -} diff --git a/scylla/idl/ring_position.idl.hh b/scylla/idl/ring_position.idl.hh deleted file mode 100644 index a1b3df1..0000000 --- a/scylla/idl/ring_position.idl.hh +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. 
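
replay_position above orders first by segment id, then by offset inside the segment; written out (not code from the patch):

    bool precedes(const db::replay_position& a, const db::replay_position& b) {
        return a.id != b.id ? a.id < b.id : a.pos < b.pos;
    }
    // entries at or before a recorded position count as covered by a
    // truncation record (see truncation_record below).
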
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -namespace dht { -class ring_position { - enum class token_bound:int8_t {start = -1, end = 1}; - dht::token token(); - dht::ring_position::token_bound bound(); - std::experimental::optional key(); -}; -} diff --git a/scylla/idl/tracing.idl.hh b/scylla/idl/tracing.idl.hh deleted file mode 100644 index 5ea19e7..0000000 --- a/scylla/idl/tracing.idl.hh +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -namespace tracing { -enum class trace_type : uint8_t { - NONE, - QUERY, - REPAIR, -}; - -class span_id { - uint64_t get_id(); -}; - -class trace_info { - utils::UUID session_id; - tracing::trace_type type; - bool write_on_close; - tracing::trace_state_props_set state_props [[version 1.4]]; - uint32_t slow_query_threshold_us [[version 1.4]]; - uint32_t slow_query_ttl_sec [[version 1.4]]; - tracing::span_id parent_id [[version 1.8]]; /// RPC sender's tracing session span ID -}; -} - diff --git a/scylla/idl/truncation_record.idl.hh b/scylla/idl/truncation_record.idl.hh deleted file mode 100644 index a49546d..0000000 --- a/scylla/idl/truncation_record.idl.hh +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -namespace db { -namespace system_keyspace { -struct truncation_record { - uint32_t magic; - std::vector positions; - db_clock::time_point time_stamp; -}; -} -} diff --git a/scylla/index/secondary_index_manager.cc b/scylla/index/secondary_index_manager.cc deleted file mode 100644 index 7579a70..0000000 --- a/scylla/index/secondary_index_manager.cc +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2017 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "index/secondary_index_manager.hh" - -namespace secondary_index { - -seastar::sharded _the_secondary_index_manager; - -std::set secondary_index_manager::get_dependent_indices(const column_definition& cdef) const { - // FIXME - return {}; -} - -} diff --git a/scylla/index/secondary_index_manager.hh b/scylla/index/secondary_index_manager.hh deleted file mode 100644 index 336ec6c..0000000 --- a/scylla/index/secondary_index_manager.hh +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2017 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. 
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include "schema.hh" - -#include - -namespace secondary_index { - -class secondary_index_manager : public seastar::async_sharded_service<secondary_index_manager> { -public: - std::set get_dependent_indices(const column_definition& cdef) const; -}; - -extern seastar::sharded<secondary_index_manager> _the_secondary_index_manager; - -inline seastar::sharded<secondary_index_manager>& get_secondary_index_manager() { - return _the_secondary_index_manager; -} - -} diff --git a/scylla/init.cc b/scylla/init.cc deleted file mode 100644 index fb904bb..0000000 --- a/scylla/init.cc +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#include "init.hh" -#include "message/messaging_service.hh" -#include "gms/failure_detector.hh" -#include "gms/gossiper.hh" -#include "service/storage_service.hh" -#include "to_string.hh" -#include "gms/inet_address.hh" - -logging::logger startlog("init"); - -// -// NOTE: these functions are (temporarily) -// duplicated in cql_test_env.cc -// until proper shutdown is done.
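
The sharded<> plumbing above gives one secondary_index_manager instance per shard; the conventional access pattern, with cdef an existing column_definition (illustrative only):

    auto& mgr = secondary_index::get_secondary_index_manager().local();
    auto deps = mgr.get_dependent_indices(cdef);   // always empty for now (see FIXME)
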
void init_storage_service(distributed<database>& db) { - service::init_storage_service(db).get(); - // #293 - do not stop anything - //engine().at_exit([] { return service::deinit_storage_service(); }); -} - -void init_ms_fd_gossiper(sstring listen_address_in - , uint16_t storage_port - , uint16_t ssl_storage_port - , bool tcp_nodelay_inter_dc - , sstring ms_encrypt_what - , sstring ms_trust_store - , sstring ms_cert - , sstring ms_key - , sstring ms_tls_prio - , bool ms_client_auth - , sstring ms_compress - , db::seed_provider_type seed_provider - , sstring cluster_name - , double phi - , bool sltba) -{ - const auto listen = gms::inet_address::lookup(listen_address_in).get0(); - - using encrypt_what = netw::messaging_service::encrypt_what; - using compress_what = netw::messaging_service::compress_what; - using tcp_nodelay_what = netw::messaging_service::tcp_nodelay_what; - using namespace seastar::tls; - - encrypt_what ew = encrypt_what::none; - if (ms_encrypt_what == "all") { - ew = encrypt_what::all; - } else if (ms_encrypt_what == "dc") { - ew = encrypt_what::dc; - } else if (ms_encrypt_what == "rack") { - ew = encrypt_what::rack; - } - - compress_what cw = compress_what::none; - if (ms_compress == "all") { - cw = compress_what::all; - } else if (ms_compress == "dc") { - cw = compress_what::dc; - } - - tcp_nodelay_what tndw = tcp_nodelay_what::all; - if (!tcp_nodelay_inter_dc) { - tndw = tcp_nodelay_what::local; - } - - future<> f = make_ready_future<>(); - std::shared_ptr<credentials_builder> creds; - - if (ew != encrypt_what::none) { - creds = std::make_shared<credentials_builder>(); - creds->set_dh_level(dh_params::level::MEDIUM); - - creds->set_x509_key_file(ms_cert, ms_key, x509_crt_format::PEM).get(); - if (ms_trust_store.empty()) { - creds->set_system_trust().get(); - } else { - creds->set_x509_trust_file(ms_trust_store, x509_crt_format::PEM).get(); - } - - if (!ms_tls_prio.empty()) { - creds->set_priority_string(ms_tls_prio); - } - if (ms_client_auth) { - creds->set_client_auth(seastar::tls::client_auth::REQUIRE); - } - } - - // Init messaging_service - // Delay listening messaging_service until gossip message handlers are registered - bool listen_now = false; - netw::get_messaging_service().start(listen, storage_port, ew, cw, tndw, ssl_storage_port, creds, sltba, listen_now).get(); - - // #293 - do not stop anything - //engine().at_exit([] { return netw::get_messaging_service().stop(); }); - // Init failure_detector - gms::get_failure_detector().start(std::move(phi)).get(); - // #293 - do not stop anything - //engine().at_exit([]{ return gms::get_failure_detector().stop(); }); - // Init gossiper - std::set<gms::inet_address> seeds; - if (seed_provider.parameters.count("seeds") > 0) { - size_t begin = 0; - size_t next = 0; - sstring seeds_str = seed_provider.parameters.find("seeds")->second; - while (begin < seeds_str.length() && begin != (next=seeds_str.find(",",begin))) { - auto seed = boost::trim_copy(seeds_str.substr(begin,next-begin)); - try { - seeds.emplace(gms::inet_address::lookup(seed).get0()); - } catch (...) 
{ - startlog.error("Bad configuration: invalid value in 'seeds': '{}': {}", seed, std::current_exception()); - throw bad_configuration_error(); - } - begin = next+1; - } - } - if (seeds.empty()) { - seeds.emplace(gms::inet_address("127.0.0.1")); - } - auto broadcast_address = utils::fb_utilities::get_broadcast_address(); - if (broadcast_address != listen && seeds.count(listen)) { - print("Use broadcast_address instead of listen_address for seeds list: seeds=%s, listen_address=%s, broadcast_address=%s\n", - to_string(seeds), listen_address_in, broadcast_address); - throw std::runtime_error("Use broadcast_address for seeds list"); - } - gms::get_gossiper().start().get(); - auto& gossiper = gms::get_local_gossiper(); - gossiper.set_seeds(seeds); - // #293 - do not stop anything - //engine().at_exit([]{ return gms::get_gossiper().stop(); }); - gms::get_gossiper().invoke_on_all([cluster_name](gms::gossiper& g) { - g.set_cluster_name(cluster_name); - }); -} diff --git a/scylla/init.hh b/scylla/init.hh deleted file mode 100644 index 4f205a6..0000000 --- a/scylla/init.hh +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ -#pragma once - -#include -#include -#include -#include "db/config.hh" -#include "database.hh" -#include "log.hh" - -extern logging::logger startlog; - -class bad_configuration_error : public std::exception {}; - -void init_storage_service(distributed& db); -void init_ms_fd_gossiper(sstring listen_address - , uint16_t storage_port - , uint16_t ssl_storage_port - , bool tcp_nodelay_inter_dc - , sstring ms_encrypt_what - , sstring ms_trust_store - , sstring ms_cert - , sstring ms_key - , sstring ms_tls_prio - , bool ms_client_auth - , sstring ms_compress - , db::seed_provider_type seed_provider - , sstring cluster_name = "Test Cluster" - , double phi = 8 - , bool sltba = false); diff --git a/scylla/interface/cassandra.thrift b/scylla/interface/cassandra.thrift deleted file mode 100644 index 2b3c700..0000000 --- a/scylla/interface/cassandra.thrift +++ /dev/null @@ -1,955 +0,0 @@ -#!/usr/local/bin/thrift --java --php --py -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
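
A sketch of a minimal call to init_ms_fd_gossiper() as declared in init.hh above, leaning on the defaulted cluster name, phi and sltba; the values are illustrative:

    init_ms_fd_gossiper("127.0.0.1",    // listen_address
                        7000,           // storage_port
                        7001,           // ssl_storage_port
                        true,           // tcp_nodelay_inter_dc
                        "none",         // ms_encrypt_what  -> encrypt_what::none
                        "", "", "",     // trust store, cert, key (unused here)
                        "",             // ms_tls_prio
                        false,          // ms_client_auth
                        "none",         // ms_compress      -> compress_what::none
                        seed_provider); // db::seed_provider_type with a "seeds" entry
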
- -# -# Copyright (C) 2014 ScyllaDB -# - -# -# This file has been modified from the Apache distribution -# by ScyllaDB -# - - -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# *** PLEASE REMEMBER TO EDIT THE VERSION CONSTANT WHEN MAKING CHANGES *** -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -# -# Interface definition for Cassandra Service -# - -namespace java org.apache.cassandra.thrift -namespace cpp cassandra -namespace csharp Apache.Cassandra -namespace py cassandra -namespace php cassandra -namespace perl Cassandra - -# Thrift.rb has a bug where top-level modules that include modules -# with the same name are not properly referenced, so we can't do -# Cassandra::Cassandra::Client. -namespace rb CassandraThrift - -# The API version (NOT the product version), composed as a dot delimited -# string with major, minor, and patch level components. -# -# - Major: Incremented for backward incompatible changes. An example would -# be changes to the number or disposition of method arguments. -# - Minor: Incremented for backward compatible changes. An example would -# be the addition of a new (optional) method. -# - Patch: Incremented for bug fixes. The patch level should be increased -# for every edit that doesn't result in a change to major/minor. -# -# See the Semantic Versioning Specification (SemVer) http://semver.org. -# -# Note that this backwards compatibility is from the perspective of the server, -# not the client. Cassandra should always be able to talk to older client -# software, but client software may not be able to talk to older Cassandra -# instances. -# -# An effort should be made not to break forward-client-compatibility either -# (e.g. one should avoid removing obsolete fields from the IDL), but no -# guarantees in this respect are made by the Cassandra project. -const string VERSION_ = "20.1.0" - - -# -# data structures -# - -/** Basic unit of data within a ColumnFamily. - * @param name, the name by which this column is set and retrieved. Maximum 64KB long. - * @param value. The data associated with the name. Maximum 2GB long, but in practice you should limit it to small numbers of MB (since Thrift must read the full value into memory to operate on it). - * @param timestamp. The timestamp is used for conflict detection/resolution when two columns with same name need to be compared. - * @param ttl. An optional, positive delay (in seconds) after which the column will be automatically deleted. - */ -struct Column { - 1: required binary name, - 2: optional binary value, - 3: optional i64 timestamp, - 4: optional i32 ttl, -} - -/** A named list of columns. - * @param name. see Column.name. - * @param columns. A collection of standard Columns. The columns within a super column are defined in an adhoc manner. - * Columns within a super column do not have to have matching structures (similarly named child columns). - */ -struct SuperColumn { - 1: required binary name, - 2: required list columns, -} - -struct CounterColumn { - 1: required binary name, - 2: required i64 value -} - -struct CounterSuperColumn { - 1: required binary name, - 2: required list columns -} - -/** - Methods for fetching rows/records from Cassandra will return either a single instance of ColumnOrSuperColumn or a list - of ColumnOrSuperColumns (get_slice()). If you're looking up a SuperColumn (or list of SuperColumns) then the resulting - instances of ColumnOrSuperColumn will have the requested SuperColumn in the attribute super_column. 
For queries resulting - in Columns, those values will be in the attribute column. This change was made between 0.3 and 0.4 to standardize on - single query methods that may return either a SuperColumn or Column. - - If the query was on a counter column family, you will either get a counter_column (instead of a column) or a - counter_super_column (instead of a super_column) - - @param column. The Column returned by get() or get_slice(). - @param super_column. The SuperColumn returned by get() or get_slice(). - @param counter_column. The Counterolumn returned by get() or get_slice(). - @param counter_super_column. The CounterSuperColumn returned by get() or get_slice(). - */ -struct ColumnOrSuperColumn { - 1: optional Column column, - 2: optional SuperColumn super_column, - 3: optional CounterColumn counter_column, - 4: optional CounterSuperColumn counter_super_column -} - - -# -# Exceptions -# (note that internal server errors will raise a TApplicationException, courtesy of Thrift) -# - -/** A specific column was requested that does not exist. */ -exception NotFoundException { -} - -/** Invalid request could mean keyspace or column family does not exist, required parameters are missing, or a parameter is malformed. - why contains an associated error message. -*/ -exception InvalidRequestException { - 1: required string why -} - -/** Not all the replicas required could be created and/or read. */ -exception UnavailableException { -} - -/** RPC timeout was exceeded. either a node failed mid-operation, or load was too high, or the requested op was too large. */ -exception TimedOutException { - /** - * if a write operation was acknowledged by some replicas but not by enough to - * satisfy the required ConsistencyLevel, the number of successful - * replies will be given here. In case of atomic_batch_mutate method this field - * will be set to -1 if the batch was written to the batchlog and to 0 if it wasn't. - */ - 1: optional i32 acknowledged_by - - /** - * in case of atomic_batch_mutate method this field tells if the batch - * was written to the batchlog. - */ - 2: optional bool acknowledged_by_batchlog - - /** - * for the CAS method, this field tells if we timed out during the paxos - * protocol, as opposed to during the commit of our update - */ - 3: optional bool paxos_in_progress -} - -/** invalid authentication request (invalid keyspace, user does not exist, or credentials invalid) */ -exception AuthenticationException { - 1: required string why -} - -/** invalid authorization request (user does not have access to keyspace) */ -exception AuthorizationException { - 1: required string why -} - -/** - * NOTE: This up outdated exception left for backward compatibility reasons, - * no actual schema agreement validation is done starting from Cassandra 1.2 - * - * schemas are not in agreement across all nodes - */ -exception SchemaDisagreementException { -} - - -# -# service api -# -/** - * The ConsistencyLevel is an enum that controls both read and write - * behavior based on the ReplicationFactor of the keyspace. The - * different consistency levels have different meanings, depending on - * if you're doing a write or read operation. - * - * If W + R > ReplicationFactor, where W is the number of nodes to - * block for on write, and R the number to block for on reads, you - * will have strongly consistent behavior; that is, readers will - * always see the most recent write. 
-
-#
-# service api
-#
-/**
- * The ConsistencyLevel is an enum that controls both read and write
- * behavior based on the ReplicationFactor of the keyspace. The
- * different consistency levels have different meanings, depending on
- * if you're doing a write or read operation.
- *
- * If W + R > ReplicationFactor, where W is the number of nodes to
- * block for on write, and R the number to block for on reads, you
- * will have strongly consistent behavior; that is, readers will
- * always see the most recent write. Of these, the most interesting is
- * to do QUORUM reads and writes, which gives you consistency while
- * still allowing availability in the face of node failures up to half
- * of <ReplicationFactor>. Of course if latency is more important than
- * consistency then you can use lower values for either or both.
- *
- * Some ConsistencyLevels (ONE, TWO, THREE) refer to a specific number
- * of replicas rather than a logical concept that adjusts
- * automatically with the replication factor. Of these, only ONE is
- * commonly used; TWO and (even more rarely) THREE are only useful
- * when you care more about guaranteeing a certain level of
- * durability than about consistency.
- *
- * Write consistency levels make the following guarantees before reporting success to the client:
- *   ANY          Ensure that the write has been written once somewhere, including possibly being hinted in a non-target node.
- *   ONE          Ensure that the write has been written to at least 1 node's commit log and memory table
- *   TWO          Ensure that the write has been written to at least 2 nodes' commit log and memory table
- *   THREE        Ensure that the write has been written to at least 3 nodes' commit log and memory table
- *   QUORUM       Ensure that the write has been written to <ReplicationFactor> / 2 + 1 nodes
- *   LOCAL_ONE    Ensure that the write has been written to 1 node within the local datacenter (requires NetworkTopologyStrategy)
- *   LOCAL_QUORUM Ensure that the write has been written to <ReplicationFactor> / 2 + 1 nodes, within the local datacenter (requires NetworkTopologyStrategy)
- *   EACH_QUORUM  Ensure that the write has been written to <ReplicationFactor> / 2 + 1 nodes in each datacenter (requires NetworkTopologyStrategy)
- *   ALL          Ensure that the write is written to <ReplicationFactor> nodes before responding to the client.
- *
- * Read consistency levels make the following guarantees before returning successful results to the client:
- *   ANY          Not supported. You probably want ONE instead.
- *   ONE          Returns the record obtained from a single replica.
- *   TWO          Returns the record with the most recent timestamp once two replicas have replied.
- *   THREE        Returns the record with the most recent timestamp once three replicas have replied.
- *   QUORUM       Returns the record with the most recent timestamp once a majority of replicas have replied.
- *   LOCAL_ONE    Returns the record with the most recent timestamp once a single replica within the local datacenter has replied.
- *   LOCAL_QUORUM Returns the record with the most recent timestamp once a majority of replicas within the local datacenter have replied.
- *   EACH_QUORUM  Returns the record with the most recent timestamp once a majority of replicas within each datacenter have replied.
- *   ALL          Returns the record with the most recent timestamp once all replicas have replied (implies no replica may be down).
-*/
-enum ConsistencyLevel {
-    ONE = 1,
-    QUORUM = 2,
-    LOCAL_QUORUM = 3,
-    EACH_QUORUM = 4,
-    ALL = 5,
-    ANY = 6,
-    TWO = 7,
-    THREE = 8,
-    SERIAL = 9,
-    LOCAL_SERIAL = 10,
-    LOCAL_ONE = 11,
-}
-
-/**
-    ColumnParent is used when selecting groups of columns from the same ColumnFamily. In directory structure terms, imagine
-    ColumnParent as ColumnPath + '/../'.
-
-    See also ColumnPath
- */
-struct ColumnParent {
-    3: required string column_family,
-    4: optional binary super_column,
-}
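To make the W + R > ReplicationFactor rule above concrete, here is the arithmetic for a common configuration (illustrative numbers, not taken from this file):

    RF = 3;  W = QUORUM = RF/2 + 1 = 2;  R = QUORUM = 2
    W + R = 4 > RF = 3   => every read quorum overlaps every write quorum,
                            so a read always sees the most recent acknowledged write
    W = R = ONE instead: W + R = 2 <= 3  => quorums need not overlap; stale reads possible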
-/** The ColumnPath is the path to a single column in Cassandra. It might make sense to think of ColumnPath and
- * ColumnParent in terms of a directory structure.
- *
- * ColumnPath is used to look up a single column.
- *
- * @param column_family. The name of the CF of the column being looked up.
- * @param super_column. The super column name.
- * @param column. The column name.
- */
-struct ColumnPath {
-    3: required string column_family,
-    4: optional binary super_column,
-    5: optional binary column,
-}
-
-/**
-    A slice range is a structure that stores basic range, ordering and limit information for a query that will return
-    multiple columns. It could be thought of as Cassandra's version of LIMIT and ORDER BY.
-
-    @param start. The column name to start the slice with. This attribute is not required, though there is no default value,
-                  and can be safely set to '', i.e., an empty byte array, to start with the first column name. Otherwise, it
-                  must be a valid value under the rules of the Comparator defined for the given ColumnFamily.
-    @param finish. The column name to stop the slice at. This attribute is not required, though there is no default value,
-                   and can be safely set to an empty byte array to not stop until 'count' results are seen. Otherwise, it
-                   must also be a valid value to the ColumnFamily Comparator.
-    @param reversed. Whether the results should be ordered in reversed order. Similar to ORDER BY blah DESC in SQL.
-    @param count. How many columns to return. Similar to LIMIT in SQL. May be arbitrarily large, but Thrift will
-                  materialize the whole result into memory before returning it to the client, so be aware that you may
-                  be better served by iterating through slices by passing the last value of one call in as the 'start'
-                  of the next instead of increasing 'count' arbitrarily large.
- */
-struct SliceRange {
-    1: required binary start,
-    2: required binary finish,
-    3: required bool reversed=0,
-    4: required i32 count=100,
-}
-
-/**
-    A SlicePredicate is similar to a mathematical predicate (see http://en.wikipedia.org/wiki/Predicate_(mathematical_logic)),
-    which is described as "a property that the elements of a set have in common."
-
-    SlicePredicates in Cassandra are described with either a list of column_names or a SliceRange. If column_names is
-    specified, slice_range is ignored.
-
-    @param column_names. A list of column names to retrieve. This can be used similarly to Memcached's "multi-get" feature
-                         to fetch N known column names. For instance, if you know you wish to fetch columns 'Joe', 'Jack',
-                         and 'Jim' you can pass those column names as a list to fetch all three at once.
-    @param slice_range. A SliceRange describing how to range, order, and/or limit the slice.
- */
-struct SlicePredicate {
-    1: optional list<binary> column_names,
-    2: optional SliceRange slice_range,
-}
-
-enum IndexOperator {
-    EQ,
-    GTE,
-    GT,
-    LTE,
-    LT
-}
-
-struct IndexExpression {
-    1: required binary column_name,
-    2: required IndexOperator op,
-    3: required binary value,
-}
-
-/**
- * @deprecated use a KeyRange with row_filter in get_range_slices instead
- */
-struct IndexClause {
-    1: required list<IndexExpression> expressions,
-    2: required binary start_key,
-    3: required i32 count=100,
-}
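The SliceRange comment above recommends paging instead of an arbitrarily large 'count'. A hedged sketch of that pattern against the generated C++ client (client, key and parent are assumed to exist; get_slice itself is declared later in this file):

    // Page through a wide row by feeding the last column seen back in as 'start'.
    cassandra::SlicePredicate pred;
    pred.__isset.slice_range = true;
    pred.slice_range.start = "";          // empty => begin at the first column
    pred.slice_range.finish = "";
    pred.slice_range.reversed = false;
    pred.slice_range.count = 100;

    std::vector<cassandra::ColumnOrSuperColumn> page;
    do {
        client.get_slice(page, key, parent, pred, cassandra::ConsistencyLevel::ONE);
        for (const auto& cosc : page) { /* consume cosc.column */ }
        if (!page.empty()) {
            // 'start' is inclusive, so the next page repeats the last column;
            // skip one element when consuming subsequent pages.
            pred.slice_range.start = page.back().column.name;
        }
    } while (page.size() == static_cast<size_t>(pred.slice_range.count));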
-/**
-    The semantics of start keys and tokens are slightly different.
-    Keys are start-inclusive; tokens are start-exclusive. Token
-    ranges may also wrap -- that is, the end token may be less
-    than the start one. Thus, a range from keyX to keyX is a
-    one-element range, but a range from tokenY to tokenY is the
-    full ring.
-*/
-struct KeyRange {
-    1: optional binary start_key,
-    2: optional binary end_key,
-    3: optional string start_token,
-    4: optional string end_token,
-    6: optional list<IndexExpression> row_filter,
-    5: required i32 count=100
-}
-
-/**
-    A KeySlice is a key followed by the data it maps to. A collection of KeySlices is returned by the get_range_slices operation.
-
-    @param key. a row key
-    @param columns. List of data represented by the key. Typically, the list is pared down to only the columns specified by
-                    a SlicePredicate.
- */
-struct KeySlice {
-    1: required binary key,
-    2: required list<ColumnOrSuperColumn> columns,
-}
-
-struct KeyCount {
-    1: required binary key,
-    2: required i32 count
-}
-
-/**
- * Note that the timestamp is only optional in case of counter deletion.
- */
-struct Deletion {
-    1: optional i64 timestamp,
-    2: optional binary super_column,
-    3: optional SlicePredicate predicate,
-}
-
-/**
-    A Mutation is either an insert (represented by filling column_or_supercolumn) or a deletion (represented by filling the deletion attribute).
-    @param column_or_supercolumn. An insert to a column or supercolumn (possibly a counter column or supercolumn)
-    @param deletion. A deletion of a column or supercolumn
-*/
-struct Mutation {
-    1: optional ColumnOrSuperColumn column_or_supercolumn,
-    2: optional Deletion deletion,
-}
-
-struct EndpointDetails {
-    1: string host,
-    2: string datacenter,
-    3: optional string rack
-}
-
-struct CASResult {
-    1: required bool success,
-    2: optional list<Column> current_values,
-}
-
-/**
-    A TokenRange describes part of the Cassandra ring; it is a mapping from a range to
-    the endpoints responsible for that range.
-    @param start_token The first token in the range
-    @param end_token The last token in the range
-    @param endpoints The endpoints responsible for the range (listed by their configured listen_address)
-    @param rpc_endpoints The endpoints responsible for the range (listed by their configured rpc_address)
-*/
-struct TokenRange {
-    1: required string start_token,
-    2: required string end_token,
-    3: required list<string> endpoints,
-    4: optional list<string> rpc_endpoints,
-    5: optional list<EndpointDetails> endpoint_details,
-}
-
-/**
-    Authentication requests can contain any data, dependent on the IAuthenticator used
-*/
-struct AuthenticationRequest {
-    1: required map<string, string> credentials
-}
-
-enum IndexType {
-    KEYS,
-    CUSTOM,
-    COMPOSITES
-}
-
-/* describes a column in a column family. */
-struct ColumnDef {
-    1: required binary name,
-    2: required string validation_class,
-    3: optional IndexType index_type,
-    4: optional string index_name,
-    5: optional map<string,string> index_options
-}
-
-/**
-    Describes a trigger.
-    `options` should include at least 'class' param.
-    Other options are not supported yet.
-*/
-struct TriggerDef {
-    1: required string name,
-    2: required map<string,string> options
-}
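Since Mutation is effectively a tagged union, exactly one of its two optional fields should be set per instance. A sketch using the generated C++ types (now_us, a microsecond timestamp, and the make_column helper from the earlier sketch are assumptions):

    cassandra::Mutation ins;
    ins.__isset.column_or_supercolumn = true;
    ins.column_or_supercolumn.__isset.column = true;
    ins.column_or_supercolumn.column = make_column("name", "value", now_us, 3600);

    cassandra::Mutation del;
    del.__isset.deletion = true;
    del.deletion.__isset.timestamp = true;   // optional only for counter deletions
    del.deletion.timestamp = now_us;         // deletes data written at or before now_us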
-/* describes a column family. */
-struct CfDef {
-    1: required string keyspace,
-    2: required string name,
-    3: optional string column_type="Standard",
-    5: optional string comparator_type="BytesType",
-    6: optional string subcomparator_type,
-    8: optional string comment,
-    12: optional double read_repair_chance,
-    13: optional list<ColumnDef> column_metadata,
-    14: optional i32 gc_grace_seconds,
-    15: optional string default_validation_class,
-    16: optional i32 id,
-    17: optional i32 min_compaction_threshold,
-    18: optional i32 max_compaction_threshold,
-    26: optional string key_validation_class,
-    28: optional binary key_alias,
-    29: optional string compaction_strategy,
-    30: optional map<string,string> compaction_strategy_options,
-    32: optional map<string,string> compression_options,
-    33: optional double bloom_filter_fp_chance,
-    34: optional string caching="keys_only",
-    37: optional double dclocal_read_repair_chance = 0.0,
-    39: optional i32 memtable_flush_period_in_ms,
-    40: optional i32 default_time_to_live,
-    42: optional string speculative_retry="NONE",
-    43: optional list<TriggerDef> triggers,
-    44: optional string cells_per_row_to_cache = "100",
-    45: optional i32 min_index_interval,
-    46: optional i32 max_index_interval,
-
-    /* All of the following are now ignored and unsupplied. */
-
-    /** @deprecated */
-    9: optional double row_cache_size,
-    /** @deprecated */
-    11: optional double key_cache_size,
-    /** @deprecated */
-    19: optional i32 row_cache_save_period_in_seconds,
-    /** @deprecated */
-    20: optional i32 key_cache_save_period_in_seconds,
-    /** @deprecated */
-    21: optional i32 memtable_flush_after_mins,
-    /** @deprecated */
-    22: optional i32 memtable_throughput_in_mb,
-    /** @deprecated */
-    23: optional double memtable_operations_in_millions,
-    /** @deprecated */
-    24: optional bool replicate_on_write,
-    /** @deprecated */
-    25: optional double merge_shards_chance,
-    /** @deprecated */
-    27: optional string row_cache_provider,
-    /** @deprecated */
-    31: optional i32 row_cache_keys_to_save,
-    /** @deprecated */
-    38: optional bool populate_io_cache_on_flush,
-    /** @deprecated */
-    41: optional i32 index_interval,
-}
-
-/* describes a keyspace. */
-struct KsDef {
-    1: required string name,
-    2: required string strategy_class,
-    3: optional map<string,string> strategy_options,
-
-    /** @deprecated ignored */
-    4: optional i32 replication_factor,
-
-    5: required list<CfDef> cf_defs,
-    6: optional bool durable_writes=1,
-}
-
-/** CQL query compression */
-enum Compression {
-    GZIP = 1,
-    NONE = 2
-}
-
-enum CqlResultType {
-    ROWS = 1,
-    VOID = 2,
-    INT = 3
-}
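A hedged sketch of filling in a KsDef for the system_add_keyspace method declared later in this service (keyspace name, datacenter name and replication factor are illustrative values):

    cassandra::KsDef ks;
    ks.name = "demo";
    ks.strategy_class = "org.apache.cassandra.locator.NetworkTopologyStrategy";
    ks.__isset.strategy_options = true;
    ks.strategy_options["dc1"] = "3";   // replication factor per datacenter
    ks.cf_defs = {};                    // required field; an empty list is valid

    std::string schema_id;
    client.system_add_keyspace(schema_id, ks);   // out-param holds the new schema id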
-/**
-    Row returned from a CQL query.
-
-    This struct is used for both CQL2 and CQL3 queries. For CQL2, the partition key
-    is special-cased and is always returned. For CQL3, it is not special-cased;
-    it will be included in the columns list if it was included in the SELECT and
-    the key field is always null.
-*/
-struct CqlRow {
-    1: required binary key,
-    2: required list<Column> columns
-}
-
-struct CqlMetadata {
-    1: required map<binary,string> name_types,
-    2: required map<binary,string> value_types,
-    3: required string default_name_type,
-    4: required string default_value_type
-}
-
-struct CqlResult {
-    1: required CqlResultType type,
-    2: optional list<CqlRow> rows,
-    3: optional i32 num,
-    4: optional CqlMetadata schema
-}
-
-struct CqlPreparedResult {
-    1: required i32 itemId,
-    2: required i32 count,
-    3: optional list<string> variable_types,
-    4: optional list<string> variable_names
-}
-
-/** Represents input splits used by hadoop ColumnFamilyRecordReaders */
-struct CfSplit {
-    1: required string start_token,
-    2: required string end_token,
-    3: required i64 row_count
-}
-
-/** The ColumnSlice is used to select a set of columns from inside a row.
- * If start or finish are unspecified they will default to the start-of or
- * end-of value, respectively.
- * @param start. The start of the ColumnSlice inclusive
- * @param finish. The end of the ColumnSlice inclusive
- */
-struct ColumnSlice {
-    1: optional binary start,
-    2: optional binary finish
-}
-
-/**
- * Used to perform multiple slices on a single row key in one rpc operation
- * @param key. The row key to be multi sliced
- * @param column_parent. The column family (super columns are unsupported)
- * @param column_slices. A list of ColumnSlice objects, each of which will be used to select columns
- * @param reversed. Direction of slice
- * @param count. Maximum number of columns
- * @param consistency_level. Level to perform the operation at
- */
-struct MultiSliceRequest {
-    1: optional binary key,
-    2: optional ColumnParent column_parent,
-    3: optional list<ColumnSlice> column_slices,
-    4: optional bool reversed=false,
-    5: optional i32 count=1000,
-    6: optional ConsistencyLevel consistency_level=ConsistencyLevel.ONE
-}
-
-service Cassandra {
-  # auth methods
-  void login(1: required AuthenticationRequest auth_request) throws (1:AuthenticationException authnx, 2:AuthorizationException authzx),
-
-  # set keyspace
-  void set_keyspace(1: required string keyspace) throws (1:InvalidRequestException ire),
-
-  # retrieval methods
-
-  /**
-    Get the Column or SuperColumn at the given column_path. If no value is present, NotFoundException is thrown. (This is
-    the only method that can throw an exception under non-failure conditions.)
-   */
-  ColumnOrSuperColumn get(1:required binary key,
-                          2:required ColumnPath column_path,
-                          3:required ConsistencyLevel consistency_level=ConsistencyLevel.ONE)
-                      throws (1:InvalidRequestException ire, 2:NotFoundException nfe, 3:UnavailableException ue, 4:TimedOutException te),
-
-  /**
-    Get the group of columns contained by column_parent (either a ColumnFamily name or a ColumnFamily/SuperColumn name
-    pair) specified by the given SlicePredicate. If no matching values are found, an empty list is returned.
-   */
-  list<ColumnOrSuperColumn> get_slice(1:required binary key,
-                                      2:required ColumnParent column_parent,
-                                      3:required SlicePredicate predicate,
-                                      4:required ConsistencyLevel consistency_level=ConsistencyLevel.ONE)
-                            throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
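Because get() is the one method that reports absence via an exception rather than an empty result, callers must treat NotFoundException as a normal outcome. A hedged sketch (client, key and the column family name are assumptions):

    cassandra::ColumnPath path;
    path.column_family = "users";
    path.__isset.column = true;
    path.column = "email";

    cassandra::ColumnOrSuperColumn result;
    try {
        client.get(result, key, path, cassandra::ConsistencyLevel::QUORUM);
        // result.column holds the value for a regular column family
    } catch (const cassandra::NotFoundException&) {
        // key/column absent: expected control flow, not a failure
    }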
-  /**
-    returns the number of columns matching predicate for a particular key,
-    ColumnFamily and optionally SuperColumn.
-   */
-  i32 get_count(1:required binary key,
-                2:required ColumnParent column_parent,
-                3:required SlicePredicate predicate,
-                4:required ConsistencyLevel consistency_level=ConsistencyLevel.ONE)
-      throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
-
-  /**
-    Performs a get_slice for column_parent and predicate for the given keys in parallel.
-   */
-  map<binary,list<ColumnOrSuperColumn>> multiget_slice(1:required list<binary> keys,
-                                                       2:required ColumnParent column_parent,
-                                                       3:required SlicePredicate predicate,
-                                                       4:required ConsistencyLevel consistency_level=ConsistencyLevel.ONE)
-                                        throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
-
-  /**
-    Performs a get_count in parallel on the given list of keys. The return value maps keys to the count found.
-   */
-  map<binary,i32> multiget_count(1:required list<binary> keys,
-                                 2:required ColumnParent column_parent,
-                                 3:required SlicePredicate predicate,
-                                 4:required ConsistencyLevel consistency_level=ConsistencyLevel.ONE)
-                  throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
-
-  /**
-    returns a subset of columns for a contiguous range of keys.
-   */
-  list<KeySlice> get_range_slices(1:required ColumnParent column_parent,
-                                  2:required SlicePredicate predicate,
-                                  3:required KeyRange range,
-                                  4:required ConsistencyLevel consistency_level=ConsistencyLevel.ONE)
-                 throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
-
-  /**
-    returns a range of columns, wrapping to the next rows if necessary to collect max_results.
-   */
-  list<KeySlice> get_paged_slice(1:required string column_family,
-                                 2:required KeyRange range,
-                                 3:required binary start_column,
-                                 4:required ConsistencyLevel consistency_level=ConsistencyLevel.ONE)
-                 throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
-
-  /**
-    Returns the subset of columns specified in SlicePredicate for the rows matching the IndexClause
-    @deprecated use get_range_slices instead with range.row_filter specified
-   */
-  list<KeySlice> get_indexed_slices(1:required ColumnParent column_parent,
-                                    2:required IndexClause index_clause,
-                                    3:required SlicePredicate column_predicate,
-                                    4:required ConsistencyLevel consistency_level=ConsistencyLevel.ONE)
-                 throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
-
-  # modification methods
-
-  /**
-   * Insert a Column at the given column_parent.column_family and optional column_parent.super_column.
-   */
-  void insert(1:required binary key,
-              2:required ColumnParent column_parent,
-              3:required Column column,
-              4:required ConsistencyLevel consistency_level=ConsistencyLevel.ONE)
-       throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
-
-  /**
-   * Increment or decrement a counter.
-   */
-  void add(1:required binary key,
-           2:required ColumnParent column_parent,
-           3:required CounterColumn column,
-           4:required ConsistencyLevel consistency_level=ConsistencyLevel.ONE)
-       throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
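A minimal sketch of the add() call above; a decrement is simply a negative value (column family and counter names are illustrative):

    cassandra::CounterColumn cc;
    cc.name = "page_views";
    cc.value = 1;                 // use -1 to decrement

    cassandra::ColumnParent cp;
    cp.column_family = "counters";

    client.add(key, cp, cc, cassandra::ConsistencyLevel::ONE);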
-  /**
-   * Atomic compare and set.
-   *
-   * If the cas is successful, the success boolean in CASResult will be true and there will be no current_values.
-   * Otherwise, success will be false and current_values will contain the current values for the columns in
-   * expected (that, by definition of compare-and-set, will differ from the values in expected).
-   *
-   * A cas operation takes 2 consistency levels. The first one, serial_consistency_level, simply indicates the
-   * level of serialization required. This can be either ConsistencyLevel.SERIAL or ConsistencyLevel.LOCAL_SERIAL.
-   * The second one, commit_consistency_level, defines the consistency level for the commit phase of the cas. This
-   * is a more traditional consistency level (the same CLs as for traditional writes are accepted) that impacts
-   * the visibility of the operation for reads. For instance, if commit_consistency_level is QUORUM, then it is
-   * guaranteed that a followup QUORUM read will see the cas write (if that one was successful, obviously). If
-   * commit_consistency_level is ANY, you will need to use a SERIAL/LOCAL_SERIAL read to be guaranteed to see
-   * the write.
-   */
-  CASResult cas(1:required binary key,
-                2:required string column_family,
-                3:list<Column> expected,
-                4:list<Column> updates,
-                5:required ConsistencyLevel serial_consistency_level=ConsistencyLevel.SERIAL,
-                6:required ConsistencyLevel commit_consistency_level=ConsistencyLevel.QUORUM)
-            throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
-
-  /**
-    Remove data from the row specified by key at the granularity specified by column_path, and the given timestamp. Note
-    that all the values in column_path besides column_path.column_family are truly optional: you can remove the entire
-    row by just specifying the ColumnFamily, or you can remove a SuperColumn or a single Column by specifying those levels too.
-   */
-  void remove(1:required binary key,
-              2:required ColumnPath column_path,
-              3:required i64 timestamp,
-              4:ConsistencyLevel consistency_level=ConsistencyLevel.ONE)
-       throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
-
-  /**
-   * Remove a counter at the specified location.
-   * Note that counters have limited support for deletes: if you remove a counter, you must wait to issue any following update
-   * until the delete has reached all the nodes and all of them have been fully compacted.
-   */
-  void remove_counter(1:required binary key,
-                      2:required ColumnPath path,
-                      3:required ConsistencyLevel consistency_level=ConsistencyLevel.ONE)
-       throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
-
-  /**
-    Mutate many columns or super columns for many row keys. See also: Mutation.
-
-    mutation_map maps key to column family to a list of Mutation objects to take place at that scope.
-  **/
-  void batch_mutate(1:required map<binary, map<string, list<Mutation>>> mutation_map,
-                    2:required ConsistencyLevel consistency_level=ConsistencyLevel.ONE)
-       throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
-
-  /**
-    Atomically mutate many columns or super columns for many row keys. See also: Mutation.
-
-    mutation_map maps key to column family to a list of Mutation objects to take place at that scope.
-  **/
-  void atomic_batch_mutate(1:required map<binary, map<string, list<Mutation>>> mutation_map,
-                           2:required ConsistencyLevel consistency_level=ConsistencyLevel.ONE)
-       throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
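A hedged sketch of the compare-and-set flow above (ts is an assumed microsecond timestamp; make_column is the helper sketched earlier; the "jobs" column family and state values are illustrative):

    std::vector<cassandra::Column> expected = { make_column("state", "pending", ts,     3600) };
    std::vector<cassandra::Column> updates  = { make_column("state", "active",  ts + 1, 3600) };

    cassandra::CASResult res;
    client.cas(res, key, "jobs", expected, updates,
               cassandra::ConsistencyLevel::SERIAL,
               cassandra::ConsistencyLevel::QUORUM);
    if (!res.success) {
        // res.current_values holds what the row actually contained,
        // which by definition differs from `expected`.
    }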
-  /**
-    Truncate will mark an entire column family as deleted.
-    From the user's perspective, a successful call to truncate will result in complete data deletion from cfname.
-    Internally, however, disk space will not be immediately released; as with all deletes in cassandra, this one
-    only marks the data as deleted.
-    The operation succeeds only if all hosts in the cluster are available and will throw an UnavailableException if
-    some hosts are down.
-   */
-  void truncate(1:required string cfname)
-       throws (1: InvalidRequestException ire, 2: UnavailableException ue, 3: TimedOutException te),
-
-  /**
-   * Select multiple slices of a key in a single RPC operation
-   */
-  list<ColumnOrSuperColumn> get_multi_slice(1:required MultiSliceRequest request)
-       throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
-
-  // Meta-APIs -- APIs to get information about the node or cluster,
-  // rather than user data. The nodeprobe program provides usage examples.
-
-  /**
-   * for each schema version present in the cluster, returns a list of nodes at that version.
-   * hosts that do not respond will be under the key DatabaseDescriptor.INITIAL_VERSION.
-   * the cluster is all on the same version if the size of the map is 1.
-   */
-  map<string, list<string>> describe_schema_versions()
-       throws (1: InvalidRequestException ire),
-
-  /** list the defined keyspaces in this cluster */
-  list<KsDef> describe_keyspaces()
-       throws (1:InvalidRequestException ire),
-
-  /** get the cluster name */
-  string describe_cluster_name(),
-
-  /** get the thrift api version */
-  string describe_version(),
-
-  /** get the token ring: a map of ranges to host addresses,
-      represented as a set of TokenRange instead of a map from range
-      to list of endpoints, because you can't use Thrift structs as
-      map keys:
-      https://issues.apache.org/jira/browse/THRIFT-162
-
-      for the same reason, we can't return a set here, even though
-      order is neither important nor predictable. */
-  list<TokenRange> describe_ring(1:required string keyspace)
-       throws (1:InvalidRequestException ire),
-
-
-  /** same as describe_ring, but considers only nodes in the local DC */
-  list<TokenRange> describe_local_ring(1:required string keyspace)
-       throws (1:InvalidRequestException ire),
-
-  /** get the mapping between token->node ip
-      without taking replication into consideration
-      https://issues.apache.org/jira/browse/CASSANDRA-4092 */
-  map<string,string> describe_token_map()
-       throws (1:InvalidRequestException ire),
-
-  /** returns the partitioner used by this cluster */
-  string describe_partitioner(),
-
-  /** returns the snitch used by this cluster */
-  string describe_snitch(),
-
-  /** describe specified keyspace */
-  KsDef describe_keyspace(1:required string keyspace)
-       throws (1:NotFoundException nfe, 2:InvalidRequestException ire),
-
-  /** experimental API for hadoop/parallel query support.
-      may change violently and without warning.
-
-      returns list of token strings such that first subrange is (list[0], list[1]],
-      next is (list[1], list[2]], etc. */
-  list<string> describe_splits(1:required string cfName,
-                               2:required string start_token,
-                               3:required string end_token,
-                               4:required i32 keys_per_split)
-       throws (1:InvalidRequestException ire),
-
-  /** Enables tracing for the next query in this connection and returns the UUID for that trace session
-      The next query will be traced independently of trace probability and the returned UUID can be used to query the trace keyspace */
-  binary trace_next_query(),
-
-  list<CfSplit> describe_splits_ex(1:required string cfName,
-                                   2:required string start_token,
-                                   3:required string end_token,
-                                   4:required i32 keys_per_split)
-       throws (1:InvalidRequestException ire),
-
-  /** adds a column family. returns the new schema id. */
-  string system_add_column_family(1:required CfDef cf_def)
-       throws (1:InvalidRequestException ire, 2:SchemaDisagreementException sde),
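A hedged sketch of a schema change via system_add_column_family(), followed by the describe_schema_versions() agreement check the comment above describes (keyspace and table names are illustrative):

    cassandra::CfDef cf;
    cf.keyspace = "demo";
    cf.name = "users";            // only keyspace and name are required

    std::string schema_id;
    client.system_add_column_family(schema_id, cf);

    // Wait for schema agreement before issuing further schema changes:
    // the cluster agrees when the map holds exactly one version entry.
    std::map<std::string, std::vector<std::string>> versions;
    client.describe_schema_versions(versions);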
-  /** drops a column family. returns the new schema id. */
-  string system_drop_column_family(1:required string column_family)
-       throws (1:InvalidRequestException ire, 2:SchemaDisagreementException sde),
-
-  /** adds a keyspace and any column families that are part of it. returns the new schema id. */
-  string system_add_keyspace(1:required KsDef ks_def)
-       throws (1:InvalidRequestException ire, 2:SchemaDisagreementException sde),
-
-  /** drops a keyspace and any column families that are part of it. returns the new schema id. */
-  string system_drop_keyspace(1:required string keyspace)
-       throws (1:InvalidRequestException ire, 2:SchemaDisagreementException sde),
-
-  /** updates properties of a keyspace. returns the new schema id. */
-  string system_update_keyspace(1:required KsDef ks_def)
-       throws (1:InvalidRequestException ire, 2:SchemaDisagreementException sde),
-
-  /** updates properties of a column family. returns the new schema id. */
-  string system_update_column_family(1:required CfDef cf_def)
-       throws (1:InvalidRequestException ire, 2:SchemaDisagreementException sde),
-
-
-  /**
-   * @deprecated Throws InvalidRequestException since 3.0. Please use the CQL3 version instead.
-   */
-  CqlResult execute_cql_query(1:required binary query, 2:required Compression compression)
-       throws (1:InvalidRequestException ire,
-               2:UnavailableException ue,
-               3:TimedOutException te,
-               4:SchemaDisagreementException sde)
-
-  /**
-   * Executes a CQL3 (Cassandra Query Language) statement and returns a
-   * CqlResult containing the results.
-   */
-  CqlResult execute_cql3_query(1:required binary query, 2:required Compression compression, 3:required ConsistencyLevel consistency)
-       throws (1:InvalidRequestException ire,
-               2:UnavailableException ue,
-               3:TimedOutException te,
-               4:SchemaDisagreementException sde)
-
-
-  /**
-   * @deprecated Throws InvalidRequestException since 3.0. Please use the CQL3 version instead.
-   */
-  CqlPreparedResult prepare_cql_query(1:required binary query, 2:required Compression compression)
-       throws (1:InvalidRequestException ire)
-
-  /**
-   * Prepare a CQL3 (Cassandra Query Language) statement by compiling and returning
-   * - the type of CQL statement
-   * - an id token of the compiled CQL stored on the server side.
-   * - a count of the discovered bound markers in the statement
-   */
-  CqlPreparedResult prepare_cql3_query(1:required binary query, 2:required Compression compression)
-       throws (1:InvalidRequestException ire)
-
-
-  /**
-   * @deprecated Throws InvalidRequestException since 3.0. Please use the CQL3 version instead.
-   */
-  CqlResult execute_prepared_cql_query(1:required i32 itemId, 2:required list<binary> values)
-       throws (1:InvalidRequestException ire,
-               2:UnavailableException ue,
-               3:TimedOutException te,
-               4:SchemaDisagreementException sde)
-
-  /**
-   * Executes a prepared CQL3 (Cassandra Query Language) statement by passing an id token, a list of variables
-   * to bind, and the consistency level, and returns a CqlResult containing the results.
-   */
-  CqlResult execute_prepared_cql3_query(1:required i32 itemId, 2:required list<binary> values, 3:required ConsistencyLevel consistency)
-       throws (1:InvalidRequestException ire,
-               2:UnavailableException ue,
-               3:TimedOutException te,
-               4:SchemaDisagreementException sde)
-
-  /**
-   * @deprecated This is now a no-op. Please use the CQL3 specific methods instead.
- */ - void set_cql_version(1: required string version) throws (1:InvalidRequestException ire) -} diff --git a/scylla/intrusive_set_external_comparator.hh b/scylla/intrusive_set_external_comparator.hh deleted file mode 100644 index b1a0f2f..0000000 --- a/scylla/intrusive_set_external_comparator.hh +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -/* - * (C) Copyright Ion Gaztanaga 2013-2014 - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or copy at - * http://www.boost.org/LICENSE_1_0.txt) - */ - -#pragma once - -#include -#include - -namespace bi = boost::intrusive; - -typedef bi::rbtree_algorithms> algo; - -class intrusive_set_external_comparator_member_hook : public bi::set_member_hook> { -public: - intrusive_set_external_comparator_member_hook() = default; - intrusive_set_external_comparator_member_hook(intrusive_set_external_comparator_member_hook&& o) noexcept { - algo::replace_node(o.this_ptr(), this_ptr()); - algo::init(o.this_ptr()); - } -}; - -template -class intrusive_set_external_comparator final { - typedef boost::intrusive::mhtraits value_traits; - typedef typename value_traits::node_traits node_traits; - typedef typename node_traits::node_ptr node_ptr; -public: - typedef Elem value_type; - typedef typename bi::tree_iterator iterator; - typedef typename bi::tree_iterator const_iterator; - typedef typename std::reverse_iterator reverse_iterator; - typedef typename std::reverse_iterator const_reverse_iterator; - -private: - intrusive_set_external_comparator_member_hook _header; - static const value_traits _value_traits; - - struct key_of_value { - typedef Elem type; - Elem& operator()(Elem& t) { return t; } - }; - - template - struct key_node_comparator { - Comparator _cmp; - const value_traits& _value_traits; - key_node_comparator(Comparator cmp, const value_traits& value_traits) : _cmp(cmp), _value_traits(value_traits) { } - bool operator()(const node_ptr& a, const node_ptr& b) { - return _cmp(*_value_traits.to_value_ptr(a), *_value_traits.to_value_ptr(b)); - } - template - bool operator()(const node_ptr& a, const T1& b) { - return _cmp(*_value_traits.to_value_ptr(a), b); - } - template - bool operator()(const T1& a, const node_ptr& b) { - return _cmp(a, *_value_traits.to_value_ptr(b)); - } - template - bool operator()(const T1& a, const T2& b) { - return _cmp(a, b); - } - }; - - using const_value_traits_ptr = typename std::pointer_traits::template rebind; - - const_value_traits_ptr priv_value_traits_ptr() const { - return bi::pointer_traits::pointer_to(_value_traits); - } - template - key_node_comparator key_node_comp(Comparator comp) const { - return key_node_comparator(comp, _value_traits); - } - iterator insert_unique_commit(Elem& value, const algo::insert_commit_data &commit_data) { - node_ptr to_insert(_value_traits.to_node_ptr(value)); 
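// Illustration (not part of the original header): the pair of calls above is
// Boost.Intrusive's two-phase unique insert. algo::insert_unique_check() only
// searches and fills insert_commit_data; no tree links change until
// algo::insert_unique_commit() runs, which is why insert_check() below can
// return (iterator, false) for a duplicate without having modified the tree.
// A hedged usage sketch, with hypothetical element type and comparator:
//
//   intrusive_set_external_comparator<row_entry> set;   // template args assumed
//   auto res = set.insert_check(set.end(), e, cmp);
//   // res.second == true  -> e was linked into the tree
//   // res.second == false -> res.first points at the conflicting element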
- algo::insert_unique_commit(_header.this_ptr(), to_insert, commit_data); - return iterator(to_insert, priv_value_traits_ptr()); - } -public: - intrusive_set_external_comparator() { algo::init_header(_header.this_ptr()); } - intrusive_set_external_comparator(intrusive_set_external_comparator&& o) { - algo::swap_tree(_header.this_ptr(), node_ptr(o._header.this_ptr())); - } - iterator begin() { return iterator(algo::begin_node(_header.this_ptr()), priv_value_traits_ptr()); } - const_iterator begin() const { return const_iterator(algo::begin_node(_header.this_ptr()), priv_value_traits_ptr()); } - iterator end() { return iterator(algo::end_node(_header.this_ptr()), priv_value_traits_ptr()); } - const_iterator end() const { return const_iterator(algo::end_node(_header.this_ptr()), priv_value_traits_ptr()); } - reverse_iterator rbegin() { return reverse_iterator(end()); } - const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } - reverse_iterator rend() { return reverse_iterator(begin()); } - const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } - template - void clear_and_dispose(Disposer disposer) { - algo::clear_and_dispose(_header.this_ptr(), - [&disposer] (const node_ptr& p) { - disposer(_value_traits.to_value_ptr(p)); - }); - algo::init_header(_header.this_ptr()); - } - bool empty() const { return algo::unique(_header.this_ptr()); } - - // WARNING: this method has O(N) time complexity, use with care - auto calculate_size() const { return algo::size(_header.this_ptr()); } - iterator erase(const_iterator i) { - const_iterator ret(i); - ++ret; - node_ptr to_erase(i.pointed_node()); - algo::erase(_header.this_ptr(), to_erase); - algo::init(to_erase); - return ret.unconst(); - } - iterator erase(const_iterator b, const_iterator e) { - while (b != e) { - erase(b++); - } - return b.unconst(); - } - template - iterator erase_and_dispose(const_iterator i, Disposer disposer) { - node_ptr to_erase(i.pointed_node()); - iterator ret(erase(i)); - disposer(_value_traits.to_value_ptr(to_erase)); - return ret; - } - template - iterator erase_and_dispose(const_iterator b, const_iterator e, Disposer disposer) { - while (b != e) { - erase_and_dispose(b++, disposer); - } - return b.unconst(); - } - template - void clone_from(const intrusive_set_external_comparator &src, Cloner cloner, Disposer disposer) { - clear_and_dispose(disposer); - if (!src.empty()) { - auto rollback = defer([this, &disposer] { this->clear_and_dispose(disposer); }); - algo::clone(src._header.this_ptr(), - _header.this_ptr(), - [&cloner] (const node_ptr& p) { - return _value_traits.to_node_ptr(*cloner(*_value_traits.to_value_ptr(p))); - }, - [&disposer] (const node_ptr& p) { - disposer(_value_traits.to_value_ptr(p)); - }); - rollback.cancel(); - } - } - Elem* unlink_leftmost_without_rebalance() { - node_ptr to_be_disposed(algo::unlink_leftmost_without_rebalance(_header.this_ptr())); - if(!to_be_disposed) - return 0; - algo::init(to_be_disposed); - return _value_traits.to_value_ptr(to_be_disposed); - } - iterator insert_before(const_iterator pos, Elem& value) { - node_ptr to_insert(_value_traits.to_node_ptr(value)); - return iterator(algo::insert_before(_header.this_ptr(), pos.pointed_node(), to_insert), priv_value_traits_ptr()); - } - template - iterator upper_bound(const KeyType& key, KeyTypeKeyCompare comp) { - return iterator(algo::upper_bound(_header.this_ptr(), key, key_node_comp(comp)), priv_value_traits_ptr()); - } - template - const_iterator upper_bound(const KeyType& key, 
KeyTypeKeyCompare comp) const { - return const_iterator(algo::upper_bound(_header.this_ptr(), key, key_node_comp(comp)), priv_value_traits_ptr()); - } - template - iterator lower_bound(const KeyType &key, KeyTypeKeyCompare comp) { - return iterator(algo::lower_bound(_header.this_ptr(), key, key_node_comp(comp)), priv_value_traits_ptr()); - } - template - const_iterator lower_bound(const KeyType &key, KeyTypeKeyCompare comp) const { - return const_iterator(algo::lower_bound(_header.this_ptr(), key, key_node_comp(comp)), priv_value_traits_ptr()); - } - template - iterator find(const KeyType &key, KeyTypeKeyCompare comp) { - return iterator(algo::find(_header.this_ptr(), key, key_node_comp(comp)), priv_value_traits_ptr()); - } - template - const_iterator find(const KeyType &key, KeyTypeKeyCompare comp) const { - return const_iterator(algo::find(_header.this_ptr(), key, key_node_comp(comp)), priv_value_traits_ptr()); - } - template - iterator insert(const_iterator hint, Elem& value, ElemCompare cmp) { - return insert_check(hint, value, std::move(cmp)).first; - } - template - std::pair insert_check(const_iterator hint, Elem& value, ElemCompare cmp) { - algo::insert_commit_data commit_data; - std::pair ret = - algo::insert_unique_check(_header.this_ptr(), - hint.pointed_node(), - key_of_value()(value), - key_node_comp(cmp), - commit_data); - return ret.second ? std::make_pair(insert_unique_commit(value, commit_data), true) - : std::make_pair(iterator(ret.first, priv_value_traits_ptr()), false); - } -}; - -template -const typename intrusive_set_external_comparator::value_traits intrusive_set_external_comparator::_value_traits; diff --git a/scylla/json.hh b/scylla/json.hh deleted file mode 100644 index 92d2a20..0000000 --- a/scylla/json.hh +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "core/sstring.hh" - -#include - -namespace seastar { // FIXME: not ours -namespace json { - -template -inline sstring to_json(const Map& map) { - Json::Value root(Json::objectValue); - for (auto&& kv : map) { - root[kv.first] = Json::Value(kv.second); - } - Json::FastWriter writer; - // Json::FastWriter unnecessarily adds a newline at the end of string. - // There is a method omitEndingLineFeed() which prevents that, but it seems - // to be too recent addition, so, at least for now, a workaround is needed. 
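// A hedged round-trip sketch for these helpers (names from this header; values
// illustrative; both sides are expected to be flat string-to-string maps, since
// to_map() reads every member with asString()):
//
//   std::map<sstring, sstring> opts{{"sstable_compression", "LZ4Compressor"}};
//   sstring raw = json::to_json(opts);   // => {"sstable_compression":"LZ4Compressor"}
//   auto back = json::to_map(raw);       // std::map<sstring, sstring> again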
- auto str = writer.write(root); - if (str.length() && str.back() == '\n') { - str.pop_back(); - } - return str; -} - -template -inline Map to_map(const sstring& raw, Map&& map) { - Json::Value root; - Json::Reader reader; - reader.parse(std::string{raw}, root); - for (auto&& member : root.getMemberNames()) { - map.emplace(member, root[member].asString()); - } - return std::forward(map); -} - -inline std::map to_map(const sstring& raw) { - return to_map(raw, std::map()); -} - -} - -} diff --git a/scylla/keys.cc b/scylla/keys.cc deleted file mode 100644 index 6f9af7c..0000000 --- a/scylla/keys.cc +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include - -#include "keys.hh" -#include "dht/i_partitioner.hh" -#include "clustering_bounds_comparator.hh" - -std::ostream& operator<<(std::ostream& out, const partition_key& pk) { - return out << "pk{" << to_hex(pk) << "}"; -} -std::ostream& operator<<(std::ostream& out, const partition_key_view& pk) { - return out << "pk{" << to_hex(pk.representation()) << "}"; -} - -std::ostream& operator<<(std::ostream& out, const clustering_key_prefix& ckp) { - return out << "ckp{" << to_hex(ckp) << "}"; -} - -const legacy_compound_view -partition_key_view::legacy_form(const schema& s) const { - return { *get_compound_type(s), _bytes }; -} - -int -partition_key_view::legacy_tri_compare(const schema& s, partition_key_view o) const { - auto cmp = legacy_compound_view::tri_comparator(*get_compound_type(s)); - return cmp(this->representation(), o.representation()); -} - -int -partition_key_view::ring_order_tri_compare(const schema& s, partition_key_view k2) const { - auto t1 = dht::global_partitioner().get_token(s, *this); - auto t2 = dht::global_partitioner().get_token(s, k2); - if (t1 != t2) { - return t1 < t2 ? 
-1 : 1; - } - return legacy_tri_compare(s, k2); -} - -std::ostream& operator<<(std::ostream& out, const bound_kind k) { - switch(k) { - case bound_kind::excl_end: - return out << "excl end"; - case bound_kind::incl_start: - return out << "incl start"; - case bound_kind::incl_end: - return out << "incl end"; - case bound_kind::excl_start: - return out << "excl start"; - } - abort(); -} - -bound_kind invert_kind(bound_kind k) { - switch(k) { - case bound_kind::excl_start: return bound_kind::incl_end; - case bound_kind::incl_start: return bound_kind::excl_end; - case bound_kind::excl_end: return bound_kind::incl_start; - case bound_kind::incl_end: return bound_kind::excl_start; - } - abort(); -} - -int32_t weight(bound_kind k) { - switch(k) { - case bound_kind::excl_end: - return -2; - case bound_kind::incl_start: - return -1; - case bound_kind::incl_end: - return 1; - case bound_kind::excl_start: - return 2; - } - abort(); -} - -const thread_local clustering_key_prefix bound_view::empty_prefix = clustering_key::make_empty(); diff --git a/scylla/keys.hh b/scylla/keys.hh deleted file mode 100644 index 7ff5d56..0000000 --- a/scylla/keys.hh +++ /dev/null @@ -1,779 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "schema.hh" -#include "bytes.hh" -#include "types.hh" -#include "compound_compat.hh" -#include "utils/managed_bytes.hh" -#include "hashing.hh" -#include "database_fwd.hh" - -// -// This header defines type system for primary key holders. -// -// We distinguish partition keys and clustering keys. API-wise they are almost -// the same, but they're separate type hierarchies. -// -// Clustering keys are further divided into prefixed and non-prefixed (full). -// Non-prefixed keys always have full component set, as defined by schema. -// Prefixed ones can have any number of trailing components missing. They may -// differ in underlying representation. -// -// The main classes are: -// -// partition_key - full partition key -// clustering_key - full clustering key -// clustering_key_prefix - clustering key prefix -// -// These classes wrap only the minimum information required to store the key -// (the key value itself). Any information which can be inferred from schema -// is not stored. Therefore accessors need to be provided with a pointer to -// schema, from which information about structure is extracted. - -// Abstracts a view to serialized compound. 
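// A hedged usage sketch for the key types described above (a live schema `s`,
// a second key `other_pk`, and the int32_type helper are assumptions; every
// accessor takes the schema because the key itself stores only serialized bytes):
//
//   partition_key pk = partition_key::from_single_value(s, int32_type->decompose(42));
//   partition_key::tri_compare cmp(s);   // comparators capture the schema-derived type
//   int order = cmp(pk, other_pk);       // <0, 0, >0
//   for (bytes_view component : pk.components()) { /* raw serialized components */ }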
-template -class compound_view_wrapper { -protected: - bytes_view _bytes; -protected: - compound_view_wrapper(bytes_view v) - : _bytes(v) - { } - - static inline const auto& get_compound_type(const schema& s) { - return TopLevelView::get_compound_type(s); - } -public: - std::vector explode(const schema& s) const { - return get_compound_type(s)->deserialize_value(_bytes); - } - - bytes_view representation() const { - return _bytes; - } - - struct less_compare { - typename TopLevelView::compound _t; - less_compare(const schema& s) : _t(get_compound_type(s)) {} - bool operator()(const TopLevelView& k1, const TopLevelView& k2) const { - return _t->less(k1.representation(), k2.representation()); - } - }; - - struct tri_compare { - typename TopLevelView::compound _t; - tri_compare(const schema &s) : _t(get_compound_type(s)) {} - int operator()(const TopLevelView& k1, const TopLevelView& k2) const { - return _t->compare(k1.representation(), k2.representation()); - } - }; - - struct hashing { - typename TopLevelView::compound _t; - hashing(const schema& s) : _t(get_compound_type(s)) {} - size_t operator()(const TopLevelView& o) const { - return _t->hash(o.representation()); - } - }; - - struct equality { - typename TopLevelView::compound _t; - equality(const schema& s) : _t(get_compound_type(s)) {} - bool operator()(const TopLevelView& o1, const TopLevelView& o2) const { - return _t->equal(o1.representation(), o2.representation()); - } - }; - - bool equal(const schema& s, const TopLevelView& other) const { - return get_compound_type(s)->equal(representation(), other.representation()); - } - - // begin() and end() return iterators over components of this compound. The iterator yields a bytes_view to the component. - // The iterators satisfy InputIterator concept. - auto begin() const { - return TopLevelView::compound::element_type::begin(representation()); - } - - // See begin() - auto end() const { - return TopLevelView::compound::element_type::end(representation()); - } - - // begin() and end() return iterators over components of this compound. The iterator yields a bytes_view to the component. - // The iterators satisfy InputIterator concept. 
- auto begin(const schema& s) const { - return begin(); - } - - // See begin() - auto end(const schema& s) const { - return end(); - } - - bytes_view get_component(const schema& s, size_t idx) const { - auto it = begin(s); - std::advance(it, idx); - return *it; - } - - // Returns a range of bytes_view - auto components() const { - return TopLevelView::compound::element_type::components(representation()); - } - - // Returns a range of bytes_view - auto components(const schema& s) const { - return components(); - } - - template - void feed_hash(Hasher& h, const schema& s) const { - for (bytes_view v : components(s)) { - ::feed_hash(h, v); - } - } -}; - -template -class compound_wrapper { -protected: - managed_bytes _bytes; -protected: - compound_wrapper(managed_bytes&& b) : _bytes(std::move(b)) {} - - static inline const auto& get_compound_type(const schema& s) { - return TopLevel::get_compound_type(s); - } -public: - static TopLevel make_empty() { - return from_exploded(std::vector()); - } - - static TopLevel make_empty(const schema&) { - return make_empty(); - } - - template - static TopLevel from_exploded(RangeOfSerializedComponents&& v) { - return TopLevel::from_range(std::forward(v)); - } - - static TopLevel from_exploded(const schema& s, const std::vector& v) { - return from_exploded(v); - } - static TopLevel from_exploded_view(const std::vector& v) { - return from_exploded(v); - } - - // We don't allow optional values, but provide this method as an efficient adaptor - static TopLevel from_optional_exploded(const schema& s, const std::vector& v) { - return TopLevel::from_bytes(get_compound_type(s)->serialize_optionals(v)); - } - - static TopLevel from_deeply_exploded(const schema& s, const std::vector& v) { - return TopLevel::from_bytes(get_compound_type(s)->serialize_value_deep(v)); - } - - static TopLevel from_single_value(const schema& s, bytes v) { - return TopLevel::from_bytes(get_compound_type(s)->serialize_single(std::move(v))); - } - - template - static - TopLevel from_singular(const schema& s, const T& v) { - auto ct = get_compound_type(s); - if (!ct->is_singular()) { - throw std::invalid_argument("compound is not singular"); - } - auto type = ct->types()[0]; - return from_single_value(s, type->decompose(v)); - } - - TopLevelView view() const { - return TopLevelView::from_bytes(_bytes); - } - - operator TopLevelView() const { - return view(); - } - - // FIXME: return views - std::vector explode(const schema& s) const { - return get_compound_type(s)->deserialize_value(_bytes); - } - - std::vector explode() const { - std::vector result; - for (bytes_view c : components()) { - result.emplace_back(to_bytes(c)); - } - return result; - } - - struct tri_compare { - typename TopLevel::compound _t; - tri_compare(const schema& s) : _t(get_compound_type(s)) {} - int operator()(const TopLevel& k1, const TopLevel& k2) const { - return _t->compare(k1.representation(), k2.representation()); - } - int operator()(const TopLevelView& k1, const TopLevel& k2) const { - return _t->compare(k1.representation(), k2.representation()); - } - int operator()(const TopLevel& k1, const TopLevelView& k2) const { - return _t->compare(k1.representation(), k2.representation()); - } - }; - - struct less_compare { - typename TopLevel::compound _t; - less_compare(const schema& s) : _t(get_compound_type(s)) {} - bool operator()(const TopLevel& k1, const TopLevel& k2) const { - return _t->less(k1.representation(), k2.representation()); - } - bool operator()(const TopLevelView& k1, const TopLevel& k2) const { - 
return _t->less(k1.representation(), k2.representation()); - } - bool operator()(const TopLevel& k1, const TopLevelView& k2) const { - return _t->less(k1.representation(), k2.representation()); - } - }; - - struct hashing { - typename TopLevel::compound _t; - hashing(const schema& s) : _t(get_compound_type(s)) {} - size_t operator()(const TopLevel& o) const { - return _t->hash(o); - } - size_t operator()(const TopLevelView& o) const { - return _t->hash(o.representation()); - } - }; - - struct equality { - typename TopLevel::compound _t; - equality(const schema& s) : _t(get_compound_type(s)) {} - bool operator()(const TopLevel& o1, const TopLevel& o2) const { - return _t->equal(o1.representation(), o2.representation()); - } - bool operator()(const TopLevelView& o1, const TopLevel& o2) const { - return _t->equal(o1.representation(), o2.representation()); - } - bool operator()(const TopLevel& o1, const TopLevelView& o2) const { - return _t->equal(o1.representation(), o2.representation()); - } - }; - - bool equal(const schema& s, const TopLevel& other) const { - return get_compound_type(s)->equal(representation(), other.representation()); - } - - bool equal(const schema& s, const TopLevelView& other) const { - return get_compound_type(s)->equal(representation(), other.representation()); - } - - operator bytes_view() const { - return _bytes; - } - - const managed_bytes& representation() const { - return _bytes; - } - - // begin() and end() return iterators over components of this compound. The iterator yields a bytes_view to the component. - // The iterators satisfy InputIterator concept. - auto begin(const schema& s) const { - return get_compound_type(s)->begin(_bytes); - } - - // See begin() - auto end(const schema& s) const { - return get_compound_type(s)->end(_bytes); - } - - // Returns a range of bytes_view - auto components() const { - return TopLevelView::compound::element_type::components(representation()); - } - - // Returns a range of bytes_view - auto components(const schema& s) const { - return components(); - } - - bytes_view get_component(const schema& s, size_t idx) const { - auto it = begin(s); - std::advance(it, idx); - return *it; - } - - template - void feed_hash(Hasher& h, const schema& s) const { - view().feed_hash(h, s); - } - - // Returns the number of components of this compound. 
- size_t size(const schema& s) const { - return std::distance(begin(s), end(s)); - } - - size_t external_memory_usage() const { - return _bytes.external_memory_usage(); - } - - size_t memory_usage() const { - return sizeof(*this) + external_memory_usage(); - } -}; - -template -class prefix_view_on_full_compound { -public: - using iterator = typename compound_type::iterator; -private: - bytes_view _b; - unsigned _prefix_len; - iterator _begin; - iterator _end; -public: - prefix_view_on_full_compound(const schema& s, bytes_view b, unsigned prefix_len) - : _b(b) - , _prefix_len(prefix_len) - , _begin(TopLevel::get_compound_type(s)->begin(_b)) - , _end(_begin) - { - std::advance(_end, prefix_len); - } - - iterator begin() const { return _begin; } - iterator end() const { return _end; } - - struct less_compare_with_prefix { - typename PrefixTopLevel::compound prefix_type; - - less_compare_with_prefix(const schema& s) - : prefix_type(PrefixTopLevel::get_compound_type(s)) - { } - - bool operator()(const prefix_view_on_full_compound& k1, const PrefixTopLevel& k2) const { - return lexicographical_tri_compare( - prefix_type->types().begin(), prefix_type->types().end(), - k1.begin(), k1.end(), - prefix_type->begin(k2), prefix_type->end(k2), - tri_compare) < 0; - } - - bool operator()(const PrefixTopLevel& k1, const prefix_view_on_full_compound& k2) const { - return lexicographical_tri_compare( - prefix_type->types().begin(), prefix_type->types().end(), - prefix_type->begin(k1), prefix_type->end(k1), - k2.begin(), k2.end(), - tri_compare) < 0; - } - }; -}; - -template -class prefix_view_on_prefix_compound { -public: - using iterator = typename compound_type::iterator; -private: - bytes_view _b; - unsigned _prefix_len; - iterator _begin; - iterator _end; -public: - prefix_view_on_prefix_compound(const schema& s, bytes_view b, unsigned prefix_len) - : _b(b) - , _prefix_len(prefix_len) - , _begin(TopLevel::get_compound_type(s)->begin(_b)) - , _end(_begin) - { - std::advance(_end, prefix_len); - } - - iterator begin() const { return _begin; } - iterator end() const { return _end; } - - struct less_compare_with_prefix { - typename TopLevel::compound prefix_type; - - less_compare_with_prefix(const schema& s) - : prefix_type(TopLevel::get_compound_type(s)) - { } - - bool operator()(const prefix_view_on_prefix_compound& k1, const TopLevel& k2) const { - return lexicographical_tri_compare( - prefix_type->types().begin(), prefix_type->types().end(), - k1.begin(), k1.end(), - prefix_type->begin(k2), prefix_type->end(k2), - tri_compare) < 0; - } - - bool operator()(const TopLevel& k1, const prefix_view_on_prefix_compound& k2) const { - return lexicographical_tri_compare( - prefix_type->types().begin(), prefix_type->types().end(), - prefix_type->begin(k1), prefix_type->end(k1), - k2.begin(), k2.end(), - tri_compare) < 0; - } - }; -}; - -template -class prefixable_full_compound : public compound_wrapper { - using base = compound_wrapper; -protected: - prefixable_full_compound(bytes&& b) : base(std::move(b)) {} -public: - using prefix_view_type = prefix_view_on_full_compound; - - bool is_prefixed_by(const schema& s, const PrefixTopLevel& prefix) const { - auto t = base::get_compound_type(s); - auto prefix_type = PrefixTopLevel::get_compound_type(s); - return ::is_prefixed_by(t->types().begin(), - t->begin(*this), t->end(*this), - prefix_type->begin(prefix), prefix_type->end(prefix), - ::equal); - } - - struct less_compare_with_prefix { - typename PrefixTopLevel::compound prefix_type; - typename TopLevel::compound 
full_type; - - less_compare_with_prefix(const schema& s) - : prefix_type(PrefixTopLevel::get_compound_type(s)) - , full_type(TopLevel::get_compound_type(s)) - { } - - bool operator()(const TopLevel& k1, const PrefixTopLevel& k2) const { - return lexicographical_tri_compare( - prefix_type->types().begin(), prefix_type->types().end(), - full_type->begin(k1), full_type->end(k1), - prefix_type->begin(k2), prefix_type->end(k2), - tri_compare) < 0; - } - - bool operator()(const PrefixTopLevel& k1, const TopLevel& k2) const { - return lexicographical_tri_compare( - prefix_type->types().begin(), prefix_type->types().end(), - prefix_type->begin(k1), prefix_type->end(k1), - full_type->begin(k2), full_type->end(k2), - tri_compare) < 0; - } - }; - - // In prefix equality two sequences are equal if any of them is a prefix - // of the other. Otherwise lexicographical ordering is applied. - // Note: full compounds sorted according to lexicographical ordering are also - // sorted according to prefix equality ordering. - struct prefix_equality_less_compare { - typename PrefixTopLevel::compound prefix_type; - typename TopLevel::compound full_type; - - prefix_equality_less_compare(const schema& s) - : prefix_type(PrefixTopLevel::get_compound_type(s)) - , full_type(TopLevel::get_compound_type(s)) - { } - - bool operator()(const TopLevel& k1, const PrefixTopLevel& k2) const { - return prefix_equality_tri_compare(prefix_type->types().begin(), - full_type->begin(k1), full_type->end(k1), - prefix_type->begin(k2), prefix_type->end(k2), - tri_compare) < 0; - } - - bool operator()(const PrefixTopLevel& k1, const TopLevel& k2) const { - return prefix_equality_tri_compare(prefix_type->types().begin(), - prefix_type->begin(k1), prefix_type->end(k1), - full_type->begin(k2), full_type->end(k2), - tri_compare) < 0; - } - }; - - prefix_view_type prefix_view(const schema& s, unsigned prefix_len) const { - return { s, this->representation(), prefix_len }; - } -}; - -template -class prefix_compound_view_wrapper : public compound_view_wrapper { - using base = compound_view_wrapper; -protected: - prefix_compound_view_wrapper(bytes_view v) - : compound_view_wrapper(v) - { } - -public: - bool is_full(const schema& s) const { - return TopLevel::get_compound_type(s)->is_full(base::_bytes); - } - - bool is_empty(const schema& s) const { - return TopLevel::get_compound_type(s)->is_empty(base::_bytes); - } -}; - -template -class prefix_compound_wrapper : public compound_wrapper { - using base = compound_wrapper; -protected: - prefix_compound_wrapper(managed_bytes&& b) : base(std::move(b)) {} -public: - using prefix_view_type = prefix_view_on_prefix_compound; - - prefix_view_type prefix_view(const schema& s, unsigned prefix_len) const { - return { s, this->representation(), prefix_len }; - } - - bool is_full(const schema& s) const { - return TopLevel::get_compound_type(s)->is_full(base::_bytes); - } - - bool is_empty(const schema& s) const { - return TopLevel::get_compound_type(s)->is_empty(base::_bytes); - } - - // Can be called only if is_full() - FullTopLevel to_full(const schema& s) const { - return FullTopLevel::from_exploded(s, base::explode(s)); - } - - bool is_prefixed_by(const schema& s, const TopLevel& prefix) const { - auto t = base::get_compound_type(s); - return ::is_prefixed_by(t->types().begin(), - t->begin(*this), t->end(*this), - t->begin(prefix), t->end(prefix), - equal); - } - - // In prefix equality two sequences are equal if any of them is a prefix - // of the other. Otherwise lexicographical ordering is applied. 
-    // Note: full compounds sorted according to lexicographical ordering are also
-    // sorted according to prefix equality ordering.
-    struct prefix_equality_less_compare {
-        typename TopLevel::compound prefix_type;
-
-        prefix_equality_less_compare(const schema& s)
-            : prefix_type(TopLevel::get_compound_type(s))
-        { }
-
-        bool operator()(const TopLevel& k1, const TopLevel& k2) const {
-            return prefix_equality_tri_compare(prefix_type->types().begin(),
-                prefix_type->begin(k1), prefix_type->end(k1),
-                prefix_type->begin(k2), prefix_type->end(k2),
-                tri_compare) < 0;
-        }
-    };
-
-    // See prefix_equality_less_compare.
-    struct prefix_equal_tri_compare {
-        typename TopLevel::compound prefix_type;
-
-        prefix_equal_tri_compare(const schema& s)
-            : prefix_type(TopLevel::get_compound_type(s))
-        { }
-
-        int operator()(const TopLevel& k1, const TopLevel& k2) const {
-            return prefix_equality_tri_compare(prefix_type->types().begin(),
-                prefix_type->begin(k1), prefix_type->end(k1),
-                prefix_type->begin(k2), prefix_type->end(k2),
-                tri_compare);
-        }
-    };
-};
-
-class partition_key_view : public compound_view_wrapper<partition_key_view> {
-public:
-    using c_type = compound_type<allow_prefixes::no>;
-private:
-    partition_key_view(bytes_view v)
-        : compound_view_wrapper<partition_key_view>(v)
-    { }
-public:
-    using compound = lw_shared_ptr<c_type>;
-
-    static partition_key_view from_bytes(bytes_view v) {
-        return { v };
-    }
-
-    static const compound& get_compound_type(const schema& s) {
-        return s.partition_key_type();
-    }
-
-    // Returns key's representation which is compatible with Origin.
-    // The result is valid as long as the schema is live.
-    const legacy_compound_view<c_type> legacy_form(const schema& s) const;
-
-    // A trichotomic comparator for ordering compatible with Origin.
-    int legacy_tri_compare(const schema& s, partition_key_view o) const;
-
-    // Checks if keys are equal in a way which is compatible with Origin.
-    bool legacy_equal(const schema& s, partition_key_view o) const {
-        return legacy_tri_compare(s, o) == 0;
-    }
-
-    // A trichotomic comparator which orders keys according to their ordering on the ring.
-    int ring_order_tri_compare(const schema& s, partition_key_view o) const;
-
-    friend std::ostream& operator<<(std::ostream& out, const partition_key_view& pk);
-};
-
-class partition_key : public compound_wrapper<partition_key> {
-    explicit partition_key(managed_bytes&& b)
-        : compound_wrapper<partition_key>(std::move(b))
-    { }
-public:
-    using c_type = compound_type<allow_prefixes::no>;
-
-    template<typename RangeOfSerializedComponents>
-    static partition_key from_range(RangeOfSerializedComponents&& v) {
-        return partition_key(managed_bytes(c_type::serialize_value(std::forward<RangeOfSerializedComponents>(v))));
-    }
-
-    partition_key(std::vector<bytes> v)
-        : compound_wrapper<partition_key>(managed_bytes(c_type::serialize_value(std::move(v))))
-    { }
-
-    partition_key(partition_key&& v) = default;
-    partition_key(const partition_key& v) = default;
-    partition_key(partition_key& v) = default;
-    partition_key& operator=(const partition_key&) = default;
-    partition_key& operator=(partition_key&) = default;
-    partition_key& operator=(partition_key&&) = default;
-
-    partition_key(partition_key_view key)
-        : partition_key(managed_bytes(key.representation()))
-    { }
-
-    using compound = lw_shared_ptr<c_type>;
-
-    static partition_key from_bytes(bytes_view b) {
-        return partition_key(managed_bytes(b));
-    }
-
-    static const compound& get_compound_type(const schema& s) {
-        return s.partition_key_type();
-    }
-
-    // Returns key's representation which is compatible with Origin.
-    // The result is valid as long as the schema is live.
-    const legacy_compound_view<c_type> legacy_form(const schema& s) const {
-        return view().legacy_form(s);
-    }
-
-    // A trichotomic comparator for ordering compatible with Origin.
-    int legacy_tri_compare(const schema& s, const partition_key& o) const {
-        return view().legacy_tri_compare(s, o);
-    }
-
-    // Checks if keys are equal in a way which is compatible with Origin.
-    bool legacy_equal(const schema& s, const partition_key& o) const {
-        return view().legacy_equal(s, o);
-    }
-
-    void validate(const schema& s) const {
-        return s.partition_key_type()->validate(representation());
-    }
-
-    friend std::ostream& operator<<(std::ostream& out, const partition_key& pk);
-};
-
-class exploded_clustering_prefix {
-    std::vector<bytes> _v;
-public:
-    exploded_clustering_prefix(std::vector<bytes>&& v) : _v(std::move(v)) {}
-    exploded_clustering_prefix() {}
-    size_t size() const {
-        return _v.size();
-    }
-    auto const& components() const {
-        return _v;
-    }
-    explicit operator bool() const {
-        return !_v.empty();
-    }
-    bool is_full(const schema& s) const {
-        return _v.size() == s.clustering_key_size();
-    }
-    friend std::ostream& operator<<(std::ostream& os, const exploded_clustering_prefix& ecp);
-};
-
-class clustering_key_prefix_view : public prefix_compound_view_wrapper<clustering_key_prefix_view> {
-    clustering_key_prefix_view(bytes_view v)
-        : prefix_compound_view_wrapper<clustering_key_prefix_view>(v)
-    { }
-public:
-    static clustering_key_prefix_view from_bytes(bytes_view v) {
-        return { v };
-    }
-
-    using compound = lw_shared_ptr<compound_type<allow_prefixes::yes>>;
-
-    static const compound& get_compound_type(const schema& s) {
-        return s.clustering_key_prefix_type();
-    }
-};
-
-class clustering_key_prefix : public prefix_compound_wrapper<clustering_key_prefix, clustering_key> {
-    explicit clustering_key_prefix(managed_bytes&& b)
-        : prefix_compound_wrapper<clustering_key_prefix, clustering_key>(std::move(b))
-    { }
-public:
-    template<typename RangeOfSerializedComponents>
-    static clustering_key_prefix from_range(RangeOfSerializedComponents&& v) {
-        return clustering_key_prefix(compound::element_type::serialize_value(std::forward<RangeOfSerializedComponents>(v)));
-    }
-
-    clustering_key_prefix(std::vector<bytes> v)
-        : prefix_compound_wrapper<clustering_key_prefix, clustering_key>(compound::element_type::serialize_value(std::move(v)))
-    { }
-
-    clustering_key_prefix(clustering_key_prefix&& v) = default;
-    clustering_key_prefix(const clustering_key_prefix& v) = default;
-    clustering_key_prefix(clustering_key_prefix& v) = default;
-    clustering_key_prefix& operator=(const clustering_key_prefix&) = default;
-    clustering_key_prefix& operator=(clustering_key_prefix&) = default;
-    clustering_key_prefix& operator=(clustering_key_prefix&&) = default;
-
-    clustering_key_prefix(clustering_key_prefix_view v)
-        : clustering_key_prefix(managed_bytes(v.representation()))
-    { }
-
-    using compound = lw_shared_ptr<compound_type<allow_prefixes::yes>>;
-
-    static clustering_key_prefix from_bytes(bytes_view b) {
-        return clustering_key_prefix(managed_bytes(b));
-    }
-
-    static const compound& get_compound_type(const schema& s) {
-        return s.clustering_key_prefix_type();
-    }
-
-    static clustering_key_prefix from_clustering_prefix(const schema& s, const exploded_clustering_prefix& prefix) {
-        return from_exploded(s, prefix.components());
-    }
-
-    friend std::ostream& operator<<(std::ostream& out, const clustering_key_prefix& ckp);
-};
-
diff --git a/scylla/lister.cc b/scylla/lister.cc
deleted file mode 100644
index 8dd821d..0000000
--- a/scylla/lister.cc
+++ /dev/null
@@ -1,77 +0,0 @@
-#include
-#include
-#include "lister.hh"
-#include "disk-error-handler.hh"
-#include "checked-file-impl.hh"
-
-static seastar::logger llogger("lister");
-
-lister::lister(file f, dir_entry_types type, walker_type walker, filter_type filter, lister::path dir,
-        lister::show_hidden do_show_hidden)
-    : _f(std::move(f))
-    , _walker(std::move(walker))
-    , _filter(std::move(filter))
-    , _expected_type(std::move(type))
-    , _listing(_f.list_directory([this] (directory_entry de) { return visit(de); }))
-    , _dir(std::move(dir))
-    , _show_hidden(do_show_hidden) {}
-
-future<> lister::visit(directory_entry de) {
-    return guarantee_type(std::move(de)).then([this] (directory_entry de) {
-        // Hide all synthetic directories and hidden files if not requested to show them.
-        if ((!_expected_type.empty() && !_expected_type.count(*(de.type))) || (!_show_hidden && de.name[0] == '.')) {
-            return make_ready_future<>();
-        }
-
-        // apply a filter
-        if (!_filter(_dir, de)) {
-            return make_ready_future<>();
-        }
-
-        return _walker(_dir, std::move(de));
-    });
-}
-
-future<> lister::done() {
-    return _listing.done().then([this] {
-        return _f.close();
-    });
-}
-
-future<directory_entry> lister::guarantee_type(directory_entry de) {
-    if (de.type) {
-        return make_ready_future<directory_entry>(std::move(de));
-    } else {
-        auto f = engine().file_type((_dir / de.name.c_str()).native());
-        return f.then([dir = _dir, de = std::move(de)] (std::experimental::optional<directory_entry_type> t) mutable {
-            // If some FS error occurs - return an exceptional future
-            if (!t) {
-                return make_exception_future<directory_entry>(std::runtime_error(sprint("Failed to get %s type.", (dir / de.name.c_str()).native())));
-            }
-            de.type = t;
-            return make_ready_future<directory_entry>(std::move(de));
-        });
-    }
-}
-
-future<> lister::scan_dir(lister::path dir, lister::dir_entry_types type, lister::show_hidden do_show_hidden, walker_type walker, filter_type filter) {
-    return open_checked_directory(general_disk_error_handler, dir.native()).then([type = std::move(type), walker = std::move(walker), filter = std::move(filter), dir, do_show_hidden] (file f) {
-        auto l = make_lw_shared<lister>(std::move(f), std::move(type), std::move(walker), std::move(filter), std::move(dir), do_show_hidden);
-        return l->done().then([l] { });
-    });
-}
-
-future<> lister::rmdir(lister::path dir) {
-    // first, kill the contents of the directory
-    return lister::scan_dir(dir, {}, show_hidden::yes, [] (lister::path parent_dir, directory_entry de) mutable {
-        lister::path current_entry_path(parent_dir / de.name.c_str());
-
-        if (de.type.value() == directory_entry_type::directory) {
-            return rmdir(std::move(current_entry_path));
-        } else {
-            return io_check(remove_file, current_entry_path.native());
-        }
-    }).then([dir] {
-        // ...then kill the directory itself
-        return io_check(remove_file, dir.native());
-    });
-}
diff --git a/scylla/lister.hh b/scylla/lister.hh
deleted file mode 100644
index cace29d..0000000
--- a/scylla/lister.hh
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Copyright (C) 2017 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include
-#include
-#include
-#include
-#include
-
-#include "seastarx.hh"
-
-class lister final {
-public:
-    using path = boost::filesystem::path;
-    /**
-     * Types of entries to list. If empty - list all present entries except for
-     * hidden if not requested to.
-     */
-    using dir_entry_types = std::unordered_set<directory_entry_type, enum_hash<directory_entry_type>>;
-    /**
-     * This callback is going to be called for each entry in the given directory
-     * that has the corresponding type and meets the filter demands.
-     *
-     * First parameter is a boost::filesystem::path object for the base directory.
-     *
-     * Second parameter is a directory_entry object of the file for which this
-     * callback is being called.
-     *
-     * The first parameter of the callback represents a parent directory of
-     * each entry defined by the second parameter.
-     */
-    using walker_type = std::function<future<> (path, directory_entry)>;
-    using filter_type = std::function<bool (const path&, const directory_entry&)>;
-
-    struct show_hidden_tag {};
-    using show_hidden = bool_class<show_hidden_tag>;
-
-private:
-    file _f;
-    walker_type _walker;
-    filter_type _filter;
-    dir_entry_types _expected_type;
-    subscription<directory_entry> _listing;
-    path _dir;
-    show_hidden _show_hidden;
-
-public:
-    /**
-     * Scans the directory calling a "walker" callback for each entry that satisfies the filtering.
-     *
-     * @param dir Directory to scan.
-     * @param type Type of entries to process. Entries of other types will be ignored.
-     * @param do_show_hidden if TRUE - the hidden entries are going to be processed as well.
-     * @param walker A callback to be called for each entry that satisfies the filtering rules.
-     * @param filter A filter callback that is called for each entry of the requested type: if returns FALSE - the entry will be skipped.
-     *
-     * @return A future that resolves when processing of all entries is finished or an error occurs. In the latter case an exceptional future is returned.
-     */
-    static future<> scan_dir(path dir, dir_entry_types type, show_hidden do_show_hidden, walker_type walker, filter_type filter);
-
-    /**
-     * Overload of scan_dir() that uses a show_hidden::no when it's not given.
-     */
-    static future<> scan_dir(path dir, dir_entry_types type, walker_type walker, filter_type filter) {
-        return scan_dir(std::move(dir), std::move(type), show_hidden::no, std::move(walker), std::move(filter));
-    }
-
-    /**
-     * Overload of scan_dir() that uses a show_hidden::no and a filter that returns TRUE for every entry when they are not given.
-     */
-    static future<> scan_dir(path dir, dir_entry_types type, walker_type walker) {
-        return scan_dir(std::move(dir), std::move(type), show_hidden::no, std::move(walker), [] (const path& parent_dir, const directory_entry& entry) { return true; });
-    }
-
-    /**
-     * Overload of scan_dir() that uses a filter that returns TRUE for every entry when filter is not given.
-     */
-    static future<> scan_dir(path dir, dir_entry_types type, show_hidden do_show_hidden, walker_type walker) {
-        return scan_dir(std::move(dir), std::move(type), do_show_hidden, std::move(walker), [] (const path& parent_dir, const directory_entry& entry) { return true; });
-    }
-
-    /** Overloads accepting sstring as the first parameter */
-    static future<> scan_dir(sstring dir, dir_entry_types type, show_hidden do_show_hidden, walker_type walker, filter_type filter) {
-        return scan_dir(path(std::move(dir)), std::move(type), do_show_hidden, std::move(walker), std::move(filter));
-    }
-    static future<> scan_dir(sstring dir, dir_entry_types type, walker_type walker, filter_type filter) {
-        return scan_dir(path(std::move(dir)), std::move(type), show_hidden::no, std::move(walker), std::move(filter));
-    }
-    static future<> scan_dir(sstring dir, dir_entry_types type, walker_type walker) {
-        return scan_dir(path(std::move(dir)), std::move(type), show_hidden::no, std::move(walker), [] (const path& parent_dir, const directory_entry& entry) { return true; });
-    }
-    static future<> scan_dir(sstring dir, dir_entry_types type, show_hidden do_show_hidden, walker_type walker) {
-        return scan_dir(path(std::move(dir)), std::move(type), do_show_hidden, std::move(walker), [] (const path& parent_dir, const directory_entry& entry) { return true; });
-    }
-
-    /**
-     * Removes the given directory with all its contents (like 'rm -rf <dir>' shell command).
-     *
-     * @param dir Directory to remove.
-     * @return A future that resolves when the operation is complete or an error occurs.
-     */
-    static future<> rmdir(path dir);
-
-    /**
-     * Constructor
-     *
-     * @param f A file instance for the directory to scan.
-     * @param type Types of entries to scan.
-     * @param walker A callback to be called for each entry that satisfies the filtering rules.
-     * @param filter A filter callback that is called for each entry of the requested type: if returns FALSE - the entry will be skipped.
-     * @param dir A seastar::path object for the directory to scan.
-     * @param do_show_hidden if TRUE - scan hidden entries as well.
-     */
-    lister(file f, dir_entry_types type, walker_type walker, filter_type filter, path dir, show_hidden do_show_hidden);
-
-    /**
-     * @return a future that resolves when the directory scanning is complete.
-     */
-    future<> done();
-
-private:
-    /**
-     * Handle a single entry.
-     *
-     * @param de Descriptor of the entry to handle.
-     * @return A future that resolves when the handling is over.
-     */
-    future<> visit(directory_entry de);
-
-    /**
-     * Validates that the input parameter has its "type" optional field engaged.
-     *
-     * This helper method is called before further processing the @param de in order
-     * to ensure that its "type" field is engaged.
-     *
-     * If it is engaged - returns the input value as is.
-     * If "type" isn't engaged - calls the engine().file_type() for file represented by @param de and sets
-     * "type" field of @param de to the returned value and then returns @param de.
-     *
-     * @param de entry to check and return
-     * @return a future that resolves with the @param de with the engaged de.type field or an
-     *         exceptional future with std::system_error exception if type of the file represented by @param de may not be retrieved.
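To make the removed lister API concrete, a typical caller might look like the following sketch (the paths and the walker body are illustrative assumptions, not code from this patch; `lister` and `directory_entry_type` are the declarations above):

    // Visit every regular file under `base`, then remove a scratch
    // subdirectory recursively (rmdir() behaves like `rm -rf <dir>`).
    future<> sweep_scratch_dir(lister::path base) {
        return lister::scan_dir(base, { directory_entry_type::regular },
                                [] (lister::path dir, directory_entry de) {
            // The walker runs once per entry that passed the type filter.
            // ... process (dir / de.name.c_str()) here ...
            return make_ready_future<>();
        }).then([base] {
            return lister::rmdir(base / "scratch");
        });
    }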
-     */
-    future<directory_entry> guarantee_type(directory_entry de);
-};
diff --git a/scylla/locator/abstract_replication_strategy.cc b/scylla/locator/abstract_replication_strategy.cc
deleted file mode 100644
index f7d5c1d..0000000
--- a/scylla/locator/abstract_replication_strategy.cc
+++ /dev/null
@@ -1,215 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "locator/abstract_replication_strategy.hh"
-#include "utils/class_registrator.hh"
-#include "exceptions/exceptions.hh"
-#include "stdx.hh"
-
-namespace locator {
-
-logging::logger abstract_replication_strategy::logger("replication_strategy");
-
-abstract_replication_strategy::abstract_replication_strategy(
-    const sstring& ks_name,
-    token_metadata& token_metadata,
-    snitch_ptr& snitch,
-    const std::map<sstring, sstring>& config_options,
-    replication_strategy_type my_type)
-        : _ks_name(ks_name)
-        , _config_options(config_options)
-        , _token_metadata(token_metadata)
-        , _snitch(snitch)
-        , _my_type(my_type) {}
-
-std::unique_ptr<abstract_replication_strategy> abstract_replication_strategy::create_replication_strategy(const sstring& ks_name, const sstring& strategy_name, token_metadata& tk_metadata, const std::map<sstring, sstring>& config_options) {
-    assert(locator::i_endpoint_snitch::get_local_snitch_ptr());
-    try {
-        return create_object<abstract_replication_strategy,
-                             const sstring&,
-                             token_metadata&,
-                             snitch_ptr&,
-                             const std::map<sstring, sstring>&>
-            (strategy_name, ks_name, tk_metadata,
-             locator::i_endpoint_snitch::get_local_snitch_ptr(), config_options);
-    } catch (const no_such_class& e) {
-        throw exceptions::configuration_exception(e.what());
-    }
-}
-
-void abstract_replication_strategy::validate_replication_strategy(const sstring& ks_name,
-                                                                  const sstring& strategy_name,
-                                                                  token_metadata& token_metadata,
-                                                                  const std::map<sstring, sstring>& config_options)
-{
-    auto strategy = create_replication_strategy(ks_name, strategy_name, token_metadata, config_options);
-    strategy->validate_options();
-    auto expected = strategy->recognized_options();
-    if (expected) {
-        for (auto&& item : config_options) {
-            sstring key = item.first;
-            if (!expected->count(key)) {
-                throw exceptions::configuration_exception(sprint("Unrecognized strategy option {%s} passed to %s for keyspace %s", key, strategy_name, ks_name));
-            }
-        }
-    }
-}
-
-std::vector<inet_address> abstract_replication_strategy::get_natural_endpoints(const token& search_token) {
-    const token& key_token = _token_metadata.first_token(search_token);
-    auto& cached_endpoints = get_cached_endpoints();
-    auto res = cached_endpoints.find(key_token);
-
-    if (res == cached_endpoints.end()) {
-        auto endpoints = calculate_natural_endpoints(search_token, _token_metadata);
-        cached_endpoints.emplace(key_token, endpoints);
-
-        return std::move(endpoints);
-    }
-
-    ++_cache_hits_count;
-    return res->second;
-}
-
-void abstract_replication_strategy::validate_replication_factor(sstring rf) const
-{
-    try {
-        if (std::stol(rf) < 0) {
-            throw exceptions::configuration_exception(
-                sstring("Replication factor must be non-negative; found ") + rf);
-        }
-    } catch (...) {
-        throw exceptions::configuration_exception(
-            sstring("Replication factor must be numeric; found ") + rf);
-    }
-}
-
-inline std::unordered_map<token, std::vector<inet_address>>&
-abstract_replication_strategy::get_cached_endpoints() {
-    if (_last_invalidated_ring_version != _token_metadata.get_ring_version()) {
-        _cached_endpoints.clear();
-        _last_invalidated_ring_version = _token_metadata.get_ring_version();
-    }
-
-    return _cached_endpoints;
-}
-
-static
-void
-insert_token_range_to_sorted_container_while_unwrapping(
-        const dht::token& prev_tok,
-        const dht::token& tok,
-        dht::token_range_vector& ret) {
-    if (prev_tok < tok) {
-        ret.emplace_back(
-                dht::token_range::bound(prev_tok, false),
-                dht::token_range::bound(tok, true));
-    } else {
-        ret.emplace_back(
-                dht::token_range::bound(prev_tok, false),
-                stdx::nullopt);
-        // Insert in front to maintain sorted order
-        ret.emplace(
-                ret.begin(),
-                stdx::nullopt,
-                dht::token_range::bound(tok, true));
-    }
-}
-
-dht::token_range_vector
-abstract_replication_strategy::get_ranges(inet_address ep) const {
-    dht::token_range_vector ret;
-    auto prev_tok = _token_metadata.sorted_tokens().back();
-    for (auto tok : _token_metadata.sorted_tokens()) {
-        for (inet_address a : calculate_natural_endpoints(tok, _token_metadata)) {
-            if (a == ep) {
-                insert_token_range_to_sorted_container_while_unwrapping(prev_tok, tok, ret);
-                break;
-            }
-        }
-        prev_tok = tok;
-    }
-    return ret;
-}
-
-dht::token_range_vector
-abstract_replication_strategy::get_primary_ranges(inet_address ep) {
-    dht::token_range_vector ret;
-    auto prev_tok = _token_metadata.sorted_tokens().back();
-    for (auto tok : _token_metadata.sorted_tokens()) {
-        auto&& eps = calculate_natural_endpoints(tok, _token_metadata);
-        if (eps.size() > 0 && eps[0] == ep) {
-            insert_token_range_to_sorted_container_while_unwrapping(prev_tok, tok, ret);
-        }
-        prev_tok = tok;
-    }
-    return ret;
-}
-
-std::unordered_multimap<inet_address, dht::token_range>
-abstract_replication_strategy::get_address_ranges(token_metadata& tm) const {
-    std::unordered_multimap<inet_address, dht::token_range> ret;
-    for (auto& t : tm.sorted_tokens()) {
-        dht::token_range_vector r = tm.get_primary_ranges_for(t);
-        auto eps = calculate_natural_endpoints(t, tm);
-        logger.debug("token={}, primary_range={}, address={}", t, r, eps);
-        for (auto ep : eps) {
-            for (auto&& rng : r) {
-                ret.emplace(ep, rng);
-            }
-        }
-    }
-    return ret;
-}
-
-std::unordered_multimap<dht::token_range, inet_address>
-abstract_replication_strategy::get_range_addresses(token_metadata& tm) const {
-    std::unordered_multimap<dht::token_range, inet_address> ret;
-    for (auto& t : tm.sorted_tokens()) {
-        dht::token_range_vector r = tm.get_primary_ranges_for(t);
-        auto eps = calculate_natural_endpoints(t, tm);
-        for (auto ep : eps) {
-            for (auto&& rng : r)
-                ret.emplace(rng, ep);
-        }
-    }
-    return ret;
-}
-
-dht::token_range_vector
-abstract_replication_strategy::get_pending_address_ranges(token_metadata& tm, token pending_token, inet_address pending_address) {
-    return get_pending_address_ranges(tm, std::unordered_set<token>{pending_token}, pending_address);
-}
-
-dht::token_range_vector
-abstract_replication_strategy::get_pending_address_ranges(token_metadata& tm, std::unordered_set<token> pending_tokens, inet_address pending_address) {
-    dht::token_range_vector ret;
-    auto temp = tm.clone_only_token_map();
-    temp.update_normal_tokens(pending_tokens, pending_address);
-    for (auto& x : get_address_ranges(temp)) {
-        if (x.first == pending_address) {
-            ret.push_back(x.second);
-        }
-    }
-    return ret;
-}
-
-} // namespace locator
diff --git a/scylla/locator/abstract_replication_strategy.hh b/scylla/locator/abstract_replication_strategy.hh
deleted
file mode 100644
index 83e2b7f..0000000
--- a/scylla/locator/abstract_replication_strategy.hh
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include
-#include
-#include
-#include "gms/inet_address.hh"
-#include "dht/i_partitioner.hh"
-#include "token_metadata.hh"
-#include "snitch_base.hh"
-
-// forward declaration since database.hh includes this file
-class keyspace;
-
-namespace locator {
-
-using inet_address = gms::inet_address;
-using token = dht::token;
-
-enum class replication_strategy_type {
-    simple,
-    local,
-    network_topology,
-    everywhere_topology,
-};
-
-class abstract_replication_strategy {
-private:
-    long _last_invalidated_ring_version = 0;
-    std::unordered_map<token, std::vector<inet_address>> _cached_endpoints;
-    uint64_t _cache_hits_count = 0;
-
-    static logging::logger logger;
-
-    std::unordered_map<token, std::vector<inet_address>>&
-    get_cached_endpoints();
-protected:
-    sstring _ks_name;
-    // TODO: Do we need this member at all?
-    //keyspace* _keyspace = nullptr;
-    std::map<sstring, sstring> _config_options;
-    token_metadata& _token_metadata;
-    snitch_ptr& _snitch;
-    replication_strategy_type _my_type;
-
-    template <typename... Args>
-    void err(const char* fmt, Args&&... args) const {
-        logger.error(fmt, std::forward<Args>(args)...);
-    }
-
-    template <typename... Args>
-    void warn(const char* fmt, Args&&... args) const {
-        logger.warn(fmt, std::forward<Args>(args)...);
-    }
-
-    template <typename... Args>
-    void debug(const char* fmt, Args&&... args) const {
-        logger.debug(fmt, std::forward<Args>(args)...);
-    }
-
-    void validate_replication_factor(sstring rf) const;
-
-public:
-    abstract_replication_strategy(
-        const sstring& keyspace_name,
-        token_metadata& token_metadata,
-        snitch_ptr& snitch,
-        const std::map<sstring, sstring>& config_options,
-        replication_strategy_type my_type);
-    virtual std::vector<inet_address> calculate_natural_endpoints(const token& search_token, token_metadata& tm) const = 0;
-    virtual ~abstract_replication_strategy() {}
-    static std::unique_ptr<abstract_replication_strategy> create_replication_strategy(const sstring& ks_name, const sstring& strategy_name, token_metadata& token_metadata, const std::map<sstring, sstring>& config_options);
-    static void validate_replication_strategy(const sstring& ks_name,
-                                              const sstring& strategy_name,
-                                              token_metadata& token_metadata,
-                                              const std::map<sstring, sstring>& config_options);
-    virtual std::vector<inet_address> get_natural_endpoints(const token& search_token);
-    virtual void validate_options() const = 0;
-    virtual std::experimental::optional<std::unordered_set<sstring>> recognized_options() const = 0;
-    virtual size_t get_replication_factor() const = 0;
-    uint64_t get_cache_hits_count() const { return _cache_hits_count; }
-    replication_strategy_type get_type() const { return _my_type; }
-
-    // get_ranges() returns the list of ranges held by the given endpoint.
-    // The list is sorted, and its elements are non overlapping and non wrap-around.
-    // It is the analogue of Origin's getAddressRanges().get(endpoint).
-    // This function is not efficient, and not meant for the fast path.
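A note on the `_cached_endpoints`/`_last_invalidated_ring_version` pair above (the `get_ranges()` declaration this comment documents follows right below): it implements lazy, version-stamped caching. Results are valid only for the ring version they were computed against, and a version bump clears the map on the next lookup rather than eagerly on every ring change. A condensed, self-contained sketch of the same pattern, with simplified types rather than the class above:

    #include <unordered_map>

    // Cache whose contents are implicitly invalidated whenever the
    // externally supplied version moves; cleared lazily on first access
    // after the change, mirroring get_cached_endpoints() above.
    template <typename Key, typename Value>
    class version_stamped_cache {
        long _seen_version = 0;
        std::unordered_map<Key, Value> _entries;
    public:
        std::unordered_map<Key, Value>& get(long current_version) {
            if (_seen_version != current_version) {
                _entries.clear();
                _seen_version = current_version;
            }
            return _entries;
        }
    };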
-    dht::token_range_vector get_ranges(inet_address ep) const;
-    // get_primary_ranges() returns the list of "primary ranges" for the given
-    // endpoint. "Primary ranges" are the ranges that the node is responsible
-    // for storing a replica of primarily, which means it is the first node
-    // returned by calculate_natural_endpoints().
-    // This function is the analogue of Origin's
-    // StorageService.getPrimaryRangesForEndpoint().
-    dht::token_range_vector get_primary_ranges(inet_address ep);
-
-    std::unordered_multimap<inet_address, dht::token_range> get_address_ranges(token_metadata& tm) const;
-
-    std::unordered_multimap<dht::token_range, inet_address> get_range_addresses(token_metadata& tm) const;
-
-    dht::token_range_vector get_pending_address_ranges(token_metadata& tm, token pending_token, inet_address pending_address);
-
-    dht::token_range_vector get_pending_address_ranges(token_metadata& tm, std::unordered_set<token> pending_tokens, inet_address pending_address);
-};
-
-}
diff --git a/scylla/locator/ec2_multi_region_snitch.cc b/scylla/locator/ec2_multi_region_snitch.cc
deleted file mode 100644
index 1d14e29..0000000
--- a/scylla/locator/ec2_multi_region_snitch.cc
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Modified by ScyllaDB
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "locator/ec2_multi_region_snitch.hh"
-#include "locator/reconnectable_snitch_helper.hh"
-#include "gms/gossiper.hh"
-#include "service/storage_service.hh"
-
-namespace locator {
-ec2_multi_region_snitch::ec2_multi_region_snitch(const sstring& fname, unsigned io_cpu_id)
-    : ec2_snitch(fname, io_cpu_id) {}
-
-future<> ec2_multi_region_snitch::start() {
-    _state = snitch_state::initializing;
-
-    return ec2_snitch::load_config().then([this] {
-        if (engine().cpu_id() == io_cpu_id()) {
-            return aws_api_call(AWS_QUERY_SERVER_ADDR, PUBLIC_IP_QUERY_REQ).then([this](sstring pub_addr){
-                inet_address local_public_address = inet_address(pub_addr);
-                logger().info("Ec2MultiRegionSnitch using publicIP as identifier: {}", local_public_address);
-
-                //
-                // Use the Public IP to broadcast Address to other nodes.
- // - // Cassandra 2.1 manual explicitly instructs to set broadcast_address - // value to a public address in cassandra.yaml. - // - utils::fb_utilities::set_broadcast_address(local_public_address); - utils::fb_utilities::set_broadcast_rpc_address(local_public_address); - - return aws_api_call(AWS_QUERY_SERVER_ADDR, PRIVATE_IP_QUERY_REQ).then( - [this] (sstring priv_addr) { - _local_private_address = priv_addr; - }); - }).then([this] { - // - // Gossiper main instance is currently running on CPU0 - - // therefore we need to make sure the _local_private_address is - // set on the shard0 so that it may be used when Gossiper is - // going to invoke gossiper_starting() method. - // - return _my_distributed->invoke_on(0, - [this] (snitch_ptr& local_s) { - - if (engine().cpu_id() != io_cpu_id()) { - local_s->set_local_private_addr(_local_private_address); - } - }); - }).then([this] { - set_snitch_ready(); - }); - } - - set_snitch_ready(); - return make_ready_future<>(); - }); -} - -void ec2_multi_region_snitch::set_local_private_addr(const sstring& addr_str) { - _local_private_address = addr_str; -} - -future<> ec2_multi_region_snitch::gossiper_starting() { - // - // Note: currently gossiper "main" instance always runs on CPU0 therefore - // this function will be executed on CPU0 only. - // - ec2_snitch::gossiper_starting(); - - using namespace gms; - auto& g = get_local_gossiper(); - auto& ss = service::get_local_storage_service(); - - return g.add_local_application_state(application_state::INTERNAL_IP, - ss.value_factory.internal_ip(_local_private_address)).then([this] { - if (!_gossip_started) { - gms::get_local_gossiper().register_(make_shared(_my_dc)); - _gossip_started = true; - } - }); - -} - -using registry_2_params = class_registrator; -static registry_2_params registrator2("org.apache.cassandra.locator.Ec2MultiRegionSnitch"); -static registry_2_params registrator2_short_name("Ec2MultiRegionSnitch"); - - -using registry_1_param = class_registrator; -static registry_1_param registrator1("org.apache.cassandra.locator.Ec2MultiRegionSnitch"); -static registry_1_param registrator1_short_name("Ec2MultiRegionSnitch"); - -using registry_default = class_registrator; -static registry_default registrator_default("org.apache.cassandra.locator.Ec2MultiRegionSnitch"); -static registry_default registrator_default_short_name("Ec2MultiRegionSnitch"); - -} // namespace locator diff --git a/scylla/locator/ec2_multi_region_snitch.hh b/scylla/locator/ec2_multi_region_snitch.hh deleted file mode 100644 index 3185bad..0000000 --- a/scylla/locator/ec2_multi_region_snitch.hh +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. 
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "locator/ec2_snitch.hh" - -namespace locator { -class ec2_multi_region_snitch : public ec2_snitch { -public: - ec2_multi_region_snitch(const sstring& fname = "", unsigned io_cpu_id = 0); - virtual future<> gossiper_starting() override; - virtual future<> start() override; - virtual void set_local_private_addr(const sstring& addr_str) override; - virtual sstring get_name() const override { - return "org.apache.cassandra.locator.Ec2MultiRegionSnitch"; - } -private: - static constexpr const char* PUBLIC_IP_QUERY_REQ = "/latest/meta-data/public-ipv4"; - static constexpr const char* PRIVATE_IP_QUERY_REQ = "/latest/meta-data/local-ipv4"; - sstring _local_private_address; -}; -} // namespace locator diff --git a/scylla/locator/ec2_snitch.cc b/scylla/locator/ec2_snitch.cc deleted file mode 100644 index 08b5bf6..0000000 --- a/scylla/locator/ec2_snitch.cc +++ /dev/null @@ -1,128 +0,0 @@ -#include "locator/ec2_snitch.hh" - -namespace locator { - -ec2_snitch::ec2_snitch(const sstring& fname, unsigned io_cpuid) : production_snitch_base(fname) { - if (engine().cpu_id() == io_cpuid) { - io_cpu_id() = io_cpuid; - } -} - -/** - * Read AWS and property file configuration and distribute it among other shards - * - * @return - */ -future<> ec2_snitch::load_config() { - using namespace boost::algorithm; - - if (engine().cpu_id() == io_cpu_id()) { - return aws_api_call(AWS_QUERY_SERVER_ADDR, ZONE_NAME_QUERY_REQ).then([this](sstring az){ - assert(az.size()); - - std::vector splits; - - // Split "us-east-1a" or "asia-1a" into "us-east"/"1a" and "asia"/"1a". 
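Worked through by hand, the zone parsing that the comment above introduces (the splitting code follows right below) turns "us-east-1a" into rack "1a" and dc "us-east"; the trailing-"1" special case is the CASSANDRA-4026 hack. A standalone sketch of the same derivation, assuming std::string in place of sstring:

    #include <string>
    #include <utility>

    // Mirror of the zone parsing below: rack is the last '-'-separated
    // token; dc is the zone with its zone letter stripped, and it also
    // loses the "-1" for names like "us-east-1a".
    std::pair<std::string, std::string> dc_and_rack_from_az(const std::string& az) {
        std::string rack = az.substr(az.rfind('-') + 1);   // "us-east-1a" -> "1a"
        std::string dc = az.substr(0, az.size() - 1);      // -> "us-east-1"
        if (!dc.empty() && dc.back() == '1') {
            dc = az.substr(0, az.size() - 3);              // -> "us-east"
        }
        return { dc, rack };
    }

    // Examples: "us-east-1a" -> {"us-east", "1a"}; "asia-1a" -> {"asia", "1a"}.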
- split(splits, az, is_any_of("-")); - assert(splits.size() > 1); - - _my_rack = splits[splits.size() - 1]; - - // hack for CASSANDRA-4026 - _my_dc = az.substr(0, az.size() - 1); - if (_my_dc[_my_dc.size() - 1] == '1') { - _my_dc = az.substr(0, az.size() - 3); - } - - return read_property_file().then([this] (sstring datacenter_suffix) { - _my_dc += datacenter_suffix; - logger().info("Ec2Snitch using region: {}, zone: {}.", _my_dc, _my_rack); - - return _my_distributed->invoke_on_all( - [this] (snitch_ptr& local_s) { - - // Distribute the new values on all CPUs but the current one - if (engine().cpu_id() != io_cpu_id()) { - local_s->set_my_dc(_my_dc); - local_s->set_my_rack(_my_rack); - } - }); - }); - }); - } - - return make_ready_future<>(); -} - -future<> ec2_snitch::start() { - _state = snitch_state::initializing; - - return load_config().then([this] { - set_snitch_ready(); - }); -} - -future ec2_snitch::aws_api_call(sstring addr, sstring cmd) { - return engine().net().connect(make_ipv4_address(ipv4_addr{addr})) - .then([this, addr, cmd] (connected_socket fd) { - _sd = std::move(fd); - _in = std::move(_sd.input()); - _out = std::move(_sd.output()); - _zone_req = sstring("GET ") + cmd + - sstring(" HTTP/1.1\r\nHost: ") +addr + - sstring("\r\n\r\n"); - - return _out.write(_zone_req.c_str()).then([this] { - return _out.flush(); - }); - }).then([this] { - _parser.init(); - return _in.consume(_parser).then([this] { - if (_parser.eof()) { - return make_exception_future("Bad HTTP response"); - } - - // Read HTTP response header first - auto _rsp = _parser.get_parsed_response(); - auto it = _rsp->_headers.find("Content-Length"); - if (it == _rsp->_headers.end()) { - return make_exception_future("Error: HTTP response does not contain: Content-Length\n"); - } - - auto content_len = std::stoi(it->second); - - // Read HTTP response body - return _in.read_exactly(content_len).then([this] (temporary_buffer buf) { - sstring res(buf.get(), buf.size()); - - return make_ready_future(std::move(res)); - }); - }); - }); -} - -future ec2_snitch::read_property_file() { - return load_property_file().then([this] { - sstring dc_suffix; - - if (_prop_values.count(dc_suffix_property_key)) { - dc_suffix = _prop_values[dc_suffix_property_key]; - } - - return dc_suffix; - }); -} - -using registry_2_params = class_registrator; -static registry_2_params registrator2("org.apache.cassandra.locator.Ec2Snitch"); -static registry_2_params registrator2_short_name("Ec2Snitch"); - - -using registry_1_param = class_registrator; -static registry_1_param registrator1("org.apache.cassandra.locator.Ec2Snitch"); -static registry_1_param registrator1_short_name("Ec2Snitch"); - -using registry_default = class_registrator; -static registry_default registrator_default("org.apache.cassandra.locator.Ec2Snitch"); -static registry_default registrator_default_short_name("Ec2Snitch"); -} // namespace locator diff --git a/scylla/locator/ec2_snitch.hh b/scylla/locator/ec2_snitch.hh deleted file mode 100644 index 954ca06..0000000 --- a/scylla/locator/ec2_snitch.hh +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ -#pragma once - -#include "locator/production_snitch_base.hh" -#include "http/http_response_parser.hh" - -namespace locator { -class ec2_snitch : public production_snitch_base { -public: - static constexpr const char* ZONE_NAME_QUERY_REQ = "/latest/meta-data/placement/availability-zone"; - static constexpr const char* AWS_QUERY_SERVER_ADDR = "169.254.169.254:80"; - - ec2_snitch(const sstring& fname = "", unsigned io_cpu_id = 0); - virtual future<> start() override; - virtual sstring get_name() const override { - return "org.apache.cassandra.locator.Ec2Snitch"; - } -protected: - future<> load_config(); - future aws_api_call(sstring addr, const sstring cmd); - future read_property_file(); -private: - connected_socket _sd; - input_stream _in; - output_stream _out; - http_response_parser _parser; - sstring _zone_req; -}; -} // namespace locator diff --git a/scylla/locator/everywhere_replication_strategy.cc b/scylla/locator/everywhere_replication_strategy.cc deleted file mode 100644 index 9ececbd..0000000 --- a/scylla/locator/everywhere_replication_strategy.cc +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
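For reference, the aws_api_call() helper declared in ec2_snitch.hh above performs a bare, hand-rolled HTTP/1.1 exchange against the EC2 metadata service at 169.254.169.254:80. The request string it writes is assembled exactly as in this small sketch (function name is illustrative):

    #include <string>

    // Reconstruction of the request aws_api_call() sends; for
    // ZONE_NAME_QUERY_REQ the wire bytes are:
    //   "GET /latest/meta-data/placement/availability-zone HTTP/1.1\r\n"
    //   "Host: 169.254.169.254:80\r\n\r\n"
    std::string build_metadata_request(const std::string& addr, const std::string& cmd) {
        return "GET " + cmd + " HTTP/1.1\r\nHost: " + addr + "\r\n\r\n";
    }

The response is then parsed by reading the Content-Length header and consuming exactly that many body bytes, as the .cc above shows.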
- */ - - -#include "locator/everywhere_replication_strategy.hh" -#include "utils/class_registrator.hh" - -namespace locator { - -everywhere_replication_strategy::everywhere_replication_strategy(const sstring& keyspace_name, token_metadata& token_metadata, snitch_ptr& snitch, const std::map& config_options) : - abstract_replication_strategy(keyspace_name, token_metadata, snitch, config_options, replication_strategy_type::everywhere_topology) {} - -using registry = class_registrator&>; -static registry registrator("org.apache.cassandra.locator.EverywhereStrategy"); -static registry registrator_short_name("EverywhereStrategy"); -} diff --git a/scylla/locator/everywhere_replication_strategy.hh b/scylla/locator/everywhere_replication_strategy.hh deleted file mode 100644 index 3896e9a..0000000 --- a/scylla/locator/everywhere_replication_strategy.hh +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */
-
-#pragma once
-
-#include "locator/abstract_replication_strategy.hh"
-#include
-
-namespace locator {
-class everywhere_replication_strategy : public abstract_replication_strategy {
-public:
-    everywhere_replication_strategy(const sstring& keyspace_name, token_metadata& token_metadata, snitch_ptr& snitch, const std::map<sstring, sstring>& config_options);
-
-    virtual std::vector<inet_address> calculate_natural_endpoints(const token& search_token, token_metadata& tm) const override {
-        return tm.get_all_endpoints();
-    }
-
-    virtual void validate_options() const override { /* noop */ }
-
-    std::experimental::optional<std::unordered_set<sstring>> recognized_options() const override {
-        // We explicitly allow all options
-        return std::experimental::nullopt;
-    }
-
-    virtual size_t get_replication_factor() const override {
-        return _token_metadata.get_all_endpoints_count();
-    }
-};
-}
diff --git a/scylla/locator/gossiping_property_file_snitch.cc b/scylla/locator/gossiping_property_file_snitch.cc
deleted file mode 100644
index c0a7b3c..0000000
--- a/scylla/locator/gossiping_property_file_snitch.cc
+++ /dev/null
@@ -1,349 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Modified by ScyllaDB
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "locator/gossiping_property_file_snitch.hh"
-#include "gms/versioned_value.hh"
-
-namespace locator {
-future<bool> gossiping_property_file_snitch::property_file_was_modified() {
-    return open_file_dma(_prop_file_name, open_flags::ro)
-    .then([this](file f) {
-        return do_with(std::move(f), [] (file& f) {
-            return f.stat();
-        });
-    }).then_wrapped([this] (auto&& f) {
-        try {
-            auto st = std::get<0>(f.get());
-
-            if (!_last_file_mod ||
-                _last_file_mod->tv_sec != st.st_mtim.tv_sec) {
-                _last_file_mod = st.st_mtim;
-                return true;
-            } else {
-                return false;
-            }
-        } catch (...)
{ - logger().error("Failed to open {} for read or to get stats", _prop_file_name); - throw; - } - }); -} - -gossiping_property_file_snitch::gossiping_property_file_snitch( - const sstring& fname, unsigned io_cpuid) -: production_snitch_base(fname), _file_reader_cpu_id(io_cpuid) { - if (engine().cpu_id() == _file_reader_cpu_id) { - io_cpu_id() = _file_reader_cpu_id; - } -} - -future<> gossiping_property_file_snitch::start() { - using namespace std::chrono_literals; - - _state = snitch_state::initializing; - - reset_io_state(); - - // Run a timer only on specific CPU - if (engine().cpu_id() == _file_reader_cpu_id) { - // - // Here we will create a timer that will read the properties file every - // minute and load its contents into the gossiper.endpoint_state_map - // - _file_reader.set_callback([this] { - periodic_reader_callback(); - }); - - return read_property_file().then([this] { - start_io(); - set_snitch_ready(); - return make_ready_future<>(); - }); - } - - set_snitch_ready(); - return make_ready_future<>(); -} - -void gossiping_property_file_snitch::periodic_reader_callback() { - _file_reader_runs = true; - property_file_was_modified().then([this] (bool was_modified) { - - if (was_modified) { - return read_property_file(); - } - - return make_ready_future<>(); - }).then_wrapped([this] (auto&& f) { - try { - f.get(); - } catch (...) { - logger().error("Exception has been thrown when parsing the property file."); - } - - if (_state == snitch_state::stopping || _state == snitch_state::io_pausing) { - this->set_stopped(); - } else if (_state != snitch_state::stopped) { - _file_reader.arm(reload_property_file_period()); - } - - _file_reader_runs = false; - }); -} - -future<> gossiping_property_file_snitch::gossiper_starting() { - using namespace gms; - using namespace service; - // - // Note: currently gossiper "main" instance always runs on CPU0 therefore - // this function will be executed on CPU0 only. - // - auto& g = get_local_gossiper(); - auto& ss = get_local_storage_service(); - - auto local_internal_addr = netw::get_local_messaging_service().listen_address(); - std::ostringstream ostrm; - - ostrm< gossiping_property_file_snitch::read_property_file() { - using namespace exceptions; - - return load_property_file().then([this] { - return reload_configuration(); - }).then_wrapped([this] (auto&& f) { - try { - f.get(); - return make_ready_future<>(); - } catch (...) { - // - // In case of an error: - // - Halt if in the constructor. - // - Print an error when reloading. - // - if (_state == snitch_state::initializing) { - logger().error("Failed to parse a properties file ({}). Halting...", _prop_file_name); - throw; - } else { - logger().warn("Failed to reload a properties file ({}). Using previous values.", _prop_file_name); - return make_ready_future<>(); - } - } - }); -} - -future<> gossiping_property_file_snitch::reload_configuration() { - // "prefer_local" is FALSE by default - bool new_prefer_local = false; - sstring new_dc; - sstring new_rack; - - // Rack and Data Center have to be defined in the properties file! 
- if (!_prop_values.count(dc_property_key) || !_prop_values.count(rack_property_key)) { - throw_incomplete_file(); - } - - new_dc = _prop_values[dc_property_key]; - new_rack = _prop_values[rack_property_key]; - - if (_prop_values.count(prefer_local_property_key)) { - if (_prop_values[prefer_local_property_key] == "false") { - new_prefer_local = false; - } else if (_prop_values[prefer_local_property_key] == "true") { - new_prefer_local = true; - } else { - throw_bad_format("prefer_local configuration is malformed"); - } - } - - if (_state == snitch_state::initializing || _my_dc != new_dc || - _my_rack != new_rack || _prefer_local != new_prefer_local) { - - _my_dc = new_dc; - _my_rack = new_rack; - _prefer_local = new_prefer_local; - - assert(_my_distributed); - - return _my_distributed->invoke_on_all( - [this] (snitch_ptr& local_s) { - - // Distribute the new values on all CPUs but the current one - if (engine().cpu_id() != _file_reader_cpu_id) { - local_s->set_my_dc(_my_dc); - local_s->set_my_rack(_my_rack); - local_s->set_prefer_local(_prefer_local); - } - }).then([this] { - return seastar::async([this] { - // reload Gossiper state (executed on CPU0 only) - smp::submit_to(0, [] { - auto& local_snitch_ptr = get_local_snitch_ptr(); - local_snitch_ptr->reload_gossiper_state(); - }).get(); - - // update Storage Service on each shard - auto cpus = boost::irange(0u, smp::count); - parallel_for_each(cpus.begin(), cpus.end(), [] (unsigned int c) { - return smp::submit_to(c, [] { - if (service::get_storage_service().local_is_initialized()) { - auto& tmd = service::get_local_storage_service().get_token_metadata(); - - // initiate the token metadata endpoints cache reset - tmd.invalidate_cached_rings(); - // re-read local rack and DC info - tmd.update_topology(utils::fb_utilities::get_broadcast_address()); - } - }); - }).get(); - - - // spread the word... - smp::submit_to(0, [] { - auto& local_snitch_ptr = get_local_snitch_ptr(); - if (local_snitch_ptr->local_gossiper_started() && service::get_storage_service().local_is_initialized()) { - return service::get_local_storage_service().gossip_snitch_info(); - } - - return make_ready_future<>(); - }).get(); - }); - }); - } - - return make_ready_future<>(); -} - -void gossiping_property_file_snitch::set_stopped() { - if (_state == snitch_state::stopping) { - _state = snitch_state::stopped; - } else { - _state = snitch_state::io_paused; - } - - _io_is_stopped.set_value(); -} - -future<> gossiping_property_file_snitch::stop_io() { - if (engine().cpu_id() == _file_reader_cpu_id) { - _file_reader.cancel(); - - // If timer is not running then set the STOPPED state right away. 
- if (!_file_reader_runs) { - set_stopped(); - } - } else { - set_stopped(); - } - - return _io_is_stopped.get_future(); -} - -void gossiping_property_file_snitch::resume_io() { - reset_io_state(); - start_io(); - set_snitch_ready(); -} - -void gossiping_property_file_snitch::start_io() { - // Run a timer only on specific CPU - if (engine().cpu_id() == _file_reader_cpu_id) { - _file_reader.arm(reload_property_file_period()); - } -} - -future<> gossiping_property_file_snitch::stop() { - if (_state == snitch_state::stopped || _state == snitch_state::io_paused) { - return make_ready_future<>(); - } - - _state = snitch_state::stopping; - - return stop_io(); -} - -future<> gossiping_property_file_snitch::pause_io() { - if (_state == snitch_state::stopped || _state == snitch_state::io_paused) { - return make_ready_future<>(); - } - - _state = snitch_state::io_pausing; - - return stop_io(); -} - -// should be invoked of CPU0 only -void gossiping_property_file_snitch::reload_gossiper_state() { - if (!_gossip_started) { - return; - } - - if (_reconnectable_helper) { - gms::get_local_gossiper().unregister_(_reconnectable_helper); - } - - if (!_prefer_local) { - return; - } - - _reconnectable_helper = make_shared(_my_dc); - gms::get_local_gossiper().register_(_reconnectable_helper); -} - -using registry_2_params = class_registrator; -static registry_2_params registrator2("org.apache.cassandra.locator.GossipingPropertyFileSnitch"); - -using registry_1_param = class_registrator; -static registry_1_param registrator1("org.apache.cassandra.locator.GossipingPropertyFileSnitch"); - -using registry_default = class_registrator; -static registry_default registrator_default("org.apache.cassandra.locator.GossipingPropertyFileSnitch"); -static registry_default registrator_default_short_name("GossipingPropertyFileSnitch"); -} // namespace locator diff --git a/scylla/locator/gossiping_property_file_snitch.hh b/scylla/locator/gossiping_property_file_snitch.hh deleted file mode 100644 index c23f273..0000000 --- a/scylla/locator/gossiping_property_file_snitch.hh +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
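As an aside on reload_configuration() above: prefer_local is parsed strictly, defaulting to false when the key is absent and rejecting anything but the literal strings "true"/"false". A condensed sketch of the same tri-state handling, with std:: types standing in for the sstring map used by the snitch:

    #include <map>
    #include <stdexcept>
    #include <string>

    // Same handling as reload_configuration(): a missing key means
    // false; any value other than "true"/"false" is a format error.
    bool parse_prefer_local(const std::map<std::string, std::string>& props) {
        auto it = props.find("prefer_local");
        if (it == props.end()) {
            return false;
        }
        if (it->second == "false") {
            return false;
        }
        if (it->second == "true") {
            return true;
        }
        throw std::runtime_error("prefer_local configuration is malformed");
    }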
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include <boost/algorithm/string.hpp>
-#include <boost/filesystem.hpp>
-#include <chrono>
-#include <experimental/optional>
-#include <functional>
-#include <sstream>
-#include "production_snitch_base.hh"
-#include "exceptions/exceptions.hh"
-#include "service/storage_service.hh"
-#include "core/file.hh"
-#include "log.hh"
-#include "locator/reconnectable_snitch_helper.hh"
-
-namespace locator {
-
-/**
- * cassandra-rackdc.properties file has the following format:
- *
- * dc=<Data Center name>
- * rack=<Rack name>
- * prefer_local=<false|true>
- */
-class gossiping_property_file_snitch : public production_snitch_base {
-public:
-    // Check the property file for changes every 60s.
-    static constexpr timer<lowres_clock>::duration reload_property_file_period() {
-        return std::chrono::seconds(60);
-    }
-
-    virtual future<> gossiper_starting() override;
-    virtual future<> stop() override;
-    virtual future<> start() override;
-    virtual future<> pause_io() override;
-    virtual void resume_io() override;
-    virtual sstring get_name() const override {
-        return "org.apache.cassandra.locator.GossipingPropertyFileSnitch";
-    }
-
-    gossiping_property_file_snitch(
-        const sstring& fname = "",
-        unsigned io_cpuid = 0);
-
-    /**
-     * This function registers a Gossiper subscriber to reconnect according to
-     * the new "prefer_local" value, namely use either an internal or external
-     * IP address.
-     *
-     * @note Currently in order to be backward compatible we are mimicking the C*
-     *       behavior, which is a bit strange: while allowing the change of
-     *       prefer_local value during the same run it won't actually trigger
-     *       disconnect from all remote nodes as would be logical (in order to
-     *       connect using a new configuration). On the contrary, if the new
-     *       prefer_local value is TRUE, it will trigger the reconnect only when
-     *       a corresponding gossip event (e.g. on_change()) from the
-     *       corresponding node has been accepted. If the new value is FALSE
-     *       then it won't trigger disconnect at all! And in any case a remote
-     *       node will be reconnected using the PREFERRED_IP value stored in the
-     *       system_table.peer.
-     *
-     *       This is currently relevant to EC2/GCE(?) only.
-     */
-    virtual void reload_gossiper_state() override;
-
-private:
-    void periodic_reader_callback();
-
-    /**
-     * Parse the property file and notify the StorageService and the Gossiper
-     * if there was a configuration change.
-     *
-     * @return a ready-future when we are done
-     */
-    future<> reload_configuration();
-
-    /**
-     * Check if the property file has been modified since the last time we
-     * parsed it.
-     *
-     * @return TRUE if property file has been modified
-     */
-    future<bool> property_file_was_modified();
-
-    /**
-     * Read the property file if it has changed since the last time we read it.
-     */
-    future<> read_property_file();
-
-    /**
-     * Indicate that the snitch has stopped its I/O.
-     */
-    void set_stopped();
-
-    future<> stop_io();
-    void start_io();
-
-private:
-    timer<lowres_clock> _file_reader;
-    std::experimental::optional<timespec> _last_file_mod;
-    std::istringstream _istrm;
-    bool _file_reader_runs = false;
-    unsigned _file_reader_cpu_id;
-    shared_ptr<reconnectable_snitch_helper> _reconnectable_helper;
-};
-} // namespace locator
diff --git a/scylla/locator/local_strategy.cc b/scylla/locator/local_strategy.cc
deleted file mode 100644
index 2abad0c..0000000
--- a/scylla/locator/local_strategy.cc
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include -#include "local_strategy.hh" -#include "utils/class_registrator.hh" -#include "utils/fb_utilities.hh" - - -namespace locator { - -local_strategy::local_strategy(const sstring& keyspace_name, token_metadata& token_metadata, snitch_ptr& snitch, const std::map& config_options) : - abstract_replication_strategy(keyspace_name, token_metadata, snitch, config_options, replication_strategy_type::local) {} - -std::vector local_strategy::get_natural_endpoints(const token& t) { - return calculate_natural_endpoints(t, _token_metadata); -} - -std::vector local_strategy::calculate_natural_endpoints(const token& t, token_metadata& tm) const { - return std::vector({utils::fb_utilities::get_broadcast_address()}); -} - -void local_strategy::validate_options() const { -} - -std::experimental::optional> local_strategy::recognized_options() const { - // LocalStrategy doesn't expect any options. - return {}; -} - -size_t local_strategy::get_replication_factor() const { - return 1; -} - -using registry = class_registrator&>; -static registry registrator("org.apache.cassandra.locator.LocalStrategy"); -static registry registrator_short_name("LocalStrategy"); - -} diff --git a/scylla/locator/local_strategy.hh b/scylla/locator/local_strategy.hh deleted file mode 100644 index ba9a251..0000000 --- a/scylla/locator/local_strategy.hh +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
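The class_registrator lines in local_strategy.cc above (and their counterparts in the snitch files) rely on static registration: a global object whose constructor adds a factory to a name-to-factory map, which is what lets cassandra.yaml name a strategy or snitch by its Java-style class name. A simplified sketch of the mechanism with toy types (strategy, registrator, and create are illustrative, not the real utils/class_registrator.hh API):

#include <functional>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>

struct strategy {                      // stand-in for abstract_replication_strategy
    virtual ~strategy() = default;
};

using factory = std::function<std::unique_ptr<strategy>()>;

std::map<std::string, factory>& registry() {
    static std::map<std::string, factory> r;   // constructed on first use
    return r;
}

template <typename T>
struct registrator {                   // stand-in for class_registrator
    explicit registrator(const std::string& name) {
        registry()[name] = [] { return std::make_unique<T>(); };
    }
};

struct local_strategy_model : strategy {};

// Register under both the fully qualified and the short name, as the
// deleted code does for "LocalStrategy":
static registrator<local_strategy_model> r1("org.apache.cassandra.locator.LocalStrategy");
static registrator<local_strategy_model> r2("LocalStrategy");

std::unique_ptr<strategy> create(const std::string& name) {
    auto it = registry().find(name);
    if (it == registry().end()) {
        throw std::runtime_error("no such class: " + name);
    }
    return it->second();               // invoke the stored factory
}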
- */ - -#pragma once - -#include "abstract_replication_strategy.hh" - -#include -#include - -// forward declaration since database.hh includes this file -class keyspace; - -namespace locator { - -using inet_address = gms::inet_address; -using token = dht::token; - -class local_strategy : public abstract_replication_strategy { -protected: - virtual std::vector calculate_natural_endpoints(const token& search_token, token_metadata& tm) const override; -public: - local_strategy(const sstring& keyspace_name, token_metadata& token_metadata, snitch_ptr& snitch, const std::map& config_options); - virtual ~local_strategy() {}; - virtual size_t get_replication_factor() const; - /** - * We need to override this even if we override calculateNaturalEndpoints, - * because the default implementation depends on token calculations but - * LocalStrategy may be used before tokens are set up. - */ - std::vector get_natural_endpoints(const token& search_token) override; - - virtual void validate_options() const override; - - virtual std::experimental::optional> recognized_options() const override; -}; - -} diff --git a/scylla/locator/locator.cc b/scylla/locator/locator.cc deleted file mode 100644 index ed6b21a..0000000 --- a/scylla/locator/locator.cc +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -// Used to ensure that all .hh files build, as well as a place to put -// out-of-line implementations. - -#include "locator/simple_snitch.hh" -#include "locator/rack_inferring_snitch.hh" -#include "locator/gossiping_property_file_snitch.hh" diff --git a/scylla/locator/network_topology_strategy.cc b/scylla/locator/network_topology_strategy.cc deleted file mode 100644 index df41b2d..0000000 --- a/scylla/locator/network_topology_strategy.cc +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. 
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "locator/network_topology_strategy.hh" -#include "utils/sequenced_set.hh" -#include - -namespace locator { - - -network_topology_strategy::network_topology_strategy( - const sstring& keyspace_name, - token_metadata& token_metadata, - snitch_ptr& snitch, - const std::map& config_options) : - abstract_replication_strategy(keyspace_name, - token_metadata, - snitch, - config_options, - replication_strategy_type::network_topology) { - for (auto& config_pair : config_options) { - auto& key = config_pair.first; - auto& val = config_pair.second; - - // - // FIXME!!! - // The first option we get at the moment is a class name. Skip it! - // - if (boost::iequals(key, "class")) { - continue; - } - - if (boost::iequals(key, "replication_factor")) { - throw exceptions::configuration_exception( - "replication_factor is an option for SimpleStrategy, not " - "NetworkTopologyStrategy"); - } - - _dc_rep_factor.emplace(key, std::stol(val)); - _datacenteres.push_back(key); - } - - _rep_factor = 0; - - for (auto& one_dc_rep_factor : _dc_rep_factor) { - _rep_factor += one_dc_rep_factor.second; - } - - debug("Configured datacenter replicas are:"); - for (auto& p : _dc_rep_factor) { - debug("{}: {}", p.first, p.second); - } -} - -std::vector -network_topology_strategy::calculate_natural_endpoints( - const token& search_token, token_metadata& tm) const { - // - // We want to preserve insertion order so that the first added endpoint - // becomes primary. - // - utils::sequenced_set replicas; - - // replicas we have found in each DC - std::unordered_map> dc_replicas; - // tracks the racks we have already placed replicas in - std::unordered_map> seen_racks; - // - // tracks the endpoints that we skipped over while looking for unique racks - // when we relax the rack uniqueness we can append this to the current - // result so we don't have to wind back the iterator - // - std::unordered_map> - skipped_dc_endpoints; - - // - // Populate the temporary data structures. 
-    //
-    replicas.reserve(get_replication_factor());
-    for (auto& dc_rep_factor_pair : _dc_rep_factor) {
-        auto& dc_name = dc_rep_factor_pair.first;
-
-        dc_replicas[dc_name].reserve(dc_rep_factor_pair.second);
-        seen_racks[dc_name] = {};
-        skipped_dc_endpoints[dc_name] = {};
-    }
-
-    topology& tp = tm.get_topology();
-
-    //
-    // all endpoints in each DC, so we can check when we have exhausted all
-    // the members of a DC
-    //
-    std::unordered_map<sstring, std::unordered_set<inet_address>>&
-        all_endpoints = tp.get_datacenter_endpoints();
-    //
-    // all racks in a DC so we can check when we have exhausted all racks in a
-    // DC
-    //
-    std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<inet_address>>>&
-        racks = tp.get_datacenter_racks();
-
-    // not aware of any cluster members
-    assert(!all_endpoints.empty() && !racks.empty());
-
-    for (auto& next : tm.ring_range(search_token)) {
-
-        if (has_sufficient_replicas(dc_replicas, all_endpoints)) {
-            break;
-        }
-
-        inet_address ep = *tm.get_endpoint(next);
-        sstring dc = _snitch->get_datacenter(ep);
-
-        auto& seen_racks_dc_set = seen_racks[dc];
-        auto& racks_dc_map = racks[dc];
-        auto& skipped_dc_endpoints_set = skipped_dc_endpoints[dc];
-        auto& dc_replicas_dc_set = dc_replicas[dc];
-
-        // have we already found all replicas for this dc?
-        if (_dc_rep_factor.find(dc) == _dc_rep_factor.end() ||
-            has_sufficient_replicas(dc, dc_replicas, all_endpoints)) {
-            continue;
-        }
-
-        //
-        // can we skip checking the rack? - namely, we've seen all racks in this
-        // DC already and may add this endpoint right away.
-        //
-        if (seen_racks_dc_set.size() == racks_dc_map.size()) {
-            dc_replicas_dc_set.insert(ep);
-            replicas.push_back(ep);
-        } else {
-            sstring rack = _snitch->get_rack(ep);
-            // is this a new rack? - we prefer to replicate on different racks
-            if (seen_racks_dc_set.find(rack) != seen_racks_dc_set.end()) {
-                skipped_dc_endpoints_set.push_back(ep);
-            } else { // this IS a new rack
-                dc_replicas_dc_set.insert(ep);
-                replicas.push_back(ep);
-                seen_racks_dc_set.insert(rack);
-                //
-                // if we've run out of distinct racks, add the hosts we skipped
-                // past already (up to RF)
-                //
-                if (seen_racks_dc_set.size() == racks_dc_map.size()) {
-                    auto skipped_it = skipped_dc_endpoints_set.begin();
-                    while (skipped_it != skipped_dc_endpoints_set.end() &&
-                           !has_sufficient_replicas(dc, dc_replicas, all_endpoints)) {
-                        inet_address skipped = *skipped_it++;
-                        dc_replicas_dc_set.insert(skipped);
-                        replicas.push_back(skipped);
-                    }
-                }
-            }
-        }
-    }
-
-    return std::move(replicas.get_vector());
-}
-
-void network_topology_strategy::validate_options() const {
-    for (auto& c : _config_options) {
-        if (c.first == sstring("replication_factor")) {
-            throw exceptions::configuration_exception(
-                "replication_factor is an option for simple_strategy, not "
-                "network_topology_strategy");
-        }
-        validate_replication_factor(c.second);
-    }
-}
-
-std::experimental::optional<std::unordered_set<sstring>> network_topology_strategy::recognized_options() const {
-    // We explicitly allow all options
-    return std::experimental::nullopt;
-}
-
-inline bool network_topology_strategy::has_sufficient_replicas(
-    const sstring& dc,
-    std::unordered_map<sstring, std::unordered_set<inet_address>>& dc_replicas,
-    std::unordered_map<sstring, std::unordered_set<inet_address>>& all_endpoints) const {
-
-    return dc_replicas[dc].size() >=
-        std::min(all_endpoints[dc].size(), get_replication_factor(dc));
-}
-
-inline bool network_topology_strategy::has_sufficient_replicas(
-    std::unordered_map<sstring, std::unordered_set<inet_address>>& dc_replicas,
-    std::unordered_map<sstring, std::unordered_set<inet_address>>& all_endpoints) const {
-
-    for (auto& dc : get_datacenters()) {
-        if (!has_sufficient_replicas(dc, dc_replicas, all_endpoints)) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
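The selection loop above reads more easily on a toy model: walk the ring from the search token, take an endpoint immediately if its rack is new for its DC, park it on a skipped list otherwise, and drain the skipped list once every rack in the DC has contributed. A compact standalone sketch of that rule for a single DC (toy node/ring types; wrap-around and cross-DC bookkeeping are omitted):

#include <cstddef>
#include <set>
#include <string>
#include <vector>

struct node { std::string name, dc, rack; };

// Pick up to rf replicas for one DC from a ring already ordered from the
// search token, preferring one replica per rack before reusing racks.
std::vector<node> pick_dc_replicas(const std::vector<node>& ring,
                                   const std::string& dc, std::size_t rf,
                                   std::size_t racks_in_dc) {
    std::vector<node> replicas, skipped;
    std::set<std::string> seen_racks;
    for (const auto& n : ring) {
        if (replicas.size() == rf) break;
        if (n.dc != dc) continue;                 // another DC's walk handles it
        if (seen_racks.size() == racks_in_dc) {   // every rack already represented
            replicas.push_back(n);
        } else if (seen_racks.count(n.rack)) {
            skipped.push_back(n);                 // remember; may be needed later
        } else {
            seen_racks.insert(n.rack);
            replicas.push_back(n);                // a new rack: take it now
            if (seen_racks.size() == racks_in_dc) {
                // ran out of distinct racks: drain the skipped list up to rf
                for (const auto& s : skipped) {
                    if (replicas.size() == rf) break;
                    replicas.push_back(s);
                }
            }
        }
    }
    return replicas;
}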
-using registry = class_registrator&>; -static registry registrator("org.apache.cassandra.locator.NetworkTopologyStrategy"); -static registry registrator_short_name("NetworkTopologyStrategy"); -} diff --git a/scylla/locator/network_topology_strategy.hh b/scylla/locator/network_topology_strategy.hh deleted file mode 100644 index d59c9a5..0000000 --- a/scylla/locator/network_topology_strategy.hh +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "locator/abstract_replication_strategy.hh" -#include "exceptions/exceptions.hh" - -#include -#include - -namespace locator { -class network_topology_strategy : public abstract_replication_strategy { -public: - network_topology_strategy( - const sstring& keyspace_name, - token_metadata& token_metadata, - snitch_ptr& snitch, - const std::map& config_options); - - virtual size_t get_replication_factor() const override { - return _rep_factor; - } - - size_t get_replication_factor(const sstring& dc) const { - auto dc_factor = _dc_rep_factor.find(dc); - return (dc_factor == _dc_rep_factor.end()) ? 0 : dc_factor->second; - } - - const std::vector& get_datacenters() const { - return _datacenteres; - } - -protected: - /** - * calculate endpoints in one pass through the tokens by tracking our - * progress in each DC, rack etc. 
- */ - virtual std::vector calculate_natural_endpoints( - const token& search_token, token_metadata& tm) const override; - - virtual void validate_options() const override; - - virtual std::experimental::optional> recognized_options() const override; - -private: - bool has_sufficient_replicas( - const sstring& dc, - std::unordered_map>& dc_replicas, - std::unordered_map>& all_endpoints) const; - - bool has_sufficient_replicas( - std::unordered_map>& dc_replicas, - std::unordered_map>& all_endpoints) const; - -private: - // map: data centers -> replication factor - std::unordered_map _dc_rep_factor; - - std::vector _datacenteres; - size_t _rep_factor; -}; -} // namespace locator diff --git a/scylla/locator/production_snitch_base.cc b/scylla/locator/production_snitch_base.cc deleted file mode 100644 index d00d15c..0000000 --- a/scylla/locator/production_snitch_base.cc +++ /dev/null @@ -1,59 +0,0 @@ -#include "locator/production_snitch_base.hh" - -namespace locator { -future<> production_snitch_base::load_property_file() { - return open_file_dma(_prop_file_name, open_flags::ro) - .then([this] (file f) { - return do_with(std::move(f), [this] (file& f) { - return f.size().then([this, &f] (size_t s) { - _prop_file_size = s; - - return f.dma_read_exactly(0, s); - }); - }).then([this] (temporary_buffer tb) { - _prop_file_contents = std::move(std::string(tb.get(), _prop_file_size)); - parse_property_file(); - - return make_ready_future<>(); - }); - }); -} - -void production_snitch_base::parse_property_file() { - using namespace boost::algorithm; - - std::string line; - std::istringstream istrm(_prop_file_contents); - std::vector split_line; - _prop_values.clear(); - - while (std::getline(istrm, line)) { - trim(line); - - // Skip comments or empty lines - if (!line.size() || line.at(0) == '#') { - continue; - } - - split_line.clear(); - split(split_line, line, is_any_of("=")); - - if (split_line.size() != 2) { - throw_bad_format(line); - } - - auto key = split_line[0]; trim(key); - auto val = split_line[1]; trim(val); - - if (val.empty() || !allowed_property_keys.count(key)) { - throw_bad_format(line); - } - - if (_prop_values.count(key)) { - throw_double_declaration(key); - } - - _prop_values[key] = val; - } -} -} // namespace locator diff --git a/scylla/locator/production_snitch_base.hh b/scylla/locator/production_snitch_base.hh deleted file mode 100644 index bc463a5..0000000 --- a/scylla/locator/production_snitch_base.hh +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. 
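parse_property_file() above is a plain line-oriented parser: trim each line, skip blanks and '#' comments, split on '=', and reject unknown keys, empty values, and duplicate declarations. The same shape in standalone C++ (illustrative names; splitting at the first '=' is a simplification of the exact two-field split the deleted code performs):

#include <cctype>
#include <map>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>

static std::string trim(std::string s) {
    auto sp = [](unsigned char c) { return std::isspace(c) != 0; };
    while (!s.empty() && sp(s.front())) s.erase(s.begin());
    while (!s.empty() && sp(s.back()))  s.pop_back();
    return s;
}

std::map<std::string, std::string> parse_rackdc(const std::string& contents) {
    // Mirrors the allowed-key set in production_snitch_base.
    static const std::set<std::string> allowed{"dc", "rack", "prefer_local", "dc_suffix"};
    std::map<std::string, std::string> values;
    std::istringstream in(contents);
    std::string line;
    while (std::getline(in, line)) {
        line = trim(line);
        if (line.empty() || line[0] == '#') continue;   // blank line or comment
        auto eq = line.find('=');
        if (eq == std::string::npos) throw std::runtime_error("bad format: " + line);
        auto key = trim(line.substr(0, eq));
        auto val = trim(line.substr(eq + 1));
        if (val.empty() || !allowed.count(key)) throw std::runtime_error("bad format: " + line);
        if (values.count(key)) throw std::runtime_error("double declaration: " + key);
        values[key] = val;
    }
    return values;
}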
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include -#include -#include -#include - -#include "gms/endpoint_state.hh" -#include "gms/gossiper.hh" -#include "utils/fb_utilities.hh" -#include "locator/token_metadata.hh" -#include "db/system_keyspace.hh" -#include "db/config.hh" -#include "core/sstring.hh" -#include "snitch_base.hh" - -namespace locator { - -class bad_property_file_error : public std::exception {}; - -class production_snitch_base : public snitch_base { -public: - // map of inet address to (datacenter, rack) pair - typedef std::unordered_map addr2dc_rack_map; - - static constexpr const char* default_dc = "UNKNOWN_DC"; - static constexpr const char* default_rack = "UNKNOWN_RACK"; - static constexpr const char* snitch_properties_filename = "cassandra-rackdc.properties"; - - // only these property values are supported - static constexpr const char* dc_property_key = "dc"; - static constexpr const char* rack_property_key = "rack"; - static constexpr const char* prefer_local_property_key = "prefer_local"; - static constexpr const char* dc_suffix_property_key = "dc_suffix"; - const std::unordered_set allowed_property_keys; - - production_snitch_base(const sstring& prop_file_name = "") - : allowed_property_keys({ dc_property_key, - rack_property_key, - prefer_local_property_key, - dc_suffix_property_key }){ - if (!prop_file_name.empty()) { - _prop_file_name = prop_file_name; - } else { - using namespace boost::filesystem; - - path def_prop_file(db::config::get_conf_dir()); - def_prop_file /= path(snitch_properties_filename); - - _prop_file_name = def_prop_file.string(); - } - } - - virtual sstring get_rack(inet_address endpoint) { - if (endpoint == utils::fb_utilities::get_broadcast_address()) { - return _my_rack; - } - - return get_endpoint_info(endpoint, - gms::application_state::RACK, - default_rack); - } - - virtual sstring get_datacenter(inet_address endpoint) { - if (endpoint == utils::fb_utilities::get_broadcast_address()) { - return _my_dc; - } - - return get_endpoint_info(endpoint, - gms::application_state::DC, - default_dc); - } - - virtual void set_my_distributed(distributed* d) override { - _my_distributed = d; - } - - void reset_io_state() { - // - // Reset the promise to allow repeating - // start()+stop()/pause_io()+resume_io() call sequences. - // - _io_is_stopped = promise<>(); - } - -private: - sstring get_endpoint_info(inet_address endpoint, gms::application_state key, - const sstring& default_val) { - gms::gossiper& local_gossiper = gms::get_local_gossiper(); - auto state = local_gossiper.get_endpoint_state_for_endpoint(endpoint); - - // First, look in the gossiper::endpoint_state_map... - if (state) { - auto ep_state = state->get_application_state(key); - if (ep_state) { - return ep_state->value; - } - } - - // ...if not found - look in the SystemTable... 
- if (!_saved_endpoints) { - _saved_endpoints = db::system_keyspace::load_dc_rack_info(); - } - - auto it = _saved_endpoints->find(endpoint); - - if (it != _saved_endpoints->end()) { - if (key == gms::application_state::RACK) { - return it->second.rack; - } else { // gms::application_state::DC - return it->second.dc; - } - } - - // ...if still not found - return a default value - return default_val; - } - - virtual void set_my_dc(const sstring& new_dc) override { - _my_dc = new_dc; - } - - virtual void set_my_rack(const sstring& new_rack) override { - _my_rack = new_rack; - } - - virtual void set_prefer_local(bool prefer_local) override { - _prefer_local = prefer_local; - } - - void parse_property_file(); - -protected: - /** - * Loads the contents of the property file into the map - * - * @return ready future when the file contents has been loaded. - */ - future<> load_property_file(); - - void throw_double_declaration(const sstring& key) const { - logger().error("double \"{}\" declaration in {}", key, _prop_file_name); - throw bad_property_file_error(); - } - - void throw_bad_format(const sstring& line) const { - logger().error("Bad format in properties file {}: {}", _prop_file_name, line); - throw bad_property_file_error(); - } - - void throw_incomplete_file() const { - logger().error("Property file {} is incomplete. Some obligatory fields are missing.", _prop_file_name); - throw bad_property_file_error(); - } - -protected: - promise<> _io_is_stopped; - std::experimental::optional _saved_endpoints; - distributed* _my_distributed = nullptr; - std::string _prop_file_contents; - sstring _prop_file_name; - std::unordered_map _prop_values; - -private: - size_t _prop_file_size; -}; -} // namespace locator diff --git a/scylla/locator/rack_inferring_snitch.cc b/scylla/locator/rack_inferring_snitch.cc deleted file mode 100644 index c82a1e2..0000000 --- a/scylla/locator/rack_inferring_snitch.cc +++ /dev/null @@ -1,29 +0,0 @@ - -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "locator/rack_inferring_snitch.hh" - -namespace locator { -using registry = class_registrator; -static registry registrator1("org.apache.cassandra.locator.RackInferringSnitch"); -static registry registrator2("RackInferringSnitch"); -} diff --git a/scylla/locator/rack_inferring_snitch.hh b/scylla/locator/rack_inferring_snitch.hh deleted file mode 100644 index c842c62..0000000 --- a/scylla/locator/rack_inferring_snitch.hh +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
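get_endpoint_info() above resolves an endpoint's DC or rack through a three-level fallback: live gossip state first, then the dc/rack snapshot persisted in the system keyspace, then a default. The same chain sketched with std::optional and toy types (topology_source is illustrative, not the gms API):

#include <map>
#include <optional>
#include <string>

using endpoint = std::string;  // stand-in for gms::inet_address

struct topology_source {
    std::map<endpoint, std::string> gossip;   // live gossip application state
    std::map<endpoint, std::string> saved;    // system_keyspace dc/rack snapshot

    static std::optional<std::string> from(const std::map<endpoint, std::string>& m,
                                           const endpoint& ep) {
        auto it = m.find(ep);
        return it == m.end() ? std::nullopt
                             : std::optional<std::string>(it->second);
    }

    // First gossip, then the persisted snapshot, then the default value.
    std::string lookup(const endpoint& ep, const std::string& def) const {
        if (auto v = from(gossip, ep)) return *v;
        if (auto v = from(saved, ep))  return *v;
        return def;
    }
};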
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "core/sstring.hh" -#include "gms/inet_address.hh" -#include "snitch_base.hh" -#include "utils/fb_utilities.hh" - -namespace locator { - -using inet_address = gms::inet_address; - -/** - * A simple endpoint snitch implementation that assumes datacenter and rack information is encoded - * in the 2nd and 3rd octets of the ip address, respectively. - */ -struct rack_inferring_snitch : public snitch_base { - rack_inferring_snitch() { - _my_dc = get_datacenter(utils::fb_utilities::get_broadcast_address()); - _my_rack = get_rack(utils::fb_utilities::get_broadcast_address()); - - // This snitch is ready on creation - set_snitch_ready(); - } - - virtual sstring get_rack(inet_address endpoint) override { - return std::to_string((endpoint.raw_addr() >> 8) & 0xFF); - } - - virtual sstring get_datacenter(inet_address endpoint) override { - return std::to_string((endpoint.raw_addr() >> 16) & 0xFF); - } - - virtual sstring get_name() const override { - return "org.apache.cassandra.locator.RackInferringSnitch"; - } -}; - -} // namespace locator diff --git a/scylla/locator/reconnectable_snitch_helper.hh b/scylla/locator/reconnectable_snitch_helper.hh deleted file mode 100644 index 989f3b6..0000000 --- a/scylla/locator/reconnectable_snitch_helper.hh +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. 
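rack_inferring_snitch above derives topology purely from the IP address: the second octet names the datacenter and the third names the rack. A worked standalone example, assuming the address is a host-order IPv4 value as the raw_addr() arithmetic above implies:

#include <cassert>
#include <cstdint>
#include <string>

// 10.20.30.40 encoded as a host-order 32-bit value.
constexpr uint32_t addr(uint8_t a, uint8_t b, uint8_t c, uint8_t d) {
    return (uint32_t(a) << 24) | (uint32_t(b) << 16) | (uint32_t(c) << 8) | d;
}

std::string infer_dc(uint32_t ip)   { return std::to_string((ip >> 16) & 0xFF); }
std::string infer_rack(uint32_t ip) { return std::to_string((ip >> 8) & 0xFF); }

int main() {
    uint32_t ip = addr(10, 20, 30, 40);
    assert(infer_dc(ip) == "20");    // 2nd octet -> datacenter
    assert(infer_rack(ip) == "30");  // 3rd octet -> rack
}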
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "gms/i_endpoint_state_change_subscriber.hh" -#include "message/messaging_service.hh" -#include "locator/snitch_base.hh" - -namespace locator { - -// @note all callbacks should be called in seastar::async() context -class reconnectable_snitch_helper : public gms::i_endpoint_state_change_subscriber { -private: - static logging::logger& logger() { - static logging::logger _logger("reconnectable_snitch_helper"); - return _logger; - } - - sstring _local_dc; - -private: - - void reconnect(gms::inet_address public_address, gms::versioned_value local_address_value) { - reconnect(public_address, gms::inet_address(local_address_value.value)); - } - - void reconnect(gms::inet_address public_address, gms::inet_address local_address) { - auto& ms = netw::get_local_messaging_service(); - auto& sn_ptr = locator::i_endpoint_snitch::get_local_snitch_ptr(); - - if (sn_ptr->get_datacenter(public_address) == _local_dc && - ms.get_preferred_ip(public_address) != local_address) { - // - // First, store the local address in the system_table... - // - db::system_keyspace::update_preferred_ip(public_address, local_address).get(); - - // - // ...then update messaging_service cache and reset the currently - // open connections to this endpoint on all shards... - // - netw::get_messaging_service().invoke_on_all([public_address, local_address] (auto& local_ms) { - local_ms.cache_preferred_ip(public_address, local_address); - - netw::msg_addr id = { - .addr = public_address - }; - local_ms.remove_rpc_client(id); - }).get(); - - logger().debug("Initiated reconnect to an Internal IP {} for the {}", local_address, public_address); - } - } - -public: - reconnectable_snitch_helper(sstring local_dc) - : _local_dc(local_dc) {} - - void before_change(gms::inet_address endpoint, gms::endpoint_state cs, gms::application_state new_state_key, const gms::versioned_value& new_value) override { - // do nothing. - } - - void on_join(gms::inet_address endpoint, gms::endpoint_state ep_state) override { - auto internal_ip_state_opt = ep_state.get_application_state(gms::application_state::INTERNAL_IP); - - if (internal_ip_state_opt) { - reconnect(endpoint, *internal_ip_state_opt); - } - } - - void on_change(gms::inet_address endpoint, gms::application_state state, const gms::versioned_value& value) override { - if (state == gms::application_state::INTERNAL_IP) { - reconnect(endpoint, value); - } - } - - void on_alive(gms::inet_address endpoint, gms::endpoint_state ep_state) override { - auto internal_ip_state_opt = ep_state.get_application_state(gms::application_state::INTERNAL_IP); - - if (internal_ip_state_opt) { - reconnect(endpoint, *internal_ip_state_opt); - } - } - - void on_dead(gms::inet_address endpoint, gms::endpoint_state ep_state) override { - // do nothing. - } - - void on_remove(gms::inet_address endpoint) override { - // do nothing. 
- } - - void on_restart(gms::inet_address endpoint, gms::endpoint_state state) override { - // do nothing. - } -}; -} // namespace locator diff --git a/scylla/locator/simple_snitch.cc b/scylla/locator/simple_snitch.cc deleted file mode 100644 index f3ff38e..0000000 --- a/scylla/locator/simple_snitch.cc +++ /dev/null @@ -1,30 +0,0 @@ - -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "locator/simple_snitch.hh" -#include "utils/class_registrator.hh" - -namespace locator { -using registry = class_registrator; -static registry registrator1("org.apache.cassandra.locator.SimpleSnitch"); -static registry registrator2("SimpleSnitch"); -} diff --git a/scylla/locator/simple_snitch.hh b/scylla/locator/simple_snitch.hh deleted file mode 100644 index fa5cf20..0000000 --- a/scylla/locator/simple_snitch.hh +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once -#include "snitch_base.hh" -#include "utils/fb_utilities.hh" -#include - -namespace locator { - -/** - * A simple endpoint snitch implementation that treats Strategy order as - * proximity, allowing non-read-repaired reads to prefer a single endpoint, - * which improves cache locality. 
- */ -struct simple_snitch : public snitch_base { - simple_snitch() { - _my_dc = get_datacenter(utils::fb_utilities::get_broadcast_address()); - _my_rack = get_rack(utils::fb_utilities::get_broadcast_address()); - - // This snitch is ready on creation - set_snitch_ready(); - } - - virtual sstring get_rack(inet_address endpoint) override { - return "rack1"; - } - - virtual sstring get_datacenter(inet_address endpoint) override { - return "datacenter1"; - } - - virtual void sort_by_proximity( - inet_address address, std::vector& addresses) override { - // Optimization to avoid walking the list - } - - virtual int compare_endpoints(inet_address& target, inet_address& a1, - inet_address& a2) override { - // - // "Making all endpoints equal ensures we won't change the original - // ordering." - quote from C* code. - // - // Effectively this would return 0 even in the following case: - // - // compare_endpoints(NodeA, NodeA, NodeB) // -1 should be returned - // - // The snitch_base implementation would handle the above case correctly. - // - // I'm leaving the this implementation anyway since it's the C*'s - // implementation and some installations may depend on it. - // - return 0; - } - - virtual sstring get_name() const override { - return "org.apache.cassandra.locator.SimpleSnitch"; - } -}; - -} diff --git a/scylla/locator/simple_strategy.cc b/scylla/locator/simple_strategy.cc deleted file mode 100644 index 78c7709..0000000 --- a/scylla/locator/simple_strategy.cc +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include -#include "simple_strategy.hh" -#include "utils/class_registrator.hh" -#include -#include "utils/sequenced_set.hh" - -namespace locator { - -simple_strategy::simple_strategy(const sstring& keyspace_name, token_metadata& token_metadata, snitch_ptr& snitch, const std::map& config_options) : - abstract_replication_strategy(keyspace_name, token_metadata, snitch, config_options, replication_strategy_type::simple) { - for (auto& config_pair : config_options) { - auto& key = config_pair.first; - auto& val = config_pair.second; - - if (boost::iequals(key, "replication_factor")) { - _replication_factor = std::stol(val); - - break; - } - } -} - -std::vector simple_strategy::calculate_natural_endpoints(const token& t, token_metadata& tm) const { - const std::vector& tokens = tm.sorted_tokens(); - - if (tokens.empty()) { - return std::vector(); - } - - size_t replicas = get_replication_factor(); - utils::sequenced_set endpoints; - endpoints.reserve(replicas); - - for (auto& token : tm.ring_range(t)) { - auto ep = tm.get_endpoint(token); - assert(ep); - - endpoints.push_back(*ep); - if (endpoints.size() == replicas) { - break; - } - } - - return std::move(endpoints.get_vector()); -} - -size_t simple_strategy::get_replication_factor() const { - return _replication_factor; -} - -void simple_strategy::validate_options() const { - auto it = _config_options.find("replication_factor"); - if (it == _config_options.end()) { - throw exceptions::configuration_exception("SimpleStrategy requires a replication_factor strategy option."); - } - validate_replication_factor(it->second); -} - -std::experimental::optional>simple_strategy::recognized_options() const { - return {{ "replication_factor" }}; -} - -using registry = class_registrator&>; -static registry registrator("org.apache.cassandra.locator.SimpleStrategy"); -static registry registrator_short_name("SimpleStrategy"); - -} diff --git a/scylla/locator/simple_strategy.hh b/scylla/locator/simple_strategy.hh deleted file mode 100644 index 7d161bd..0000000 --- a/scylla/locator/simple_strategy.hh +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
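simple_strategy::calculate_natural_endpoints() above is the classic SimpleStrategy walk: starting from the first token at or after t, take ring owners clockwise until rf distinct endpoints are collected, with insertion order deciding the primary. On a toy integer-token ring (the deleted code gets deduplication from utils::sequenced_set; here a linear scan stands in for it):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

struct ring_entry { int64_t token; std::string owner; };

// Walk the sorted ring clockwise from the first token >= t (wrapping),
// collecting distinct owners until rf of them are found.
std::vector<std::string> natural_endpoints(const std::vector<ring_entry>& ring,
                                           int64_t t, std::size_t rf) {
    std::vector<std::string> eps;
    if (ring.empty()) return eps;
    auto start = std::lower_bound(ring.begin(), ring.end(), t,
        [](const ring_entry& e, int64_t v) { return e.token < v; });
    std::size_t i = (start == ring.end()) ? 0 : std::size_t(start - ring.begin());
    for (std::size_t n = 0; n < ring.size() && eps.size() < rf; ++n) {
        const auto& owner = ring[(i + n) % ring.size()].owner;
        if (std::find(eps.begin(), eps.end(), owner) == eps.end()) {
            eps.push_back(owner);   // insertion order decides the primary
        }
    }
    return eps;
}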
- */ - -#pragma once - -#include "abstract_replication_strategy.hh" - -#include -#include - -namespace locator { - -class simple_strategy : public abstract_replication_strategy { -protected: - virtual std::vector calculate_natural_endpoints(const token& search_token, token_metadata& tm) const override; -public: - simple_strategy(const sstring& keyspace_name, token_metadata& token_metadata, snitch_ptr& snitch, const std::map& config_options); - virtual ~simple_strategy() {}; - virtual size_t get_replication_factor() const override; - virtual void validate_options() const override; - virtual std::experimental::optional> recognized_options() const override; -private: - size_t _replication_factor = 1; -}; - -} diff --git a/scylla/locator/snitch_base.cc b/scylla/locator/snitch_base.cc deleted file mode 100644 index 773ab45..0000000 --- a/scylla/locator/snitch_base.cc +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "locator/snitch_base.hh" - -namespace locator { - -std::vector snitch_base::get_sorted_list_by_proximity( - inet_address address, - std::unordered_set& unsorted_address) { - - std::vector - preferred(unsorted_address.begin(), unsorted_address.end()); - - sort_by_proximity(address, preferred); - return preferred; -} - -void snitch_base::sort_by_proximity( - inet_address address, std::vector& addresses) { - - std::sort(addresses.begin(), addresses.end(), - [this, &address](inet_address& a1, inet_address& a2) - { - return compare_endpoints(address, a1, a2) < 0; - }); -} - -int snitch_base::compare_endpoints( - inet_address& address, inet_address& a1, inet_address& a2) { - - // - // if one of the Nodes IS the Node we are comparing to and the other one - // IS NOT - then return the appropriate result. 
- // - if (address == a1 && address != a2) { - return -1; - } - - if (address == a2 && address != a1) { - return 1; - } - - // ...otherwise perform the similar check in regard to Data Center - sstring address_datacenter = get_datacenter(address); - sstring a1_datacenter = get_datacenter(a1); - sstring a2_datacenter = get_datacenter(a2); - - if (address_datacenter == a1_datacenter && - address_datacenter != a2_datacenter) { - return -1; - } else if (address_datacenter == a2_datacenter && - address_datacenter != a1_datacenter) { - return 1; - } else if (address_datacenter == a2_datacenter && - address_datacenter == a1_datacenter) { - // - // ...otherwise (in case Nodes belong to the same Data Center) check - // the racks they belong to. - // - sstring address_rack = get_rack(address); - sstring a1_rack = get_rack(a1); - sstring a2_rack = get_rack(a2); - - if (address_rack == a1_rack && address_rack != a2_rack) { - return -1; - } - - if (address_rack == a2_rack && address_rack != a1_rack) { - return 1; - } - } - // - // We don't differentiate between Nodes if all Nodes belong to different - // Data Centers, thus make them equal. - // - return 0; -} - -bool snitch_base::is_worth_merging_for_range_query( - std::vector& merged, - std::vector& l1, - std::vector& l2) { - // - // Querying remote DC is likely to be an order of magnitude slower than - // querying locally, so 2 queries to local nodes is likely to still be - // faster than 1 query involving remote ones - // - bool merged_has_remote = has_remote_node(merged); - return merged_has_remote - ? (has_remote_node(l1) || has_remote_node(l2)) - : true; -} - -bool snitch_base::has_remote_node(std::vector& l) { - for (auto&& ep : l) { - if (_my_dc != get_datacenter(ep)) { - return true; - } - } - - return false; -} - -future<> i_endpoint_snitch::stop_snitch() { - // First stop the instance on a CPU where I/O is running - return snitch_instance().invoke_on(io_cpu_id(), [] (snitch_ptr& s) { - return s->stop(); - }).then([] { return snitch_instance().stop(); }); -} - -} // namespace locator diff --git a/scylla/locator/snitch_base.hh b/scylla/locator/snitch_base.hh deleted file mode 100644 index bcd8bf0..0000000 --- a/scylla/locator/snitch_base.hh +++ /dev/null @@ -1,437 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
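compare_endpoints() above defines a three-tier proximity order relative to a reference node: the node itself first, then nodes sharing its rack, then nodes sharing its datacenter, with everything else tied. The same ordering as a standalone comparator driving std::sort (toy ep type, illustrative names):

#include <algorithm>
#include <string>
#include <vector>

struct ep { std::string addr, dc, rack; };

// -1 if a is closer to self than b, 1 if farther, 0 if tied.
int compare(const ep& self, const ep& a, const ep& b) {
    auto tier = [&](const ep& x) {
        if (x.addr == self.addr) return 0;                    // the node itself
        if (x.dc == self.dc && x.rack == self.rack) return 1; // same rack
        if (x.dc == self.dc) return 2;                        // same DC, other rack
        return 3;                                             // remote DC: all tied
    };
    int ta = tier(a), tb = tier(b);
    return ta < tb ? -1 : ta > tb ? 1 : 0;
}

void sort_by_proximity(const ep& self, std::vector<ep>& v) {
    std::sort(v.begin(), v.end(),
              [&](const ep& a, const ep& b) { return compare(self, a, b) < 0; });
}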
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include - -#include "gms/inet_address.hh" -#include "core/shared_ptr.hh" -#include "core/thread.hh" -#include "core/distributed.hh" -#include "utils/class_registrator.hh" - -namespace locator { - -struct snitch_ptr; - -typedef gms::inet_address inet_address; - -struct i_endpoint_snitch { -private: - template - static future<> init_snitch_obj( - distributed& snitch_obj, const sstring& snitch_name, A&&... a); -public: - template - static future<> create_snitch(const sstring& snitch_name, A&&... a); - - template - static future<> reset_snitch(const sstring& snitch_name, A&&... a); - - static future<> stop_snitch(); - - /** - * returns a String representing the rack this endpoint belongs to - */ - virtual sstring get_rack(inet_address endpoint) = 0; - - /** - * returns a String representing the datacenter this endpoint belongs to - */ - virtual sstring get_datacenter(inet_address endpoint) = 0; - - /** - * returns a new List sorted by proximity to the given endpoint - */ - virtual std::vector get_sorted_list_by_proximity( - inet_address address, - std::unordered_set& unsorted_address) = 0; - - /** - * This method will sort the List by proximity to the given - * address. - */ - virtual void sort_by_proximity( - inet_address address, std::vector& addresses) = 0; - - /** - * compares two endpoints in relation to the target endpoint, returning as - * Comparator.compare would - */ - virtual int compare_endpoints( - inet_address& target, inet_address& a1, inet_address& a2) = 0; - - /** - * called after Gossiper instance exists immediately before it starts - * gossiping - */ - virtual future<> gossiper_starting() { - _gossip_started = true; - return make_ready_future<>(); - } - - /** - * Returns whether for a range query doing a query against merged is likely - * to be faster than 2 sequential queries, one against l1 followed by one - * against l2. 
- */ - virtual bool is_worth_merging_for_range_query( - std::vector& merged, - std::vector& l1, - std::vector& l2) = 0; - - virtual ~i_endpoint_snitch() { assert(_state == snitch_state::stopped); }; - - // noop by default - virtual future<> stop() { - _state = snitch_state::stopped; - return make_ready_future<>(); - } - - // noop by default - virtual future<> pause_io() { - _state = snitch_state::io_paused; - return make_ready_future<>(); - }; - - // noop by default - virtual void resume_io() { - _state = snitch_state::running; - }; - - // noop by default - virtual future<> start() { - _state = snitch_state::running; - return make_ready_future<>(); - } - - // noop by default - virtual void set_my_dc(const sstring& new_dc) {}; - virtual void set_my_rack(const sstring& new_rack) {}; - virtual void set_prefer_local(bool prefer_local) {}; - virtual void set_local_private_addr(const sstring& addr_str) {}; - - static distributed& snitch_instance() { - // FIXME: leaked intentionally to avoid shutdown problems, see #293 - static distributed* snitch_inst = new distributed(); - - return *snitch_inst; - } - - static snitch_ptr& get_local_snitch_ptr() { - return snitch_instance().local(); - } - - void set_snitch_ready() { - _state = snitch_state::running; - } - - virtual sstring get_name() const = 0; - - // should be called for production snitches before calling start() - virtual void set_my_distributed(distributed* d) { - //noop by default - } - - bool local_gossiper_started() { - return _gossip_started; - } - - virtual void reload_gossiper_state() { - // noop by default - } - -protected: - static logging::logger& logger() { - static logging::logger snitch_logger("snitch_logger"); - return snitch_logger; - } - - static unsigned& io_cpu_id() { - static unsigned id = 0; - return id; - } - -protected: - enum class snitch_state { - initializing, - running, - io_pausing, - io_paused, - stopping, - stopped - } _state = snitch_state::initializing; - bool _gossip_started = false; -}; - -struct snitch_ptr { - typedef std::unique_ptr ptr_type; - future<> stop() { - if (_ptr) { - return _ptr->stop(); - } else { - return make_ready_future<>(); - } - } - - future<> start() { - if (_ptr) { - return _ptr->start(); - } else { - return make_ready_future<>(); - } - } - - i_endpoint_snitch* operator->() { - return _ptr.get(); - } - - snitch_ptr& operator=(ptr_type&& new_val) { - _ptr = std::move(new_val); - - return *this; - } - - snitch_ptr& operator=(snitch_ptr&& new_val) { - _ptr = std::move(new_val._ptr); - - return *this; - } - - operator bool() const { - return _ptr ? true : false; - } - -private: - ptr_type _ptr; -}; - -/** - * Initializes the distributed object - * - * @note The local snitch objects will remain not start()ed. - * - * @param snitch_obj distributed<> object to initialize - * @param snitch_name name of the snitch class to create - * @param a snitch constructor arguments - * - * @return ready future when the snitch has been successfully created - */ -template -future<> i_endpoint_snitch::init_snitch_obj( - distributed& snitch_obj, const sstring& snitch_name, A&&... a) { - - // First, create the snitch_ptr objects... - return snitch_obj.start().then( - [&snitch_obj, snitch_name = std::move(snitch_name), a = std::make_tuple(std::forward(a)...)] () { - // ...then, create the snitches... - return snitch_obj.invoke_on_all( - [snitch_name, a, &snitch_obj] (snitch_ptr& local_inst) { - try { - auto s(std::move(apply([snitch_name] (A&&... 
a) { - return create_object(snitch_name, std::forward(a)...); - }, std::move(a)))); - - s->set_my_distributed(&snitch_obj); - local_inst = std::move(s); - } catch (no_such_class& e) { - logger().error("Can't create snitch {}: not supported", snitch_name); - throw; - } catch (...) { - throw; - } - - return make_ready_future<>(); - }); - }); -} -/** - * Creates the distributed i_endpoint_snitch::snitch_instane object - * - * @param snitch_name name of the snitch class (comes from the cassandra.yaml) - * - * @return ready future when the distributed object is ready. - */ -template -future<> i_endpoint_snitch::create_snitch( - const sstring& snitch_name, A&&... a) { - - // First, create and "start" the distributed snitch object... - return init_snitch_obj(snitch_instance(), snitch_name, std::forward(a)...).then([] { - // ...and then start each local snitch. - return snitch_instance().invoke_on_all([] (snitch_ptr& local_inst) { - return local_inst.start(); - }).then_wrapped([] (auto&& f) { - try { - f.get(); - return make_ready_future<>(); - } catch (...) { - auto eptr = std::current_exception(); - - return stop_snitch().then([eptr] () { - std::rethrow_exception(eptr); - }); - } - }); - }); -} - -/** - * Resets the global snitch instance with the new value - * - * @param snitch_name Name of a new snitch - * @param A optional parameters for a new snitch constructor - * - * @return ready future when the transition is complete - * - * The flow goes as follows: - * 1) Create a new distributed and initialize it with the new - * snitch. - * 2) Start the new snitches above - this will initialize the snitches objects - * and will make them ready to be used. - * 3) Stop() the current global per-shard snitch objects. - * 4) Pause the per-shard snitch objects from (1) - this will stop the async - * I/O parts of the snitches if any. - * 5) Assign the per-shard snitch_ptr's from new distributed from (1) to the - * global one and update the distributed<> pointer in the new snitch - * instances. - * 6) Start the new snitches. - * 7) Stop() the temporary distributed from (1). - */ -template -future<> i_endpoint_snitch::reset_snitch( - const sstring& snitch_name, A&&... a) { - return seastar::async( - [snitch_name, a = std::make_tuple(std::forward(a)...)] { - - // (1) create a new snitch - distributed tmp_snitch; - try { - apply([snitch_name,&tmp_snitch](A&& ... a) { - return init_snitch_obj(tmp_snitch, snitch_name, std::forward(a)...); - }, std::move(a)).get(); - - // (2) start the local instances of the new snitch - tmp_snitch.invoke_on_all([] (snitch_ptr& local_inst) { - return local_inst.start(); - }).get(); - } catch (...) { - tmp_snitch.stop().get(); - throw; - } - - // If we've got here then we may not fail - - // (3) stop the current snitch instances on all CPUs - snitch_instance().invoke_on(io_cpu_id(), [] (snitch_ptr& s) { - // stop the instance on an I/O CPU first - return s->stop(); - }).get(); - snitch_instance().invoke_on_all([] (snitch_ptr& s) { - return s->stop(); - }).get(); - - // - // (4) If we've got here - the new snitch has been successfully created - // and initialized. We may pause its I/O it now and start moving - // pointers... 
- // - tmp_snitch.invoke_on(io_cpu_id(), [] (snitch_ptr& local_inst) { - // pause the instance on an I/O CPU first - return local_inst->pause_io(); - }).get(); - tmp_snitch.invoke_on_all([] (snitch_ptr& local_inst) { - return local_inst->pause_io(); - }).get(); - - // - // (5) move the pointers - this would ensure the atomicity on a - // per-shard level (since users are holding snitch_ptr objects only) - // - tmp_snitch.invoke_on_all([] (snitch_ptr& local_inst) { - local_inst->set_my_distributed(&snitch_instance()); - snitch_instance().local() = std::move(local_inst); - - return make_ready_future<>(); - }).get(); - - // (6) re-start I/O on the new snitches - snitch_instance().invoke_on_all([] (snitch_ptr& local_inst) { - local_inst->resume_io(); - }).get(); - - // (7) stop the temporary from (1) - tmp_snitch.stop().get(); - }); -} - -class snitch_base : public i_endpoint_snitch { -public: - // - // Sons have to implement: - // virtual sstring get_rack(inet_address endpoint) = 0; - // virtual sstring get_datacenter(inet_address endpoint) = 0; - // - - virtual std::vector get_sorted_list_by_proximity( - inet_address address, - std::unordered_set& unsorted_address) override; - - virtual void sort_by_proximity( - inet_address address, std::vector& addresses) override; - - virtual int compare_endpoints( - inet_address& address, inet_address& a1, inet_address& a2) override; - - virtual bool is_worth_merging_for_range_query( - std::vector& merged, - std::vector& l1, - std::vector& l2) override; - -private: - bool has_remote_node(std::vector& l); - -protected: - sstring _my_dc; - sstring _my_rack; - bool _prefer_local = false; -}; - -} // namespace locator diff --git a/scylla/locator/token_metadata.cc b/scylla/locator/token_metadata.cc deleted file mode 100644 index 1df0d15..0000000 --- a/scylla/locator/token_metadata.cc +++ /dev/null @@ -1,672 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "utils/UUID.hh" -#include "token_metadata.hh" -#include -#include "locator/snitch_base.hh" -#include "locator/abstract_replication_strategy.hh" -#include "log.hh" -#include "stdx.hh" -#include "partition_range_compat.hh" -#include -#include -#include -#include - -namespace locator { - -static logging::logger tlogger("token_metadata"); - -template -static void remove_by_value(C& container, V value) { - for (auto it = container.begin(); it != container.end();) { - if (it->second == value) { - it = container.erase(it); - } else { - it++; - } - } -} - -token_metadata::token_metadata(std::map token_to_endpoint_map, std::unordered_map endpoints_map, topology topology) : - _token_to_endpoint_map(token_to_endpoint_map), _endpoint_to_host_id_map(endpoints_map), _topology(topology) { - _sorted_tokens = sort_tokens(); -} - -std::vector token_metadata::sort_tokens() { - std::vector sorted; - sorted.reserve(_token_to_endpoint_map.size()); - - for (auto&& i : _token_to_endpoint_map) { - sorted.push_back(i.first); - } - - return sorted; -} - -const std::vector& token_metadata::sorted_tokens() const { - return _sorted_tokens; -} - -std::vector token_metadata::get_tokens(const inet_address& addr) const { - std::vector res; - for (auto&& i : _token_to_endpoint_map) { - if (i.second == addr) { - res.push_back(i.first); - } - } - return res; -} -/** - * Update token map with a single token/endpoint pair in normal state. - */ -void token_metadata::update_normal_token(token t, inet_address endpoint) -{ - update_normal_tokens(std::unordered_set({t}), endpoint); -} - -void token_metadata::update_normal_tokens(std::unordered_set tokens, inet_address endpoint) { - if (tokens.empty()) { - return; - } - std::unordered_map> endpoint_tokens ({{endpoint, tokens}}); - update_normal_tokens(endpoint_tokens); -} - -/** - * Update token map with a set of token/endpoint pairs in normal state. - * - * Prefer this whenever there are multiple pairs to update, as each update (whether a single or multiple) - * is expensive (CASSANDRA-3831). 
- * - * @param endpointTokens - */ -void token_metadata::update_normal_tokens(std::unordered_map>& endpoint_tokens) { - if (endpoint_tokens.empty()) { - return; - } - - bool should_sort_tokens = false; - for (auto&& i : endpoint_tokens) { - inet_address endpoint = i.first; - std::unordered_set& tokens = i.second; - - assert(!tokens.empty()); - - for(auto it = _token_to_endpoint_map.begin(), ite = _token_to_endpoint_map.end(); it != ite;) { - if(it->second == endpoint) { - it = _token_to_endpoint_map.erase(it); - } else { - ++it; - } - } - - _topology.add_endpoint(endpoint); - remove_by_value(_bootstrap_tokens, endpoint); - _leaving_endpoints.erase(endpoint); - remove_from_moving(endpoint); // also removing this endpoint from moving - for (const token& t : tokens) - { - auto prev = _token_to_endpoint_map.insert(std::pair(t, endpoint)); - should_sort_tokens |= prev.second; // new token inserted -> sort - if (prev.first->second != endpoint) { - tlogger.warn("Token {} changing ownership from {} to {}", t, prev.first->second, endpoint); - prev.first->second = endpoint; - } - } - } - - if (should_sort_tokens) { - _sorted_tokens = sort_tokens(); - } -} - -size_t token_metadata::first_token_index(const token& start) const { - assert(_sorted_tokens.size() > 0); - auto it = std::lower_bound(_sorted_tokens.begin(), _sorted_tokens.end(), start); - if (it == _sorted_tokens.end()) { - return 0; - } else { - return std::distance(_sorted_tokens.begin(), it); - } -} - -const token& token_metadata::first_token(const token& start) const { - return _sorted_tokens[first_token_index(start)]; -} - -std::experimental::optional token_metadata::get_endpoint(const token& token) const { - auto it = _token_to_endpoint_map.find(token); - if (it == _token_to_endpoint_map.end()) { - return std::experimental::nullopt; - } else { - return it->second; - } -} - -void token_metadata::debug_show() { - auto reporter = std::make_shared>(); - reporter->set_callback ([reporter, this] { - print("Endpoint -> Token\n"); - for (auto x : _token_to_endpoint_map) { - print("inet_address=%s, token=%s\n", x.second, x.first); - } - print("Endpoint -> UUID\n"); - for (auto x : _endpoint_to_host_id_map) { - print("inet_address=%s, uuid=%s\n", x.first, x.second); - } - print("Sorted Token\n"); - for (auto x : _sorted_tokens) { - print("token=%s\n", x); - } - }); - reporter->arm_periodic(std::chrono::seconds(1)); -} - -void token_metadata::update_host_id(const UUID& host_id, inet_address endpoint) { -#if 0 - assert host_id != null; - assert endpoint != null; - - InetAddress storedEp = _endpoint_to_host_id_map.inverse().get(host_id); - if (storedEp != null) { - if (!storedEp.equals(endpoint) && (FailureDetector.instance.isAlive(storedEp))) { - throw new RuntimeException(String.format("Host ID collision between active endpoint %s and %s (id=%s)", - storedEp, - endpoint, - host_id)); - } - } - - UUID storedId = _endpoint_to_host_id_map.get(endpoint); - // if ((storedId != null) && (!storedId.equals(host_id))) - tlogger.warn("Changing {}'s host ID from {} to {}", endpoint, storedId, host_id); -#endif - _endpoint_to_host_id_map[endpoint] = host_id; -} - -utils::UUID token_metadata::get_host_id(inet_address endpoint) { - if (!_endpoint_to_host_id_map.count(endpoint)) { - throw std::runtime_error(sprint("host_id for endpoint %s is not found", endpoint)); - } - return _endpoint_to_host_id_map.at(endpoint); -} - -std::experimental::optional token_metadata::get_endpoint_for_host_id(UUID host_id) { - auto beg = _endpoint_to_host_id_map.cbegin(); - auto 
end = _endpoint_to_host_id_map.cend();
-    auto it = std::find_if(beg, end, [host_id] (auto x) {
-        return x.second == host_id;
-    });
-    if (it == end) {
-        return {};
-    } else {
-        return (*it).first;
-    }
-}
-
-const std::unordered_map<inet_address, utils::UUID>& token_metadata::get_endpoint_to_host_id_map_for_reading() const {
-    return _endpoint_to_host_id_map;
-}
-
-bool token_metadata::is_member(inet_address endpoint) {
-    auto beg = _token_to_endpoint_map.cbegin();
-    auto end = _token_to_endpoint_map.cend();
-    return end != std::find_if(beg, end, [endpoint] (const auto& x) {
-        return x.second == endpoint;
-    });
-}
-
-void token_metadata::add_bootstrap_token(token t, inet_address endpoint) {
-    std::unordered_set<token> tokens{t};
-    add_bootstrap_tokens(tokens, endpoint);
-}
-
-boost::iterator_range<token_metadata::tokens_iterator>
-token_metadata::ring_range(
-    const std::experimental::optional<dht::ring_position>& start,
-    bool include_min) const
-{
-    auto r = ring_range(start ? start->value().token() : dht::minimum_token(), include_min);
-
-    if (!r.empty()) {
-        // We should skip the first token if it's excluded by the range.
-        if (start
-            && !start->is_inclusive()
-            && !start->value().has_key()
-            && start->value().token() == *r.begin())
-        {
-            r.pop_front();
-        }
-    }
-
-    return r;
-}
-
-void token_metadata::add_bootstrap_tokens(std::unordered_set<token> tokens, inet_address endpoint) {
-    for (auto t : tokens) {
-        auto old_endpoint = _bootstrap_tokens.find(t);
-        if (old_endpoint != _bootstrap_tokens.end() && (*old_endpoint).second != endpoint) {
-            auto msg = sprint("Bootstrap Token collision between %s and %s (token %s)", (*old_endpoint).second, endpoint, t);
-            throw std::runtime_error(msg);
-        }
-
-        auto old_endpoint2 = _token_to_endpoint_map.find(t);
-        if (old_endpoint2 != _token_to_endpoint_map.end() && (*old_endpoint2).second != endpoint) {
-            auto msg = sprint("Bootstrap Token collision between %s and %s (token %s)", (*old_endpoint2).second, endpoint, t);
-            throw std::runtime_error(msg);
-        }
-    }
-
-    // Unfortunately, std::remove_if does not work with std::map
-    for (auto it = _bootstrap_tokens.begin(); it != _bootstrap_tokens.end();) {
-        if ((*it).second == endpoint) {
-            it = _bootstrap_tokens.erase(it);
-        } else {
-            it++;
-        }
-    }
-
-    for (auto t : tokens) {
-        _bootstrap_tokens[t] = endpoint;
-    }
-}
-
-void token_metadata::remove_bootstrap_tokens(std::unordered_set<token> tokens) {
-    assert(!tokens.empty());
-    for (auto t : tokens) {
-        _bootstrap_tokens.erase(t);
-    }
-}
-
-bool token_metadata::is_leaving(inet_address endpoint) {
-    return _leaving_endpoints.count(endpoint);
-}
-
-void token_metadata::remove_endpoint(inet_address endpoint) {
-    remove_by_value(_bootstrap_tokens, endpoint);
-    remove_by_value(_token_to_endpoint_map, endpoint);
-    _topology.remove_endpoint(endpoint);
-    _leaving_endpoints.erase(endpoint);
-    _endpoint_to_host_id_map.erase(endpoint);
-    _sorted_tokens = sort_tokens();
-    invalidate_cached_rings();
-}
-
-void token_metadata::remove_from_moving(inet_address endpoint) {
-    remove_by_value(_moving_endpoints, endpoint);
-    invalidate_cached_rings();
-}
-
-token token_metadata::get_predecessor(token t) {
-    auto& tokens = sorted_tokens();
-    auto it = std::lower_bound(tokens.begin(), tokens.end(), t);
-    assert(it != tokens.end() && *it == t);
-    if (it == tokens.begin()) {
-        // If the token is the first element, its predecessor is the last element
-        return tokens.back();
-    } else {
-        return *(--it);
-    }
-}
-
-dht::token_range_vector token_metadata::get_primary_ranges_for(std::unordered_set<token> tokens) {
-    dht::token_range_vector ranges;
-    ranges.reserve(tokens.size() + 1); // one of
the ranges will wrap - for (auto right : tokens) { - auto left = get_predecessor(right); - compat::unwrap_into( - wrapping_range(range_bound(left, false), range_bound(right)), - dht::token_comparator(), - [&] (auto&& rng) { ranges.push_back(std::move(rng)); }); - } - return ranges; -} - -dht::token_range_vector token_metadata::get_primary_ranges_for(token right) { - return get_primary_ranges_for(std::unordered_set{right}); -} - -boost::icl::interval::interval_type -token_metadata::range_to_interval(range r) { - bool start_inclusive = false; - bool end_inclusive = false; - token start = dht::minimum_token(); - token end = dht::maximum_token(); - - if (r.start()) { - start = r.start()->value(); - start_inclusive = r.start()->is_inclusive(); - } - - if (r.end()) { - end = r.end()->value(); - end_inclusive = r.end()->is_inclusive(); - } - - if (start_inclusive == false && end_inclusive == false) { - return boost::icl::interval::open(std::move(start), std::move(end)); - } else if (start_inclusive == false && end_inclusive == true) { - return boost::icl::interval::left_open(std::move(start), std::move(end)); - } else if (start_inclusive == true && end_inclusive == false) { - return boost::icl::interval::right_open(std::move(start), std::move(end)); - } else { - return boost::icl::interval::closed(std::move(start), std::move(end)); - } -} - -range -token_metadata::interval_to_range(boost::icl::interval::interval_type i) { - bool start_inclusive; - bool end_inclusive; - auto bounds = i.bounds().bits(); - if (bounds == boost::icl::interval_bounds::static_open) { - start_inclusive = false; - end_inclusive = false; - } else if (bounds == boost::icl::interval_bounds::static_left_open) { - start_inclusive = false; - end_inclusive = true; - } else if (bounds == boost::icl::interval_bounds::static_right_open) { - start_inclusive = true; - end_inclusive = false; - } else if (bounds == boost::icl::interval_bounds::static_closed) { - start_inclusive = true; - end_inclusive = true; - } else { - throw std::runtime_error("Invalid boost::icl::interval bounds"); - } - return range({{i.lower(), start_inclusive}}, {{i.upper(), end_inclusive}}); -} - -void token_metadata::set_pending_ranges(const sstring& keyspace_name, - std::unordered_multimap, inet_address> new_pending_ranges) { - if (new_pending_ranges.empty()) { - _pending_ranges.erase(keyspace_name); - _pending_ranges_map.erase(keyspace_name); - _pending_ranges_interval_map.erase(keyspace_name); - return; - } - std::unordered_map, std::unordered_set> map; - for (const auto& x : new_pending_ranges) { - map[x.first].emplace(x.second); - } - - // construct a interval map to speed up the search - _pending_ranges_interval_map[keyspace_name] = {}; - for (const auto& m : map) { - _pending_ranges_interval_map[keyspace_name] += - std::make_pair(range_to_interval(m.first), m.second); - } - _pending_ranges[keyspace_name] = std::move(new_pending_ranges); - _pending_ranges_map[keyspace_name] = std::move(map); -} - -std::unordered_multimap, inet_address>& -token_metadata::get_pending_ranges_mm(sstring keyspace_name) { - return _pending_ranges[keyspace_name]; -} - -const std::unordered_map, std::unordered_set>& -token_metadata::get_pending_ranges(sstring keyspace_name) { - return _pending_ranges_map[keyspace_name]; -} - -std::vector> -token_metadata::get_pending_ranges(sstring keyspace_name, inet_address endpoint) { - std::vector> ret; - for (auto x : get_pending_ranges_mm(keyspace_name)) { - auto& range_token = x.first; - auto& ep = x.second; - if (ep == endpoint) { - 
ret.push_back(range_token);
-        }
-    }
-    return ret;
-}
-
-void token_metadata::calculate_pending_ranges(abstract_replication_strategy& strategy, const sstring& keyspace_name) {
-    std::unordered_multimap<range<token>, inet_address> new_pending_ranges;
-
-    if (_bootstrap_tokens.empty() && _leaving_endpoints.empty() && _moving_endpoints.empty()) {
-        tlogger.debug("No bootstrapping, leaving or moving nodes -> empty pending ranges for {}", keyspace_name);
-        set_pending_ranges(keyspace_name, std::move(new_pending_ranges));
-        return;
-    }
-
-    std::unordered_multimap<inet_address, range<token>> address_ranges = strategy.get_address_ranges(*this);
-
-    // FIXME
-    // Copy of metadata reflecting the situation after all leave operations are finished.
-    auto all_left_metadata = clone_after_all_left();
-
-    // get all ranges that will be affected by leaving nodes
-    std::unordered_set<range<token>> affected_ranges;
-    for (auto endpoint : _leaving_endpoints) {
-        auto r = address_ranges.equal_range(endpoint);
-        for (auto x = r.first; x != r.second; x++) {
-            affected_ranges.emplace(x->second);
-        }
-    }
-    // for each of those ranges, find what new nodes will be responsible for the range when
-    // all leaving nodes are gone.
-    auto metadata = clone_only_token_map(); // don't do this in the loop! #7758
-    for (const auto& r : affected_ranges) {
-        auto t = r.end() ? r.end()->value() : dht::maximum_token();
-        auto current_endpoints = strategy.calculate_natural_endpoints(t, metadata);
-        auto new_endpoints = strategy.calculate_natural_endpoints(t, all_left_metadata);
-        std::vector<inet_address> diff;
-        std::sort(current_endpoints.begin(), current_endpoints.end());
-        std::sort(new_endpoints.begin(), new_endpoints.end());
-        std::set_difference(new_endpoints.begin(), new_endpoints.end(),
-            current_endpoints.begin(), current_endpoints.end(), std::back_inserter(diff));
-        for (auto& ep : diff) {
-            new_pending_ranges.emplace(r, ep);
-        }
-    }
-
-    // At this stage newPendingRanges has been updated according to leave operations. We can
-    // now continue the calculation by checking bootstrapping nodes.
-
-    // For each of the bootstrapping nodes, simply add and remove them one by one to
-    // allLeftMetadata and check in between what their ranges would be.
-    std::unordered_multimap<inet_address, token> bootstrap_addresses;
-    for (auto& x : _bootstrap_tokens) {
-        bootstrap_addresses.emplace(x.second, x.first);
-    }
-
-    // TODO: share code with unordered_multimap_to_unordered_map
-    std::unordered_map<inet_address, std::unordered_set<token>> tmp;
-    for (auto& x : bootstrap_addresses) {
-        auto& addr = x.first;
-        auto& t = x.second;
-        tmp[addr].insert(t);
-    }
-    for (auto& x : tmp) {
-        auto& endpoint = x.first;
-        auto& tokens = x.second;
-        all_left_metadata.update_normal_tokens(tokens, endpoint);
-        for (auto& x : strategy.get_address_ranges(all_left_metadata)) {
-            if (x.first == endpoint) {
-                new_pending_ranges.emplace(x.second, endpoint);
-            }
-        }
-        all_left_metadata.remove_endpoint(endpoint);
-    }
-
-    // At this stage newPendingRanges has been updated according to leaving and bootstrapping nodes.
-    // We can now finish the calculation by checking moving nodes.
-
-    // For each of the moving nodes, we do the same thing we did for bootstrapping:
-    // simply add and remove them one by one to allLeftMetadata and check in between what their ranges would be.
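// The add-one/probe/remove-one simulation described above can be shown without
// the real replication strategy. A self-contained sketch under toy assumptions:
// replication factor 1, integer tokens, and a ring where each token interval is
// owned by its successor. None of these names belong to token_metadata.

#include <iostream>
#include <map>
#include <string>

using ring = std::map<int, std::string>; // token -> endpoint, sorted

// Owner of token t: the first node at or after t, wrapping around.
static const std::string& owner(const ring& r, int t) {
    auto it = r.lower_bound(t);
    return it == r.end() ? r.begin()->second : it->second;
}

int main() {
    ring r{{100, "A"}, {200, "B"}, {300, "C"}};
    // Node D bootstraps at token 250: insert it, then probe sample tokens to
    // see which intervals would change hands and so become pending for D.
    ring with_d = r;
    with_d.emplace(250, "D");
    for (int t : {225, 250, 299}) {
        if (owner(r, t) != owner(with_d, t)) {
            std::cout << "token " << t << " pending for " << owner(with_d, t) << "\n";
        }
    }
    // ...then drop D again before simulating the next joining node, just as
    // all_left_metadata is reused above.
}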
- for (auto& moving : _moving_endpoints) { - auto& t = moving.first; - auto& endpoint = moving.second; // address of the moving node - - // moving.left is a new token of the endpoint - all_left_metadata.update_normal_token(t, endpoint); - - for (auto& x : strategy.get_address_ranges(all_left_metadata)) { - if (x.first == endpoint) { - new_pending_ranges.emplace(x.second, endpoint); - } - } - - all_left_metadata.remove_endpoint(endpoint); - } - - set_pending_ranges(keyspace_name, std::move(new_pending_ranges)); - - if (tlogger.is_enabled(logging::log_level::debug)) { - tlogger.debug("Pending ranges: {}", (_pending_ranges.empty() ? "" : print_pending_ranges())); - } -} -sstring token_metadata::print_pending_ranges() { - std::stringstream ss; - - for (auto& x : _pending_ranges) { - auto& keyspace_name = x.first; - ss << "\nkeyspace_name = " << keyspace_name << " {\n"; - for (auto& m : x.second) { - ss << m.second << " : " << m.first << "\n"; - } - ss << "}\n"; - } - - return sstring(ss.str()); -} - -void token_metadata::add_leaving_endpoint(inet_address endpoint) { - _leaving_endpoints.emplace(endpoint); -} - -token_metadata token_metadata::clone_after_all_settled() { - token_metadata metadata = clone_only_token_map(); - - for (auto endpoint : _leaving_endpoints) { - metadata.remove_endpoint(endpoint); - } - - - for (auto x : _moving_endpoints) { - metadata.update_normal_token(x.first, x.second); - } - - return metadata; -} - -void token_metadata::add_moving_endpoint(token t, inet_address endpoint) { - _moving_endpoints[t] = endpoint; -} - -std::vector token_metadata::pending_endpoints_for(const token& token, const sstring& keyspace_name) { - // Fast path 0: no pending ranges at all - if (_pending_ranges_interval_map.empty()) { - return {}; - } - - // Fast path 1: no pending ranges for this keyspace_name - if (_pending_ranges_interval_map[keyspace_name].empty()) { - return {}; - } - - // Slow path: lookup pending ranges - std::vector endpoints; - auto interval = range_to_interval(range(token)); - auto it = _pending_ranges_interval_map[keyspace_name].find(interval); - if (it != _pending_ranges_interval_map[keyspace_name].end()) { - // interval_map does not work with std::vector, convert to std::vector of ips - endpoints = std::vector(it->second.begin(), it->second.end()); - } - return endpoints; -} - -std::map token_metadata::get_normal_and_bootstrapping_token_to_endpoint_map() { - std::map ret(_token_to_endpoint_map.begin(), _token_to_endpoint_map.end()); - ret.insert(_bootstrap_tokens.begin(), _bootstrap_tokens.end()); - return ret; -} - -std::multimap token_metadata::get_endpoint_to_token_map_for_reading() { - std::multimap cloned; - for (const auto& x : _token_to_endpoint_map) { - cloned.emplace(x.second, x.first); - } - return cloned; -} - - -/////////////////// class topology ///////////////////////////////////////////// -inline void topology::clear() { - _dc_endpoints.clear(); - _dc_racks.clear(); - _current_locations.clear(); -} - -topology::topology(const topology& other) { - _dc_endpoints = other._dc_endpoints; - _dc_racks = other._dc_racks; - _current_locations = other._current_locations; -} - -void topology::add_endpoint(const inet_address& ep) -{ - auto& snitch = i_endpoint_snitch::get_local_snitch_ptr(); - sstring dc = snitch->get_datacenter(ep); - sstring rack = snitch->get_rack(ep); - auto current = _current_locations.find(ep); - - if (current != _current_locations.end()) { - if (current->second.dc == dc && current->second.rack == rack) { - return; - } - - 
_dc_racks[current->second.dc][current->second.rack].erase(ep); - _dc_endpoints[current->second.dc].erase(ep); - } - - _dc_endpoints[dc].insert(ep); - _dc_racks[dc][rack].insert(ep); - _current_locations[ep] = {dc, rack}; -} - -void topology::update_endpoint(inet_address ep) { - if (!_current_locations.count(ep) || !locator::i_endpoint_snitch::snitch_instance().local_is_initialized()) { - return; - } - - add_endpoint(ep); -} - -void topology::remove_endpoint(inet_address ep) -{ - auto cur_dc_rack = _current_locations.find(ep); - - if (cur_dc_rack == _current_locations.end()) { - return; - } - - _dc_endpoints[cur_dc_rack->second.dc].erase(ep); - _dc_racks[cur_dc_rack->second.dc][cur_dc_rack->second.rack].erase(ep); - _current_locations.erase(cur_dc_rack); -} -/////////////////// class topology end ///////////////////////////////////////// -} // namespace locator diff --git a/scylla/locator/token_metadata.hh b/scylla/locator/token_metadata.hh deleted file mode 100644 index 1d855af..0000000 --- a/scylla/locator/token_metadata.hh +++ /dev/null @@ -1,1030 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include -#include -#include -#include "gms/inet_address.hh" -#include "dht/i_partitioner.hh" -#include "utils/UUID.hh" -#include -#include -#include -#include -#include "query-request.hh" -#include "range.hh" - -// forward declaration since database.hh includes this file -class keyspace; - -namespace locator { - -class abstract_replication_strategy; - -using inet_address = gms::inet_address; -using token = dht::token; - -// Endpoint Data Center and Rack names -struct endpoint_dc_rack { - sstring dc; - sstring rack; -}; - -class topology { -public: - topology() {} - topology(const topology& other); - - void clear(); - - /** - * Stores current DC/rack assignment for ep - */ - void add_endpoint(const inet_address& ep); - - /** - * Removes current DC/rack assignment for ep - */ - void remove_endpoint(inet_address ep); - - /** - * Re-reads the DC/rack info for the given endpoint - * @param ep endpoint in question - */ - void update_endpoint(inet_address ep); - - std::unordered_map>& - get_datacenter_endpoints() { - return _dc_endpoints; - } - - const std::unordered_map>& - get_datacenter_endpoints() const { - return _dc_endpoints; - } - - std::unordered_map>>& - get_datacenter_racks() { - return _dc_racks; - } - -private: - /** multi-map: DC -> endpoints in that DC */ - std::unordered_map> - _dc_endpoints; - - /** map: DC -> (multi-map: rack -> endpoints in that rack) */ - std::unordered_map>> - _dc_racks; - - /** reverse-lookup map: endpoint -> current known dc/rack assignment */ - std::unordered_map _current_locations; -}; - -class token_metadata final { -public: - using UUID = utils::UUID; - using inet_address = gms::inet_address; -private: - /** - * Maintains token to endpoint map of every node in the cluster. - * Each Token is associated with exactly one Address, but each Address may have - * multiple tokens. Hence, the BiMultiValMap collection. 
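// A "BiMultiValMap" keeps one owner per token but many tokens per owner, and
// the FIXME just below exists because a plain std::map gives only the forward
// direction (get_tokens() in the .cc above falls back to a linear scan). A
// sketch of a two-sided index that would close that FIXME; hypothetical names,
// deliberately minimal, and requiring a hashable Addr such as std::string:

#include <map>
#include <set>
#include <unordered_map>

template <typename Token, typename Addr>
class bi_multi_val_map {
    std::map<Token, Addr> _forward;                     // token -> unique owner
    std::unordered_map<Addr, std::set<Token>> _reverse; // owner -> its tokens
public:
    void put(const Token& t, const Addr& a) {
        auto it = _forward.find(t);
        if (it != _forward.end()) {            // token changes ownership:
            _reverse[it->second].erase(t);     // unlink from the previous owner
            it->second = a;
        } else {
            _forward.emplace(t, a);
        }
        _reverse[a].insert(t);
    }
    void remove_value(const Addr& a) {         // drop an endpoint entirely
        for (const Token& t : _reverse[a]) {
            _forward.erase(t);
        }
        _reverse.erase(a);
    }
    const std::set<Token>& tokens_of(const Addr& a) const {
        static const std::set<Token> empty;
        auto it = _reverse.find(a);
        return it == _reverse.end() ? empty : it->second;
    }
};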
- */ - // FIXME: have to be BiMultiValMap - std::map _token_to_endpoint_map; - - /** Maintains endpoint to host ID map of every node in the cluster */ - std::unordered_map _endpoint_to_host_id_map; - - std::unordered_map _bootstrap_tokens; - std::unordered_set _leaving_endpoints; - std::unordered_map _moving_endpoints; - - std::unordered_map, inet_address>> _pending_ranges; - std::unordered_map, std::unordered_set>> _pending_ranges_map; - std::unordered_map>> _pending_ranges_interval_map; - - std::vector _sorted_tokens; - - topology _topology; - - long _ring_version = 0; - - std::vector sort_tokens(); - - class tokens_iterator : - public std::iterator { - private: - tokens_iterator(std::vector::const_iterator it, size_t pos) - : _cur_it(it), _ring_pos(pos), _insert_min(false) {} - - public: - tokens_iterator(const token& start, const token_metadata* token_metadata, bool include_min = false) - : _token_metadata(token_metadata) { - _cur_it = _token_metadata->sorted_tokens().begin() + _token_metadata->first_token_index(start); - _insert_min = include_min && *_token_metadata->sorted_tokens().begin() != dht::minimum_token(); - if (_token_metadata->sorted_tokens().empty()) { - _min = true; - } - } - - bool operator==(const tokens_iterator& it) const { - return _min == it._min && _cur_it == it._cur_it; - } - - bool operator!=(const tokens_iterator& it) const { - return _min != it._min || _cur_it != it._cur_it; - } - - const token& operator*() { - if (_min) { - return _min_token; - } else { - return *_cur_it; - } - } - - tokens_iterator& operator++() { - if (!_min) { - if (_ring_pos >= _token_metadata->sorted_tokens().size()) { - _cur_it = _token_metadata->sorted_tokens().end(); - } else { - ++_cur_it; - ++_ring_pos; - - if (_cur_it == _token_metadata->sorted_tokens().end()) { - _cur_it = _token_metadata->sorted_tokens().begin(); - _min = _insert_min; - } - } - } else { - _min = false; - } - return *this; - } - - private: - std::vector::const_iterator _cur_it; - // - // position on the token ring starting from token corresponding to - // "start" - // - size_t _ring_pos = 0; - bool _insert_min; - bool _min = false; - const token _min_token = dht::minimum_token(); - const token_metadata* _token_metadata = nullptr; - - friend class token_metadata; - }; - - token_metadata(std::map token_to_endpoint_map, std::unordered_map endpoints_map, topology topology); -public: - token_metadata() {}; - const std::vector& sorted_tokens() const; - void update_normal_token(token token, inet_address endpoint); - void update_normal_tokens(std::unordered_set tokens, inet_address endpoint); - void update_normal_tokens(std::unordered_map>& endpoint_tokens); - const token& first_token(const token& start) const; - size_t first_token_index(const token& start) const; - std::experimental::optional get_endpoint(const token& token) const; - std::vector get_tokens(const inet_address& addr) const; - const std::map& get_token_to_endpoint() const { - return _token_to_endpoint_map; - } - - const std::unordered_set& get_leaving_endpoints() const { - return _leaving_endpoints; - } - - const std::unordered_map& get_moving_endpoints() const { - return _moving_endpoints; - } - const std::unordered_map& get_bootstrap_tokens() const { - return _bootstrap_tokens; - } - - void update_topology(inet_address ep) { - _topology.update_endpoint(ep); - } - - tokens_iterator tokens_end() const { - return tokens_iterator(sorted_tokens().end(), sorted_tokens().size()); - } - - /** - * Creates an iterable range of the sorted tokens starting at the token 
next - * after the given one. - * - * @param start A token that will define the beginning of the range - * - * @return The requested range (see the description above) - */ - auto ring_range(const token& start, bool include_min = false) const { - auto begin = tokens_iterator(start, this, include_min); - auto end = tokens_end(); - return boost::make_iterator_range(begin, end); - } - - boost::iterator_range ring_range( - const std::experimental::optional& start, bool include_min = false) const; - - topology& get_topology() { - return _topology; - } - - const topology& get_topology() const { - return _topology; - } - - void debug_show(); -#if 0 - private static final Logger logger = LoggerFactory.getLogger(TokenMetadata.class); - - /** - * Maintains token to endpoint map of every node in the cluster. - * Each Token is associated with exactly one Address, but each Address may have - * multiple tokens. Hence, the BiMultiValMap collection. - */ - private final BiMultiValMap tokenToEndpointMap; - - /** Maintains endpoint to host ID map of every node in the cluster */ - private final BiMap _endpoint_to_host_id_map; - - // Prior to CASSANDRA-603, we just had Map pendingRanges, - // which was added to when a node began bootstrap and removed from when it finished. - // - // This is inadequate when multiple changes are allowed simultaneously. For example, - // suppose that there is a ring of nodes A, C and E, with replication factor 3. - // Node D bootstraps between C and E, so its pending ranges will be E-A, A-C and C-D. - // Now suppose node B bootstraps between A and C at the same time. Its pending ranges - // would be C-E, E-A and A-B. Now both nodes need to be assigned pending range E-A, - // which we would be unable to represent with the old Map. The same thing happens - // even more obviously for any nodes that boot simultaneously between same two nodes. - // - // So, we made two changes: - // - // First, we changed pendingRanges to a Multimap (now - // Map>, because replication strategy - // and options are per-KeySpace). - // - // Second, we added the bootstrapTokens and leavingEndpoints collections, so we can - // rebuild pendingRanges from the complete information of what is going on, when - // additional changes are made mid-operation. - // - // Finally, note that recording the tokens of joining nodes in bootstrapTokens also - // means we can detect and reject the addition of multiple nodes at the same token - // before one becomes part of the ring. 
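// The note above is easy to reproduce: two bootstrapping nodes can hold a
// pending claim on the very same range, and a plain map silently drops one of
// them. A toy check, with ranges reduced to (left, right] integer pairs:

#include <cassert>
#include <map>
#include <string>
#include <utility>

using token_range = std::pair<int, int>; // (left, right], toy stand-in

int main() {
    // Nodes B and D both bootstrap and both need pending range (500, 100].
    std::map<token_range, std::string> flat;
    flat[{500, 100}] = "B";
    flat[{500, 100}] = "D";           // overwrites B: the claim for B is lost
    assert(flat.size() == 1);

    std::multimap<token_range, std::string> pending; // per-keyspace, as above
    pending.emplace(token_range{500, 100}, "B");
    pending.emplace(token_range{500, 100}, "D");
    assert(pending.count(token_range{500, 100}) == 2); // both claims kept
}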
- private final BiMultiValMap bootstrapTokens = new BiMultiValMap(); - // (don't need to record Token here since it's still part of tokenToEndpointMap until it's done leaving) - private final Set leavingEndpoints = new HashSet(); - // this is a cache of the calculation from {tokenToEndpointMap, bootstrapTokens, leavingEndpoints} - - // nodes which are migrating to the new tokens in the ring - private final Set> _moving_endpoints = new HashSet>(); - - /* Use this lock for manipulating the token map */ - private final ReadWriteLock lock = new ReentrantReadWriteLock(true); - private volatile ArrayList sortedTokens; - - private final Topology topology; - - private static final Comparator inetaddressCmp = new Comparator() - { - public int compare(InetAddress o1, InetAddress o2) - { - return ByteBuffer.wrap(o1.getAddress()).compareTo(ByteBuffer.wrap(o2.getAddress())); - } - }; - - // signals replication strategies that nodes have joined or left the ring and they need to recompute ownership - private volatile long ringVersion = 0; - - public TokenMetadata() - { - this(SortedBiMultiValMap.create(null, inetaddressCmp), - HashBiMap.create(), - new Topology()); - } - - private TokenMetadata(BiMultiValMap tokenToEndpointMap, BiMap endpointsMap, Topology topology) - { - this.tokenToEndpointMap = tokenToEndpointMap; - this.topology = topology; - _endpoint_to_host_id_map = endpointsMap; - sortedTokens = sortTokens(); - } - - private ArrayList sortTokens() - { - return new ArrayList(tokenToEndpointMap.keySet()); - } - - /** @return the number of nodes bootstrapping into source's primary range */ - public int pendingRangeChanges(InetAddress source) - { - int n = 0; - Collection> sourceRanges = getPrimaryRangesFor(getTokens(source)); - lock.readLock().lock(); - try - { - for (Token token : _bootstrap_tokens.keySet()) - for (Range range : sourceRanges) - if (range.contains(token)) - n++; - } - finally - { - lock.readLock().unlock(); - } - return n; - } - - /** - * Update token map with a single token/endpoint pair in normal state. - */ - public void updateNormalToken(Token token, InetAddress endpoint) - { - updateNormalTokens(Collections.singleton(token), endpoint); - } - - public void updateNormalTokens(Collection tokens, InetAddress endpoint) - { - Multimap endpointTokens = HashMultimap.create(); - for (Token token : tokens) - endpointTokens.put(endpoint, token); - updateNormalTokens(endpointTokens); - } - - /** - * Update token map with a set of token/endpoint pairs in normal state. - * - * Prefer this whenever there are multiple pairs to update, as each update (whether a single or multiple) - * is expensive (CASSANDRA-3831). 
- * - * @param endpointTokens - */ - public void updateNormalTokens(Multimap endpointTokens) - { - if (endpointTokens.isEmpty()) - return; - - lock.writeLock().lock(); - try - { - boolean shouldSortTokens = false; - for (InetAddress endpoint : endpointTokens.keySet()) - { - Collection tokens = endpointTokens.get(endpoint); - - assert tokens != null && !tokens.isEmpty(); - - _bootstrap_tokens.removeValue(endpoint); - tokenToEndpointMap.removeValue(endpoint); - topology.addEndpoint(endpoint); - _leaving_endpoints.remove(endpoint); - removeFromMoving(endpoint); // also removing this endpoint from moving - - for (Token token : tokens) - { - InetAddress prev = tokenToEndpointMap.put(token, endpoint); - if (!endpoint.equals(prev)) - { - if (prev != null) - logger.warn("Token {} changing ownership from {} to {}", token, prev, endpoint); - shouldSortTokens = true; - } - } - } - - if (shouldSortTokens) - sortedTokens = sortTokens(); - } - finally - { - lock.writeLock().unlock(); - } - } -#endif - - /** - * Store an end-point to host ID mapping. Each ID must be unique, and - * cannot be changed after the fact. - * - * @param hostId - * @param endpoint - */ - void update_host_id(const UUID& host_id, inet_address endpoint); - - /** Return the unique host ID for an end-point. */ - UUID get_host_id(inet_address endpoint); - - /** Return the end-point for a unique host ID */ - std::experimental::optional get_endpoint_for_host_id(UUID host_id); - - /** @return a copy of the endpoint-to-id map for read-only operations */ - const std::unordered_map& get_endpoint_to_host_id_map_for_reading() const; - - void add_bootstrap_token(token t, inet_address endpoint); - - void add_bootstrap_tokens(std::unordered_set tokens, inet_address endpoint); - - void remove_bootstrap_tokens(std::unordered_set tokens); - - void add_leaving_endpoint(inet_address endpoint); -public: - - /** - * Add a new moving endpoint - * @param token token which is node moving to - * @param endpoint address of the moving node - */ - void add_moving_endpoint(token t, inet_address endpoint); -public: - void remove_endpoint(inet_address endpoint); - - /** - * Remove pair of token/address from moving endpoints - * @param endpoint address of the moving node - */ - void remove_from_moving(inet_address endpoint); -#if 0 - - public Collection getTokens(InetAddress endpoint) - { - assert endpoint != null; - assert isMember(endpoint); // don't want to return nulls - - lock.readLock().lock(); - try - { - return new ArrayList(tokenToEndpointMap.inverse().get(endpoint)); - } - finally - { - lock.readLock().unlock(); - } - } - - @Deprecated - public Token getToken(InetAddress endpoint) - { - return getTokens(endpoint).iterator().next(); - } - -#endif - - bool is_member(inet_address endpoint); - - bool is_leaving(inet_address endpoint); - - bool is_moving(inet_address endpoint) { - for (auto x : _moving_endpoints) { - if (x.second == endpoint) { - return true; - } - } - return false; - } -#if 0 - private final AtomicReference cachedTokenMap = new AtomicReference(); -#endif -public: - - /** - * Create a copy of TokenMetadata with only tokenToEndpointMap. That is, pending ranges, - * bootstrap tokens and leaving endpoints are not included in the copy. 
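// clone_only_token_map() just below applies a common snapshot idea: copy the
// durable core, leave the in-flight bookkeeping behind, and let callers
// simulate membership changes on the copy. The same idea in miniature, with
// hypothetical toy types:

#include <map>
#include <set>
#include <string>

struct mini_ring {
    std::map<int, std::string> token_to_endpoint; // the durable core
    std::set<std::string> leaving;                // transient state...
    std::map<int, std::string> bootstrapping;     // ...not carried by a clone

    mini_ring clone_only_token_map() const {
        mini_ring copy;
        copy.token_to_endpoint = token_to_endpoint; // deep copy of the core
        return copy;            // leaving/bootstrapping stay empty in the copy
    }
};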
- */ - token_metadata clone_only_token_map() { - return token_metadata(this->_token_to_endpoint_map, this->_endpoint_to_host_id_map, this->_topology); - } -#if 0 - - /** - * Return a cached TokenMetadata with only tokenToEndpointMap, i.e., the same as cloneOnlyTokenMap but - * uses a cached copy that is invalided when the ring changes, so in the common case - * no extra locking is required. - * - * Callers must *NOT* mutate the returned metadata object. - */ - public TokenMetadata cachedOnlyTokenMap() - { - TokenMetadata tm = cachedTokenMap.get(); - if (tm != null) - return tm; - - // synchronize to prevent thundering herd (CASSANDRA-6345) - synchronized (this) - { - if ((tm = cachedTokenMap.get()) != null) - return tm; - - tm = cloneOnlyTokenMap(); - cachedTokenMap.set(tm); - return tm; - } - } -#endif - /** - * Create a copy of TokenMetadata with tokenToEndpointMap reflecting situation after all - * current leave operations have finished. - * - * @return new token metadata - */ - token_metadata clone_after_all_left() { - auto all_left_metadata = clone_only_token_map(); - - for (auto endpoint : _leaving_endpoints) { - all_left_metadata.remove_endpoint(endpoint); - } - - return all_left_metadata; - } - -public: - /** - * Create a copy of TokenMetadata with tokenToEndpointMap reflecting situation after all - * current leave, and move operations have finished. - * - * @return new token metadata - */ - token_metadata clone_after_all_settled(); -#if 0 - public InetAddress getEndpoint(Token token) - { - lock.readLock().lock(); - try - { - return tokenToEndpointMap.get(token); - } - finally - { - lock.readLock().unlock(); - } - } -#endif -public: - dht::token_range_vector get_primary_ranges_for(std::unordered_set tokens); - - dht::token_range_vector get_primary_ranges_for(token right); - static boost::icl::interval::interval_type range_to_interval(range r); - static range interval_to_range(boost::icl::interval::interval_type i); - -private: - std::unordered_multimap, inet_address>& get_pending_ranges_mm(sstring keyspace_name); - void set_pending_ranges(const sstring& keyspace_name, std::unordered_multimap, inet_address> new_pending_ranges); - -public: - /** a mutable map may be returned but caller should not modify it */ - const std::unordered_map, std::unordered_set>& get_pending_ranges(sstring keyspace_name); - - std::vector> get_pending_ranges(sstring keyspace_name, inet_address endpoint); - /** - * Calculate pending ranges according to bootsrapping and leaving nodes. Reasoning is: - * - * (1) When in doubt, it is better to write too much to a node than too little. That is, if - * there are multiple nodes moving, calculate the biggest ranges a node could have. Cleaning - * up unneeded data afterwards is better than missing writes during movement. - * (2) When a node leaves, ranges for other nodes can only grow (a node might get additional - * ranges, but it will not lose any of its current ranges as a result of a leave). Therefore - * we will first remove _all_ leaving tokens for the sake of calculation and then check what - * ranges would go where if all nodes are to leave. This way we get the biggest possible - * ranges with regard current leave operations, covering all subsets of possible final range - * values. - * (3) When a node bootstraps, ranges of other nodes can only get smaller. 
Without doing - * complex calculations to see if multiple bootstraps overlap, we simply base calculations - * on the same token ring used before (reflecting situation after all leave operations have - * completed). Bootstrapping nodes will be added and removed one by one to that metadata and - * checked what their ranges would be. This will give us the biggest possible ranges the - * node could have. It might be that other bootstraps make our actual final ranges smaller, - * but it does not matter as we can clean up the data afterwards. - * - * NOTE: This is heavy and ineffective operation. This will be done only once when a node - * changes state in the cluster, so it should be manageable. - */ - void calculate_pending_ranges(abstract_replication_strategy& strategy, const sstring& keyspace_name); -public: - - token get_predecessor(token t); - -#if 0 - public Token getSuccessor(Token token) - { - List tokens = sortedTokens(); - int index = Collections.binarySearch(tokens, token); - assert index >= 0 : token + " not found in " + StringUtils.join(tokenToEndpointMap.keySet(), ", "); - return (Token) ((index == (tokens.size() - 1)) ? tokens.get(0) : tokens.get(index + 1)); - } - - /** @return a copy of the bootstrapping tokens map */ - public BiMultiValMap getBootstrapTokens() - { - lock.readLock().lock(); - try - { - return new BiMultiValMap(_bootstrap_tokens); - } - finally - { - lock.readLock().unlock(); - } - } - -#endif - size_t number_of_endpoints() const { - return _endpoint_to_host_id_map.size(); - } - - std::vector get_all_endpoints() const { - std::vector tmp; - std::transform(_endpoint_to_host_id_map.begin(), _endpoint_to_host_id_map.end(), std::back_inserter(tmp), [](const auto& p) { - return p.first; - }); - return tmp; - } - - size_t get_all_endpoints_count() const { - return _endpoint_to_host_id_map.size(); - } - -#if 0 - public Set getAllEndpoints() - { - lock.readLock().lock(); - try - { - return ImmutableSet.copyOf(_endpoint_to_host_id_map.keySet()); - } - finally - { - lock.readLock().unlock(); - } - } - - /** caller should not modify _leaving_endpoints */ - public Set getLeavingEndpoints() - { - lock.readLock().lock(); - try - { - return ImmutableSet.copyOf(_leaving_endpoints); - } - finally - { - lock.readLock().unlock(); - } - } - - /** - * Endpoints which are migrating to the new tokens - * @return set of addresses of moving endpoints - */ - public Set> getMovingEndpoints() - { - lock.readLock().lock(); - try - { - return ImmutableSet.copyOf(_moving_endpoints); - } - finally - { - lock.readLock().unlock(); - } - } - - public static int firstTokenIndex(final ArrayList ring, Token start, boolean insertMin) - { - assert ring.size() > 0; - // insert the minimum token (at index == -1) if we were asked to include it and it isn't a member of the ring - int i = Collections.binarySearch(ring, start); - if (i < 0) - { - i = (i + 1) * (-1); - if (i >= ring.size()) - i = insertMin ? -1 : 0; - } - return i; - } - - public static Token firstToken(final ArrayList ring, Token start) - { - return ring.get(firstTokenIndex(ring, start, false)); - } - - /** - * iterator over the Tokens in the given ring, starting with the token for the node owning start - * (which does not have to be a Token in the ring) - * @param includeMin True if the minimum token should be returned in the ring even if it has no owner. - */ - public static Iterator ringIterator(final ArrayList ring, Token start, boolean includeMin) - { - if (ring.isEmpty()) - return includeMin ? 
Iterators.singletonIterator(StorageService.getPartitioner().getMinimumToken()) - : Iterators.emptyIterator(); - - final boolean insertMin = includeMin && !ring.get(0).isMinimum(); - final int startIndex = firstTokenIndex(ring, start, insertMin); - return new AbstractIterator() - { - int j = startIndex; - protected Token computeNext() - { - if (j < -1) - return endOfData(); - try - { - // return minimum for index == -1 - if (j == -1) - return StorageService.getPartitioner().getMinimumToken(); - // return ring token for other indexes - return ring.get(j); - } - finally - { - j++; - if (j == ring.size()) - j = insertMin ? -1 : 0; - if (j == startIndex) - // end iteration - j = -2; - } - } - }; - } - - /** used by tests */ - public void clearUnsafe() - { - lock.writeLock().lock(); - try - { - tokenToEndpointMap.clear(); - _endpoint_to_host_id_map.clear(); - _bootstrap_tokens.clear(); - _leaving_endpoints.clear(); - _pending_ranges.clear(); - _moving_endpoints.clear(); - sortedTokens.clear(); - topology.clear(); - invalidateCachedRings(); - } - finally - { - lock.writeLock().unlock(); - } - } - - public String toString() - { - StringBuilder sb = new StringBuilder(); - lock.readLock().lock(); - try - { - Set eps = tokenToEndpointMap.inverse().keySet(); - - if (!eps.isEmpty()) - { - sb.append("Normal Tokens:"); - sb.append(System.getProperty("line.separator")); - for (InetAddress ep : eps) - { - sb.append(ep); - sb.append(":"); - sb.append(tokenToEndpointMap.inverse().get(ep)); - sb.append(System.getProperty("line.separator")); - } - } - - if (!_bootstrap_tokens.isEmpty()) - { - sb.append("Bootstrapping Tokens:" ); - sb.append(System.getProperty("line.separator")); - for (Map.Entry entry : _bootstrap_tokens.entrySet()) - { - sb.append(entry.getValue()).append(":").append(entry.getKey()); - sb.append(System.getProperty("line.separator")); - } - } - - if (!_leaving_endpoints.isEmpty()) - { - sb.append("Leaving Endpoints:"); - sb.append(System.getProperty("line.separator")); - for (InetAddress ep : _leaving_endpoints) - { - sb.append(ep); - sb.append(System.getProperty("line.separator")); - } - } - - if (!_pending_ranges.isEmpty()) - { - sb.append("Pending Ranges:"); - sb.append(System.getProperty("line.separator")); - sb.append(printPendingRanges()); - } - } - finally - { - lock.readLock().unlock(); - } - - return sb.toString(); - } -#endif - sstring print_pending_ranges(); -public: - std::vector pending_endpoints_for(const token& token, const sstring& keyspace_name); -#if 0 - /** - * @deprecated retained for benefit of old tests - */ - public Collection getWriteEndpoints(Token token, String keyspaceName, Collection naturalEndpoints) - { - return ImmutableList.copyOf(Iterables.concat(naturalEndpoints, pendingEndpointsFor(token, keyspaceName))); - } -#endif - -public: - /** @return an endpoint to token multimap representation of tokenToEndpointMap (a copy) */ - std::multimap get_endpoint_to_token_map_for_reading(); - /** - * @return a (stable copy, won't be modified) Token to Endpoint map for all the normal and bootstrapping nodes - * in the cluster. - */ - std::map get_normal_and_bootstrapping_token_to_endpoint_map(); - -#if 0 - /** - * @return the Topology map of nodes to DCs + Racks - * - * This is only allowed when a copy has been made of TokenMetadata, to avoid concurrent modifications - * when Topology methods are subsequently used by the caller. 
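// getRingVersion()/invalidateCachedRings() a little further down (and their
// C++ counterparts at the end of this class) are classic version-stamped
// caching: every membership change bumps a counter, and consumers rebuild
// derived data lazily when their remembered version no longer matches. A
// sketch of the consumer side, with hypothetical names:

#include <vector>

struct ring_source {
    long version = 0;
    void invalidate() { ++version; } // any membership change bumps this
};

struct ownership_cache {
    long built_against = -1;
    std::vector<int> ranges; // some expensive derived structure

    const std::vector<int>& get(const ring_source& src) {
        if (built_against != src.version) {
            ranges = recompute(src);      // heavy; runs once per ring change
            built_against = src.version;
        }
        return ranges;
    }
    std::vector<int> recompute(const ring_source&) { return {1, 2, 3}; }
};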
- */ - public Topology getTopology() - { - assert this != StorageService.instance.getTokenMetadata(); - return topology; - } - - public long getRingVersion() - { - return ringVersion; - } - - public void invalidateCachedRings() - { - ringVersion++; - cachedTokenMap.set(null); - } - - /** - * Tracks the assignment of racks and endpoints in each datacenter for all the "normal" endpoints - * in this TokenMetadata. This allows faster calculation of endpoints in NetworkTopologyStrategy. - */ - public static class Topology - { - /** multi-map of DC to endpoints in that DC */ - private final Multimap dcEndpoints; - /** map of DC to multi-map of rack to endpoints in that rack */ - private final Map> dcRacks; - /** reverse-lookup map for endpoint to current known dc/rack assignment */ - private final Map> currentLocations; - - protected Topology() - { - dcEndpoints = HashMultimap.create(); - dcRacks = new HashMap>(); - currentLocations = new HashMap>(); - } - - protected void clear() - { - dcEndpoints.clear(); - dcRacks.clear(); - currentLocations.clear(); - } - - /** - * construct deep-copy of other - */ - protected Topology(Topology other) - { - dcEndpoints = HashMultimap.create(other.dcEndpoints); - dcRacks = new HashMap>(); - for (String dc : other.dcRacks.keySet()) - dcRacks.put(dc, HashMultimap.create(other.dcRacks.get(dc))); - currentLocations = new HashMap>(other.currentLocations); - } - - /** - * Stores current DC/rack assignment for ep - */ - protected void addEndpoint(InetAddress ep) - { - IEndpointSnitch snitch = DatabaseDescriptor.getEndpointSnitch(); - String dc = snitch.getDatacenter(ep); - String rack = snitch.getRack(ep); - Pair current = currentLocations.get(ep); - if (current != null) - { - if (current.left.equals(dc) && current.right.equals(rack)) - return; - dcRacks.get(current.left).remove(current.right, ep); - dcEndpoints.remove(current.left, ep); - } - - dcEndpoints.put(dc, ep); - - if (!dcRacks.containsKey(dc)) - dcRacks.put(dc, HashMultimap.create()); - dcRacks.get(dc).put(rack, ep); - - currentLocations.put(ep, Pair.create(dc, rack)); - } - - /** - * Removes current DC/rack assignment for ep - */ - protected void removeEndpoint(InetAddress ep) - { - if (!currentLocations.containsKey(ep)) - return; - Pair current = currentLocations.remove(ep); - dcEndpoints.remove(current.left, ep); - dcRacks.get(current.left).remove(current.right, ep); - } - - /** - * @return multi-map of DC to endpoints in that DC - */ - public Multimap getDatacenterEndpoints() - { - return dcEndpoints; - } - - /** - * @return map of DC to multi-map of rack to endpoints in that rack - */ - public Map> getDatacenterRacks() - { - return dcRacks; - } - } -#endif - long get_ring_version() const { - return _ring_version; - } - - void invalidate_cached_rings() { - ++_ring_version; - //cachedTokenMap.set(null); - } -}; - -} diff --git a/scylla/log.hh b/scylla/log.hh deleted file mode 100644 index 746fcf8..0000000 --- a/scylla/log.hh +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (C) 2014 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -namespace logging { - -using log_level = seastar::log_level; - -using logger = seastar::logger; -using registry = seastar::log_registry; - - -using seastar::pretty_type_name; -using seastar::level_name; -using seastar::logger_registry; - -} diff --git a/scylla/map_difference.hh b/scylla/map_difference.hh deleted file mode 100644 index 233fd61..0000000 --- a/scylla/map_difference.hh +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include - -template -struct map_difference { - // Entries in left map whose keys don't exist in the right map. - std::set entries_only_on_left; - - // Entries in right map whose keys don't exist in the left map. - std::set entries_only_on_right; - - // Entries that appear in both maps with the same value. - std::set entries_in_common; - - // Entries that appear in both maps but have different values. - std::set entries_differing; - - map_difference() - : entries_only_on_left{} - , entries_only_on_right{} - , entries_in_common{} - , entries_differing{} - { } -}; - -/** - * Produces a map_difference between the two specified maps, with Key keys and - * Tp values, using the provided equality function. In order to work with any - * map type, such as std::map and std::unordered_map, Args holds the remaining - * type parameters of the particular map type. - */ -template class Map, - typename Key, - typename Tp, - typename Eq = std::equal_to, - typename... Args> -inline -map_difference -difference(const Map& left, - const Map& right, - Eq equals = Eq()) -{ - map_difference diff{}; - for (auto&& kv : right) { - diff.entries_only_on_right.emplace(kv.first); - } - for (auto&& kv : left) { - auto&& left_key = kv.first; - auto&& it = right.find(left_key); - if (it != right.end()) { - diff.entries_only_on_right.erase(left_key); - const Tp& left_value = kv.second; - const Tp& right_value = it->second; - if (equals(left_value, right_value)) { - diff.entries_in_common.emplace(left_key); - } else { - diff.entries_differing.emplace(left_key); - } - } else { - diff.entries_only_on_left.emplace(left_key); - } - } - return diff; -} diff --git a/scylla/md5_hasher.hh b/scylla/md5_hasher.hh deleted file mode 100644 index 0de64e5..0000000 --- a/scylla/md5_hasher.hh +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
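// A usage sketch for the difference() template above: comparing two versions
// of a token-to-endpoint map classifies every key into exactly one bucket.
// Illustrative only; it assumes map_difference.hh is on the include path.

#include <cassert>
#include <map>
#include <set>
#include <string>

int main() {
    std::map<int, std::string> before{{1, "A"}, {2, "B"}, {3, "C"}};
    std::map<int, std::string> after {{2, "B"}, {3, "X"}, {4, "D"}};
    auto d = difference(before, after);
    assert(d.entries_only_on_left  == std::set<int>{1}); // removed
    assert(d.entries_only_on_right == std::set<int>{4}); // added
    assert(d.entries_in_common     == std::set<int>{2}); // unchanged
    assert(d.entries_differing     == std::set<int>{3}); // value changed
}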
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#define CRYPTOPP_ENABLE_NAMESPACE_WEAK 1 - -#include -#include "hashing.hh" -#include "bytes.hh" - -class md5_hasher { - CryptoPP::Weak::MD5 hash{}; -public: - void update(const char* ptr, size_t length) { - static_assert(sizeof(char) == sizeof(byte), "Assuming lengths will be the same"); - hash.Update(reinterpret_cast(ptr), length * sizeof(byte)); - } - - bytes finalize() { - bytes digest{bytes::initialized_later(), CryptoPP::Weak::MD5::DIGESTSIZE}; - hash.Final(reinterpret_cast(digest.begin())); - return digest; - } - - std::array finalize_array() { - std::array array; - hash.Final(reinterpret_cast(array.data())); - return array; - } -}; diff --git a/scylla/memtable-sstable.hh b/scylla/memtable-sstable.hh deleted file mode 100644 index 5331ba5..0000000 --- a/scylla/memtable-sstable.hh +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - * - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - - -// Glue logic for writing memtables to sstables - -#pragma once - -#include "memtable.hh" -#include "sstables/sstables.hh" -#include -#include -#include - -future<> -write_memtable_to_sstable(memtable& mt, - sstables::shared_sstable sst, - bool backup = false, - const io_priority_class& pc = default_priority_class(), - bool leave_unsealed = false, - seastar::thread_scheduling_group* tsg = nullptr); - diff --git a/scylla/memtable.cc b/scylla/memtable.cc deleted file mode 100644 index 50f6850..0000000 --- a/scylla/memtable.cc +++ /dev/null @@ -1,576 +0,0 @@ -/* - * Copyright (C) 2014 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
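// memtable::clear_gently() just below frees partitions in bounded batches so
// that destroying a large memtable never monopolizes the shard. The batching
// skeleton, reduced to a plain ordered container and a stand-in for
// seastar::thread::yield():

#include <algorithm>
#include <cstddef>
#include <map>

void yield_to_reactor() { /* placeholder: seastar::thread::yield() in scylla */ }

template <typename Container>
void clear_in_batches(Container& c, std::size_t batch = 32) {
    while (!c.empty()) {
        std::size_t n = std::min(batch, c.size());
        auto it = c.begin();
        while (n--) {
            it = c.erase(it);   // destroy a bounded slice of entries...
        }
        yield_to_reactor();     // ...then let other tasks run
    }
}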
- */ - -#include "memtable.hh" -#include "database.hh" -#include "frozen_mutation.hh" -#include "sstable_mutation_readers.hh" -#include "stdx.hh" -#include "partition_snapshot_reader.hh" - -memtable::memtable(schema_ptr schema, dirty_memory_manager& dmm, memtable_list* memtable_list) - : logalloc::region(dmm.region_group()) - , _dirty_mgr(dmm) - , _memtable_list(memtable_list) - , _schema(std::move(schema)) - , partitions(memtable_entry::compare(_schema)) { -} - -static thread_local dirty_memory_manager mgr_for_tests; - -memtable::memtable(schema_ptr schema) - : memtable(std::move(schema), mgr_for_tests, nullptr) -{ } - -memtable::~memtable() { - revert_flushed_memory(); - clear(); -} - -uint64_t memtable::dirty_size() const { - return occupancy().total_space(); -} - -void memtable::clear() noexcept { - auto dirty_before = dirty_size(); - with_allocator(allocator(), [this] { - partitions.clear_and_dispose(current_deleter()); - }); - remove_flushed_memory(dirty_before - dirty_size()); -} - -future<> memtable::clear_gently() noexcept { - return futurize_apply([this] { - static thread_local seastar::thread_scheduling_group scheduling_group(std::chrono::milliseconds(1), 0.2); - auto attr = seastar::thread_attributes(); - attr.scheduling_group = &scheduling_group; - auto t = std::make_unique(attr, [this] { - auto& alloc = allocator(); - - auto p = std::move(partitions); - while (!p.empty()) { - auto batch_size = std::min(p.size(), 32); - auto dirty_before = dirty_size(); - with_allocator(alloc, [&] () noexcept { - while (batch_size--) { - p.erase_and_dispose(p.begin(), [&] (auto e) { - alloc.destroy(e); - }); - } - }); - remove_flushed_memory(dirty_before - dirty_size()); - seastar::thread::yield(); - } - }); - auto f = t->join(); - return f.then([t = std::move(t)] {}); - }).handle_exception([this] (auto e) { - this->clear(); - }); -} - -partition_entry& -memtable::find_or_create_partition_slow(partition_key_view key) { - assert(!reclaiming_enabled()); - - // FIXME: Perform lookup using std::pair - // to avoid unconditional copy of the partition key. - // We can't do it right now because std::map<> which holds - // partitions doesn't support heterogeneous lookup. - // We could switch to boost::intrusive_map<> similar to what we have for row keys. - auto& outer = current_allocator(); - return with_allocator(standard_allocator(), [&, this] () -> partition_entry& { - auto dk = dht::global_partitioner().decorate_key(*_schema, key); - return with_allocator(outer, [&dk, this] () -> partition_entry& { - return with_linearized_managed_bytes([&] () -> partition_entry& { - return find_or_create_partition(dk); - }); - }); - }); -} - -partition_entry& -memtable::find_or_create_partition(const dht::decorated_key& key) { - assert(!reclaiming_enabled()); - - // call lower_bound so we have a hint for the insert, just in case. 
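// A standalone sketch, on a plain std::set, of the lower_bound + hinted-insert
// pattern the code just below relies on: lower_bound() both answers "is the
// key present?" and, on a miss, yields exactly the position the new element
// must take, so insert(hint, value) avoids a second O(log n) search.
#include <cassert>
#include <set>

int main() {
    std::set<int> s = {10, 30};
    auto i = s.lower_bound(20);      // first element not less than 20
    if (i == s.end() || *i != 20) {  // miss: i is exactly the insertion point
        i = s.insert(i, 20);         // hinted insert reuses the lookup
    }
    assert(*i == 20 && s.size() == 3);
}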
- auto i = partitions.lower_bound(key, memtable_entry::compare(_schema)); - if (i == partitions.end() || !key.equal(*_schema, i->key())) { - memtable_entry* entry = current_allocator().construct( - _schema, dht::decorated_key(key), mutation_partition(_schema)); - i = partitions.insert(i, *entry); - return entry->partition(); - } else { - upgrade_entry(*i); - } - return i->partition(); -} - -boost::iterator_range -memtable::slice(const dht::partition_range& range) const { - if (query::is_single_partition(range)) { - const query::ring_position& pos = range.start()->value(); - auto i = partitions.find(pos, memtable_entry::compare(_schema)); - if (i != partitions.end()) { - return boost::make_iterator_range(i, std::next(i)); - } else { - return boost::make_iterator_range(i, i); - } - } else { - auto cmp = memtable_entry::compare(_schema); - - auto i1 = range.start() - ? (range.start()->is_inclusive() - ? partitions.lower_bound(range.start()->value(), cmp) - : partitions.upper_bound(range.start()->value(), cmp)) - : partitions.cbegin(); - - auto i2 = range.end() - ? (range.end()->is_inclusive() - ? partitions.upper_bound(range.end()->value(), cmp) - : partitions.lower_bound(range.end()->value(), cmp)) - : partitions.cend(); - - return boost::make_iterator_range(i1, i2); - } -} - -class iterator_reader: public mutation_reader::impl { - lw_shared_ptr _memtable; - schema_ptr _schema; - const dht::partition_range* _range; - stdx::optional _last; - memtable::partitions_type::iterator _i; - memtable::partitions_type::iterator _end; - uint64_t _last_reclaim_counter; - size_t _last_partition_count = 0; - - memtable::partitions_type::iterator lookup_end() { - auto cmp = memtable_entry::compare(_memtable->_schema); - return _range->end() - ? (_range->end()->is_inclusive() - ? _memtable->partitions.upper_bound(_range->end()->value(), cmp) - : _memtable->partitions.lower_bound(_range->end()->value(), cmp)) - : _memtable->partitions.end(); - } - void update_iterators() { - // We must be prepared that iterators may get invalidated during compaction. - auto current_reclaim_counter = _memtable->reclaim_counter(); - auto cmp = memtable_entry::compare(_memtable->_schema); - if (_last) { - if (current_reclaim_counter != _last_reclaim_counter || - _last_partition_count != _memtable->partition_count()) { - _i = _memtable->partitions.upper_bound(*_last, cmp); - _end = lookup_end(); - _last_partition_count = _memtable->partition_count(); - } - } else { - // Initial lookup - _i = _range->start() - ? (_range->start()->is_inclusive() - ? 
_memtable->partitions.lower_bound(_range->start()->value(), cmp) - : _memtable->partitions.upper_bound(_range->start()->value(), cmp)) - : _memtable->partitions.begin(); - _end = lookup_end(); - _last_partition_count = _memtable->partition_count(); - } - _last_reclaim_counter = current_reclaim_counter; - } -protected: - iterator_reader(schema_ptr s, - lw_shared_ptr m, - const dht::partition_range& range) - : _memtable(std::move(m)) - , _schema(std::move(s)) - , _range(&range) - { } - - memtable_entry* fetch_entry() { - update_iterators(); - if (_i == _end) { - return nullptr; - } else { - memtable_entry& e = *_i; - _memtable->upgrade_entry(e); - return &e; - } - } - - void advance() { - memtable_entry& e = *_i; - _last = e.key(); - ++_i; - } - - logalloc::allocating_section& read_section() { - return _memtable->_read_section; - } - - lw_shared_ptr mtbl() { - return _memtable; - } - - schema_ptr schema() { - return _schema; - } - - logalloc::region& region() { - return *_memtable; - }; - - std::experimental::optional get_delegate_range() { - // We cannot run concurrently with row_cache::update(). - if (_memtable->is_flushed()) { - return _last ? _range->split_after(*_last, dht::ring_position_comparator(*_memtable->_schema)) : *_range; - } - return {}; - } - - mutation_reader delegate_reader(const dht::partition_range& delegate, - const query::partition_slice& slice, - const io_priority_class& pc, - streamed_mutation::forwarding fwd, - mutation_reader::forwarding fwd_mr) { - auto ret = (*_memtable->_underlying)(_schema, delegate, slice, pc, nullptr, fwd, fwd_mr); - _memtable = {}; - _last = {}; - return ret; - } -public: - virtual future<> fast_forward_to(const dht::partition_range& pr) override { - _range = ≺ - _last = { }; - return make_ready_future<>(); - } -}; - -class scanning_reader final: public iterator_reader { - stdx::optional _delegate_range; - mutation_reader _delegate; - const io_priority_class& _pc; - const query::partition_slice& _slice; - streamed_mutation::forwarding _fwd; - mutation_reader::forwarding _fwd_mr; -public: - scanning_reader(schema_ptr s, - lw_shared_ptr m, - const dht::partition_range& range, - const query::partition_slice& slice, - const io_priority_class& pc, - streamed_mutation::forwarding fwd, - mutation_reader::forwarding fwd_mr) - : iterator_reader(std::move(s), std::move(m), range) - , _pc(pc) - , _slice(slice) - , _fwd(fwd) - , _fwd_mr(fwd_mr) - { } - - virtual future operator()() override { - if (_delegate_range) { - return _delegate(); - } - - // FIXME: Use cache. See column_family::make_reader(). 
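// A standalone model of the recovery scheme in update_iterators() above, which
// fetch_entry() below depends on: rather than trusting iterators across
// possible invalidation (memtable compaction), remember the last key handed
// out plus a generation counter, and re-seek with upper_bound(last) whenever
// the generation changed. All names here are illustrative.
#include <cstdio>
#include <optional>
#include <set>

int main() {
    std::set<int> s = {1, 2, 3, 4, 5};
    std::optional<int> last;
    unsigned generation = 0, seen = 0;
    auto it = s.begin();
    while (true) {
        if (last && seen != generation) {
            it = s.upper_bound(*last);  // re-establish position after invalidation
            seen = generation;
        }
        if (it == s.end()) {
            break;
        }
        int v = *it++;
        last = v;
        std::printf("%d\n", v);
        if (v == 2) {   // simulate a concurrent erase that invalidates `it`
            s.erase(3);
            ++generation;
        }
    }   // prints 1 2 4 5: the erased key is skipped and nothing dangles
}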
- _delegate_range = get_delegate_range(); - if (_delegate_range) { - _delegate = delegate_reader(*_delegate_range, _slice, _pc, _fwd, _fwd_mr); - return _delegate(); - } - - return read_section()(region(), [&] { - return with_linearized_managed_bytes([&] { - memtable_entry* e = fetch_entry(); - if (!e) { - return make_ready_future(stdx::nullopt); - } else { - auto ret = make_ready_future(e->read(mtbl(), schema(), _slice, _fwd)); - advance(); - return ret; - } - }); - }); - } -}; - -void memtable::add_flushed_memory(uint64_t delta) { - _flushed_memory += delta; - _dirty_mgr.account_potentially_cleaned_up_memory(this, delta); -} - -void memtable::remove_flushed_memory(uint64_t delta) { - delta = std::min(_flushed_memory, delta); - _flushed_memory -= delta; - _dirty_mgr.revert_potentially_cleaned_up_memory(this, delta); -} - -void memtable::on_detach_from_region_group() noexcept { - revert_flushed_memory(); -} - -void memtable::revert_flushed_memory() noexcept { - _dirty_mgr.revert_potentially_cleaned_up_memory(this, _flushed_memory); - _flushed_memory = 0; -} - -class flush_memory_accounter { - memtable& _mt; -public: - void update_bytes_read(uint64_t delta) { - _mt.add_flushed_memory(delta); - } - explicit flush_memory_accounter(memtable& mt) - : _mt(mt) - {} - ~flush_memory_accounter() { - assert(_mt._flushed_memory <= _mt.occupancy().used_space()); - - // Flushed the current memtable. There is still some work to do, like finish sealing the - // SSTable and updating the cache, but we can already allow the next one to start. - // - // By erasing this memtable from the flush_manager we'll destroy the semaphore_units - // associated with this flush and will allow another one to start. We'll signal the - // condition variable to let them know we might be ready early. - _mt._dirty_mgr.remove_from_flush_manager(&_mt); - } - void account_component(memtable_entry& e) { - auto delta = _mt.allocator().object_memory_size_in_allocator(&e) - + e.external_memory_usage_without_rows(); - update_bytes_read(delta); - } - void account_component(partition_snapshot& snp) { - update_bytes_read(_mt.allocator().object_memory_size_in_allocator(&*snp.version())); - } -}; - -class partition_snapshot_accounter { - flush_memory_accounter& _accounter; -public: - partition_snapshot_accounter(flush_memory_accounter& acct): _accounter(acct) {} - - // We will be passed mutation fragments here, and they are allocated using the standard - // allocator. So we can't compute the size in memtable precisely. However, precise accounting is - // hard anyway, since we may be holding multiple snapshots of the partitions, and the - // partition_snapshot_reader may compose them. In doing so, we move memory to the standard - // allocation. As long as our size read here is lesser or equal to the size in the memtables, we - // are safe, and worst case we will allow a bit fewer requests in. - void operator()(const range_tombstone& rt) { - _accounter.update_bytes_read(rt.memory_usage()); - } - - void operator()(const static_row& sr) { - _accounter.update_bytes_read(sr.external_memory_usage()); - } - - void operator()(const clustering_row& cr) { - // Every clustering row is stored in a rows_entry object, and that has some significant - // overhead - so add it here. We will be a bit short on our estimate because we can't know - // what is the size in the allocator for this rows_entry object: we may have many snapshots, - // and we don't know which one(s) contributed to the generation of this mutation fragment. 
- // - // We will add the size of the struct here, and that should be good enough. - _accounter.update_bytes_read(sizeof(rows_entry) + cr.external_memory_usage()); - } -}; - -class flush_reader final : public iterator_reader { - flush_memory_accounter _flushed_memory; -public: - flush_reader(schema_ptr s, lw_shared_ptr m) - : iterator_reader(std::move(s), m, query::full_partition_range) - , _flushed_memory(*m) - {} - flush_reader(const flush_reader&) = delete; - flush_reader(flush_reader&&) = delete; - flush_reader& operator=(flush_reader&&) = delete; - flush_reader& operator=(const flush_reader&) = delete; - - virtual future operator()() override { - return read_section()(region(), [&] { - return with_linearized_managed_bytes([&] { - memtable_entry* e = fetch_entry(); - if (!e) { - return make_ready_future(stdx::nullopt); - } else { - auto cr = query::clustering_key_filter_ranges::get_ranges(*schema(), query::full_slice, e->key().key()); - auto snp = e->partition().read(schema()); - auto mpsr = make_partition_snapshot_reader(schema(), e->key(), std::move(cr), - snp, region(), read_section(), mtbl(), streamed_mutation::forwarding::no, _flushed_memory); - _flushed_memory.account_component(*e); - _flushed_memory.account_component(*snp); - auto ret = make_ready_future(std::move(mpsr)); - advance(); - return ret; - } - }); - }); - } -}; - -mutation_reader -memtable::make_reader(schema_ptr s, - const dht::partition_range& range, - const query::partition_slice& slice, - const io_priority_class& pc, - tracing::trace_state_ptr trace_state_ptr, - streamed_mutation::forwarding fwd, - mutation_reader::forwarding fwd_mr) { - if (query::is_single_partition(range)) { - const query::ring_position& pos = range.start()->value(); - return _read_section(*this, [&] { - managed_bytes::linearization_context_guard lcg; - auto i = partitions.find(pos, memtable_entry::compare(_schema)); - if (i != partitions.end()) { - upgrade_entry(*i); - return make_reader_returning(i->read(shared_from_this(), s, slice, fwd)); - } else { - return make_empty_reader(); - } - }); - } else { - return make_mutation_reader(std::move(s), shared_from_this(), range, slice, pc, fwd, fwd_mr); - } -} - -mutation_reader -memtable::make_flush_reader(schema_ptr s, const io_priority_class& pc) { - if (group()) { - return make_mutation_reader(std::move(s), shared_from_this()); - } else { - return make_mutation_reader(std::move(s), shared_from_this(), - query::full_partition_range, query::full_slice, pc, streamed_mutation::forwarding::no, mutation_reader::forwarding::no); - } -} - -void -memtable::update(db::rp_handle&& h) { - db::replay_position rp = h; - if (_replay_position < rp) { - _replay_position = rp; - } - _rp_set.put(std::move(h)); -} - -future<> -memtable::apply(memtable& mt) { - return do_with(mt.make_reader(_schema), [this] (auto&& rd) mutable { - return consume(rd, [self = this->shared_from_this(), &rd] (mutation&& m) { - self->apply(m); - return stop_iteration::no; - }); - }); -} - -void -memtable::apply(const mutation& m, db::rp_handle&& h) { - with_allocator(allocator(), [this, &m] { - _allocating_section(*this, [&, this] { - with_linearized_managed_bytes([&] { - auto& p = find_or_create_partition(m.decorated_key()); - p.apply(*_schema, m.partition(), *m.schema()); - }); - }); - }); - update(std::move(h)); -} - -void -memtable::apply(const frozen_mutation& m, const schema_ptr& m_schema, db::rp_handle&& h) { - with_allocator(allocator(), [this, &m, &m_schema] { - _allocating_section(*this, [&, this] { - 
with_linearized_managed_bytes([&] { - auto& p = find_or_create_partition_slow(m.key(*_schema)); - p.apply(*_schema, m.partition(), *m_schema); - }); - }); - }); - update(std::move(h)); -} - -logalloc::occupancy_stats memtable::occupancy() const { - return logalloc::region::occupancy(); -} - -mutation_source memtable::as_data_source() { - return mutation_source([mt = shared_from_this()] (schema_ptr s, - const dht::partition_range& range, - const query::partition_slice& slice, - const io_priority_class& pc, - tracing::trace_state_ptr trace_state, - streamed_mutation::forwarding fwd, - mutation_reader::forwarding fwd_mr) { - return mt->make_reader(std::move(s), range, slice, pc, std::move(trace_state), fwd, fwd_mr); - }); -} - -size_t memtable::partition_count() const { - return partitions.size(); -} - -memtable_entry::memtable_entry(memtable_entry&& o) noexcept - : _link() - , _schema(std::move(o._schema)) - , _key(std::move(o._key)) - , _pe(std::move(o._pe)) -{ - using container_type = memtable::partitions_type; - container_type::node_algorithms::replace_node(o._link.this_ptr(), _link.this_ptr()); - container_type::node_algorithms::init(o._link.this_ptr()); -} - -void memtable::mark_flushed(mutation_source underlying) { - _underlying = std::move(underlying); -} - -bool memtable::is_flushed() const { - return bool(_underlying); -} - -streamed_mutation -memtable_entry::read(lw_shared_ptr mtbl, - const schema_ptr& target_schema, - const query::partition_slice& slice, - streamed_mutation::forwarding fwd) { - auto cr = query::clustering_key_filter_ranges::get_ranges(*_schema, slice, _key.key()); - if (_schema->version() != target_schema->version()) { - auto mp = mutation_partition(_pe.squashed(_schema, target_schema), *target_schema, std::move(cr)); - mutation m = mutation(target_schema, _key, std::move(mp)); - return streamed_mutation_from_mutation(std::move(m), fwd); - } - auto snp = _pe.read(_schema); - return make_partition_snapshot_reader(_schema, _key, std::move(cr), snp, *mtbl, mtbl->_read_section, mtbl, fwd); -} - -void memtable::upgrade_entry(memtable_entry& e) { - if (e._schema != _schema) { - assert(!reclaiming_enabled()); - with_allocator(allocator(), [this, &e] { - with_linearized_managed_bytes([&] { - e.partition().upgrade(e._schema, _schema); - e._schema = _schema; - }); - }); - } -} - -void memtable::set_schema(schema_ptr new_schema) noexcept { - _schema = std::move(new_schema); -} diff --git a/scylla/memtable.hh b/scylla/memtable.hh deleted file mode 100644 index e7da20c..0000000 --- a/scylla/memtable.hh +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include -#include -#include "database_fwd.hh" -#include "dht/i_partitioner.hh" -#include "schema.hh" -#include "mutation_reader.hh" -#include "db/commitlog/replay_position.hh" -#include "db/commitlog/rp_set.hh" -#include "utils/logalloc.hh" -#include "partition_version.hh" - -class frozen_mutation; - - -namespace bi = boost::intrusive; - -class memtable_entry { - bi::set_member_hook<> _link; - schema_ptr _schema; - dht::decorated_key _key; - partition_entry _pe; -public: - friend class memtable; - - memtable_entry(schema_ptr s, dht::decorated_key key, mutation_partition p) - : _schema(std::move(s)) - , _key(std::move(key)) - , _pe(std::move(p)) - { } - - memtable_entry(memtable_entry&& o) noexcept; - - const dht::decorated_key& key() const { return _key; } - dht::decorated_key& key() { return _key; } - const partition_entry& partition() const { return _pe; } - partition_entry& partition() { return _pe; } - const schema_ptr& schema() const { return _schema; } - schema_ptr& schema() { return _schema; } - streamed_mutation read(lw_shared_ptr mtbl, const schema_ptr&, const query::partition_slice&, streamed_mutation::forwarding); - - size_t external_memory_usage_without_rows() const { - return _key.key().external_memory_usage(); - } - - struct compare { - dht::decorated_key::less_comparator _c; - - compare(schema_ptr s) - : _c(std::move(s)) - {} - - bool operator()(const dht::decorated_key& k1, const memtable_entry& k2) const { - return _c(k1, k2._key); - } - - bool operator()(const memtable_entry& k1, const memtable_entry& k2) const { - return _c(k1._key, k2._key); - } - - bool operator()(const memtable_entry& k1, const dht::decorated_key& k2) const { - return _c(k1._key, k2); - } - - bool operator()(const memtable_entry& k1, const dht::ring_position& k2) const { - return _c(k1._key, k2); - } - - bool operator()(const dht::ring_position& k1, const memtable_entry& k2) const { - return _c(k1, k2._key); - } - }; -}; - -class dirty_memory_manager; - -// Managed by lw_shared_ptr<>. -class memtable final : public enable_lw_shared_from_this, private logalloc::region { -public: - using partitions_type = bi::set, &memtable_entry::_link>, - bi::compare>; -private: - dirty_memory_manager& _dirty_mgr; - memtable_list *_memtable_list; - schema_ptr _schema; - logalloc::allocating_section _read_section; - logalloc::allocating_section _allocating_section; - partitions_type partitions; - db::replay_position _replay_position; - db::rp_set _rp_set; - // mutation source to which reads fall-back after mark_flushed() - // so that memtable contents can be moved away while there are - // still active readers. This is needed for this mutation_source - // to be monotonic (not loose writes). Monotonicity of each - // mutation_source is necessary for the combined mutation source to be - // monotonic. That combined source in this case is cache + memtable. 
- mutation_source_opt _underlying; - uint64_t _flushed_memory = 0; - void update(db::rp_handle&&); - friend class row_cache; - friend class memtable_entry; - friend class flush_reader; - friend class flush_memory_accounter; -private: - boost::iterator_range slice(const dht::partition_range& r) const; - partition_entry& find_or_create_partition(const dht::decorated_key& key); - partition_entry& find_or_create_partition_slow(partition_key_view key); - void upgrade_entry(memtable_entry&); - void add_flushed_memory(uint64_t); - void remove_flushed_memory(uint64_t); - void clear() noexcept; - uint64_t dirty_size() const; -public: - explicit memtable(schema_ptr schema, dirty_memory_manager&, memtable_list *memtable_list = nullptr); - // Used for testing that want to control the flush process. - explicit memtable(schema_ptr schema); - ~memtable(); - // Clears this memtable gradually without consuming the whole CPU. - // Never resolves with a failed future. - future<> clear_gently() noexcept; - schema_ptr schema() const { return _schema; } - void set_schema(schema_ptr) noexcept; - future<> apply(memtable&); - // Applies mutation to this memtable. - // The mutation is upgraded to current schema. - void apply(const mutation& m, db::rp_handle&& = {}); - // The mutation is upgraded to current schema. - void apply(const frozen_mutation& m, const schema_ptr& m_schema, db::rp_handle&& = {}); - - static memtable& from_region(logalloc::region& r) { - return static_cast(r); - } - - const logalloc::region& region() const { - return *this; - } - - logalloc::region_group* region_group() { - return group(); - } -public: - memtable_list* get_memtable_list() { - return _memtable_list; - } - - size_t partition_count() const; - logalloc::occupancy_stats occupancy() const; - - // Creates a reader of data in this memtable for given partition range. - // - // Live readers share ownership of the memtable instance, so caller - // doesn't need to ensure that memtable remains live. - // - // The 'range' parameter must be live as long as the reader is being used - // - // Mutations returned by the reader will all have given schema. - mutation_reader make_reader(schema_ptr, - const dht::partition_range& range = query::full_partition_range, - const query::partition_slice& slice = query::full_slice, - const io_priority_class& pc = default_priority_class(), - tracing::trace_state_ptr trace_state_ptr = nullptr, - streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no, - mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::no); - - - mutation_reader make_flush_reader(schema_ptr, const io_priority_class& pc); - - mutation_source as_data_source(); - - bool empty() const { return partitions.empty(); } - void mark_flushed(mutation_source); - bool is_flushed() const; - void on_detach_from_region_group() noexcept; - void revert_flushed_memory() noexcept; - - const db::replay_position& replay_position() const { - return _replay_position; - } - const db::rp_set& rp_set() const { - return _rp_set; - } - friend class iterator_reader; -}; diff --git a/scylla/mutation.cc b/scylla/mutation.cc deleted file mode 100644 index e801c2a..0000000 --- a/scylla/mutation.cc +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (C) 2014 ScyllaDB - */ - -/* - * This file is part of Scylla. 
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "mutation.hh"
-#include "query-result-writer.hh"
-
-mutation::data::data(dht::decorated_key&& key, schema_ptr&& schema)
-    : _schema(std::move(schema))
-    , _dk(std::move(key))
-    , _p(_schema)
-{ }
-
-mutation::data::data(partition_key&& key_, schema_ptr&& schema)
-    : _schema(std::move(schema))
-    , _dk(dht::global_partitioner().decorate_key(*_schema, std::move(key_)))
-    , _p(_schema)
-{ }
-
-mutation::data::data(schema_ptr&& schema, dht::decorated_key&& key, const mutation_partition& mp)
-    : _schema(std::move(schema))
-    , _dk(std::move(key))
-    , _p(mp)
-{ }
-
-mutation::data::data(schema_ptr&& schema, dht::decorated_key&& key, mutation_partition&& mp)
-    : _schema(std::move(schema))
-    , _dk(std::move(key))
-    , _p(std::move(mp))
-{ }
-
-void mutation::set_static_cell(const column_definition& def, atomic_cell_or_collection&& value) {
-    partition().static_row().apply(def, std::move(value));
-}
-
-void mutation::set_static_cell(const bytes& name, const data_value& value, api::timestamp_type timestamp, ttl_opt ttl) {
-    auto column_def = schema()->get_column_definition(name);
-    if (!column_def) {
-        throw std::runtime_error(sprint("no column definition found for '%s'", name));
-    }
-    if (!column_def->is_static()) {
-        throw std::runtime_error(sprint("column '%s' is not static", name));
-    }
-    partition().static_row().apply(*column_def, atomic_cell::make_live(timestamp, column_def->type->decompose(value), ttl));
-}
-
-void mutation::set_clustered_cell(const clustering_key& key, const bytes& name, const data_value& value,
-        api::timestamp_type timestamp, ttl_opt ttl) {
-    auto column_def = schema()->get_column_definition(name);
-    if (!column_def) {
-        throw std::runtime_error(sprint("no column definition found for '%s'", name));
-    }
-    return set_clustered_cell(key, *column_def, atomic_cell::make_live(timestamp, column_def->type->decompose(value), ttl));
-}
-
-void mutation::set_clustered_cell(const clustering_key& key, const column_definition& def, atomic_cell_or_collection&& value) {
-    auto& row = partition().clustered_row(*schema(), key).cells();
-    row.apply(def, std::move(value));
-}
-
-void mutation::set_cell(const clustering_key_prefix& prefix, const bytes& name, const data_value& value,
-        api::timestamp_type timestamp, ttl_opt ttl) {
-    auto column_def = schema()->get_column_definition(name);
-    if (!column_def) {
-        throw std::runtime_error(sprint("no column definition found for '%s'", name));
-    }
-    return set_cell(prefix, *column_def, atomic_cell::make_live(timestamp, column_def->type->decompose(value), ttl));
-}
-
-void mutation::set_cell(const clustering_key_prefix& prefix, const column_definition& def, atomic_cell_or_collection&& value) {
-    if (def.is_static()) {
-        set_static_cell(def, std::move(value));
-    } else if (def.is_regular()) {
-        set_clustered_cell(prefix, def, std::move(value));
-    } else {
-        throw std::runtime_error("attempting to store into a key cell");
-    }
-}
-
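// A standalone toy model of the dispatch implemented by set_cell() above: the
// column is resolved by name in the schema, a static column routes the write
// to the static row, a regular column to the clustered row for the given
// clustering prefix, and key columns are rejected. toy_mutation and its
// members are illustrative names, not types from this tree.
#include <map>
#include <stdexcept>
#include <string>
#include <utility>

enum class column_kind { partition_key, clustering_key, static_column, regular_column };

struct toy_mutation {
    std::map<std::string, column_kind> schema;                        // column name -> kind
    std::map<std::string, std::string> static_row;                    // column -> value
    std::map<std::pair<std::string, std::string>, std::string> rows;  // (ck, column) -> value

    void set_cell(const std::string& ck, const std::string& col, std::string value) {
        auto it = schema.find(col);
        if (it == schema.end()) {
            throw std::runtime_error("no column definition found for '" + col + "'");
        }
        switch (it->second) {
        case column_kind::static_column:  static_row[col] = std::move(value); break;
        case column_kind::regular_column: rows[{ck, col}] = std::move(value); break;
        default: throw std::runtime_error("attempting to store into a key cell");
        }
    }
};

int main() {
    toy_mutation m;
    m.schema = {{"s", column_kind::static_column}, {"v", column_kind::regular_column}};
    m.set_cell("ck1", "s", "static value");  // lands in the static row
    m.set_cell("ck1", "v", "row value");     // lands in clustering row "ck1"
}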
-bool mutation::operator==(const mutation& m) const { - return decorated_key().equal(*schema(), m.decorated_key()) - && partition().equal(*schema(), m.partition(), *m.schema()); -} - -bool mutation::operator!=(const mutation& m) const { - return !(*this == m); -} - -void -mutation::query(query::result::builder& builder, - const query::partition_slice& slice, - gc_clock::time_point now, - uint32_t row_limit) && -{ - auto pb = builder.add_partition(*schema(), key()); - auto is_reversed = slice.options.contains(); - mutation_partition& p = partition(); - auto limit = std::min(row_limit, slice.partition_row_limit()); - p.compact_for_query(*schema(), now, slice.row_ranges(*schema(), key()), is_reversed, limit); - p.query_compacted(pb, *schema(), limit); -} - -query::result -mutation::query(const query::partition_slice& slice, - query::result_request request, - gc_clock::time_point now, uint32_t row_limit) && -{ - query::result::builder builder(slice, request, { }); - std::move(*this).query(builder, slice, now, row_limit); - return builder.build(); -} - -query::result -mutation::query(const query::partition_slice& slice, - query::result_request request, - gc_clock::time_point now, uint32_t row_limit) const& -{ - return mutation(*this).query(slice, request, now, row_limit); -} - -size_t -mutation::live_row_count(gc_clock::time_point query_time) const { - return partition().live_row_count(*schema(), query_time); -} - -bool -mutation_decorated_key_less_comparator::operator()(const mutation& m1, const mutation& m2) const { - return m1.decorated_key().less_compare(*m1.schema(), m2.decorated_key()); -} - -boost::iterator_range::const_iterator> -slice(const std::vector& partitions, const dht::partition_range& r) { - struct cmp { - bool operator()(const dht::ring_position& pos, const mutation& m) const { - return m.decorated_key().tri_compare(*m.schema(), pos) > 0; - }; - bool operator()(const mutation& m, const dht::ring_position& pos) const { - return m.decorated_key().tri_compare(*m.schema(), pos) < 0; - }; - }; - - return boost::make_iterator_range( - r.start() - ? (r.start()->is_inclusive() - ? std::lower_bound(partitions.begin(), partitions.end(), r.start()->value(), cmp()) - : std::upper_bound(partitions.begin(), partitions.end(), r.start()->value(), cmp())) - : partitions.cbegin(), - r.end() - ? (r.end()->is_inclusive() - ? 
std::upper_bound(partitions.begin(), partitions.end(), r.end()->value(), cmp()) - : std::lower_bound(partitions.begin(), partitions.end(), r.end()->value(), cmp())) - : partitions.cend()); -} - -void -mutation::upgrade(const schema_ptr& new_schema) { - if (_ptr->_schema != new_schema) { - schema_ptr s = new_schema; - partition().upgrade(*schema(), *new_schema); - _ptr->_schema = std::move(s); - } -} - -void mutation::apply(mutation&& m) { - partition().apply(*schema(), std::move(m.partition()), *m.schema()); -} - -void mutation::apply(const mutation& m) { - partition().apply(*schema(), m.partition(), *m.schema()); -} - -mutation& mutation::operator=(const mutation& m) { - return *this = mutation(m); -} - -mutation mutation::operator+(const mutation& other) const { - auto m = *this; - m.apply(other); - return m; -} - -mutation& mutation::operator+=(const mutation& other) { - apply(other); - return *this; -} - -mutation& mutation::operator+=(mutation&& other) { - apply(std::move(other)); - return *this; -} - -mutation mutation::sliced(const query::clustering_row_ranges& ranges) const { - auto m = mutation(schema(), decorated_key(), mutation_partition(partition(), *schema(), ranges)); - m.partition().row_tombstones().trim(*schema(), ranges); - return m; -} - -class mutation_rebuilder { - mutation _m; - streamed_mutation& _sm; - size_t _remaining_limit; - -public: - mutation_rebuilder(streamed_mutation& sm) - : _m(sm.decorated_key(), sm.schema()), _sm(sm), _remaining_limit(0) { - } - - stop_iteration consume(tombstone t) { - _m.partition().apply(t); - return stop_iteration::no; - } - - stop_iteration consume(range_tombstone&& rt) { - _m.partition().apply_row_tombstone(*_m.schema(), std::move(rt)); - return stop_iteration::no; - } - - stop_iteration consume(static_row&& sr) { - _m.partition().static_row().apply(*_m.schema(), column_kind::static_column, std::move(sr.cells())); - return stop_iteration::no; - } - - stop_iteration consume(clustering_row&& cr) { - auto& dr = _m.partition().clustered_row(*_m.schema(), std::move(cr.key())); - dr.apply(cr.tomb()); - dr.apply(cr.marker()); - dr.cells().apply(*_m.schema(), column_kind::regular_column, std::move(cr.cells())); - return stop_iteration::no; - } - - mutation_opt consume_end_of_stream() { - return mutation_opt(std::move(_m)); - } -}; - -future mutation_from_streamed_mutation(streamed_mutation_opt sm) { - if (!sm) { - return make_ready_future(); - } - return do_with(std::move(*sm), [] (auto& sm) { - return consume(sm, mutation_rebuilder(sm)); - }); -} - -future mutation_from_streamed_mutation(streamed_mutation& sm) { - return consume(sm, mutation_rebuilder(sm)).then([] (mutation_opt&& mo) { - return std::move(*mo); - }); -} diff --git a/scylla/mutation.hh b/scylla/mutation.hh deleted file mode 100644 index f3f6345..0000000 --- a/scylla/mutation.hh +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (C) 2014 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -#include "mutation_partition.hh" -#include "keys.hh" -#include "schema.hh" -#include "dht/i_partitioner.hh" -#include "hashing.hh" -#include "utils/optimized_optional.hh" -#include "streamed_mutation.hh" - -class mutation final { -private: - struct data { - schema_ptr _schema; - dht::decorated_key _dk; - mutation_partition _p; - - data(dht::decorated_key&& key, schema_ptr&& schema); - data(partition_key&& key, schema_ptr&& schema); - data(schema_ptr&& schema, dht::decorated_key&& key, const mutation_partition& mp); - data(schema_ptr&& schema, dht::decorated_key&& key, mutation_partition&& mp); - }; - std::unique_ptr _ptr; -private: - mutation() = default; - explicit operator bool() const { return bool(_ptr); } - friend class optimized_optional; -public: - mutation(dht::decorated_key key, schema_ptr schema) - : _ptr(std::make_unique(std::move(key), std::move(schema))) - { } - mutation(partition_key key_, schema_ptr schema) - : _ptr(std::make_unique(std::move(key_), std::move(schema))) - { } - mutation(schema_ptr schema, dht::decorated_key key, const mutation_partition& mp) - : _ptr(std::make_unique(std::move(schema), std::move(key), mp)) - { } - mutation(schema_ptr schema, dht::decorated_key key, mutation_partition&& mp) - : _ptr(std::make_unique(std::move(schema), std::move(key), std::move(mp))) - { } - mutation(const mutation& m) - : _ptr(std::make_unique(schema_ptr(m.schema()), dht::decorated_key(m.decorated_key()), m.partition())) - { } - mutation(mutation&&) = default; - mutation& operator=(mutation&& x) = default; - mutation& operator=(const mutation& m); - - void set_static_cell(const column_definition& def, atomic_cell_or_collection&& value); - void set_static_cell(const bytes& name, const data_value& value, api::timestamp_type timestamp, ttl_opt ttl = {}); - void set_clustered_cell(const clustering_key& key, const bytes& name, const data_value& value, api::timestamp_type timestamp, ttl_opt ttl = {}); - void set_clustered_cell(const clustering_key& key, const column_definition& def, atomic_cell_or_collection&& value); - void set_cell(const clustering_key_prefix& prefix, const bytes& name, const data_value& value, api::timestamp_type timestamp, ttl_opt ttl = {}); - void set_cell(const clustering_key_prefix& prefix, const column_definition& def, atomic_cell_or_collection&& value); - - // Upgrades this mutation to a newer schema. The new schema must - // be obtained using only valid schema transformation: - // * primary key column count must not change - // * column types may only change to those with compatible representations - // - // After upgrade, mutation's partition should only be accessed using the new schema. User must - // ensure proper isolation of accesses. - // - // Strong exception guarantees. 
- // - // Note that the conversion may lose information, it's possible that m1 != m2 after: - // - // auto m2 = m1; - // m2.upgrade(s2); - // m2.upgrade(m1.schema()); - // - void upgrade(const schema_ptr&); - - const partition_key& key() const { return _ptr->_dk._key; }; - const dht::decorated_key& decorated_key() const { return _ptr->_dk; }; - dht::ring_position ring_position() const { return { decorated_key() }; } - const dht::token& token() const { return _ptr->_dk._token; } - const schema_ptr& schema() const { return _ptr->_schema; } - const mutation_partition& partition() const { return _ptr->_p; } - mutation_partition& partition() { return _ptr->_p; } - const utils::UUID& column_family_id() const { return _ptr->_schema->id(); } - // Consistent with hash - bool operator==(const mutation&) const; - bool operator!=(const mutation&) const; -public: - // The supplied partition_slice must be governed by this mutation's schema - query::result query(const query::partition_slice&, - query::result_request request = query::result_request::only_result, - gc_clock::time_point now = gc_clock::now(), - uint32_t row_limit = query::max_rows) &&; - - // The supplied partition_slice must be governed by this mutation's schema - // FIXME: Slower than the r-value version - query::result query(const query::partition_slice&, - query::result_request request = query::result_request::only_result, - gc_clock::time_point now = gc_clock::now(), - uint32_t row_limit = query::max_rows) const&; - - // The supplied partition_slice must be governed by this mutation's schema - void query(query::result::builder& builder, - const query::partition_slice& slice, - gc_clock::time_point now = gc_clock::now(), - uint32_t row_limit = query::max_rows) &&; - - // See mutation_partition::live_row_count() - size_t live_row_count(gc_clock::time_point query_time = gc_clock::time_point::min()) const; - - void apply(mutation&&); - void apply(const mutation&); - - mutation operator+(const mutation& other) const; - mutation& operator+=(const mutation& other); - mutation& operator+=(mutation&& other); - - // Returns a subset of this mutation holding only information relevant for given clustering ranges. - // Range tombstones will be trimmed to the boundaries of the clustering ranges. - mutation sliced(const query::clustering_row_ranges&) const; -private: - friend std::ostream& operator<<(std::ostream& os, const mutation& m); -}; - -struct mutation_decorated_key_less_comparator { - bool operator()(const mutation& m1, const mutation& m2) const; -}; - -template<> -struct move_constructor_disengages { - enum { value = true }; -}; -using mutation_opt = optimized_optional; - -// Consistent with operator==() -// Consistent across the cluster, so should not rely on particular -// serialization format, only on actual data stored. -template<> -struct appending_hash { - template - void operator()(Hasher& h, const mutation& m) const { - const schema& s = *m.schema(); - m.key().feed_hash(h, s); - m.partition().feed_hash(h, s); - } -}; - -inline -void apply(mutation_opt& dst, mutation&& src) { - if (!dst) { - dst = std::move(src); - } else { - dst->apply(std::move(src)); - } -} - -inline -void apply(mutation_opt& dst, mutation_opt&& src) { - if (src) { - apply(dst, std::move(*src)); - } -} - -// Returns a range into partitions containing mutations covered by the range. -// partitions must be sorted according to decorated key. -// range must not wrap around. 
-boost::iterator_range::const_iterator> slice( - const std::vector& partitions, - const dht::partition_range&); - -future mutation_from_streamed_mutation(streamed_mutation_opt sm); -future mutation_from_streamed_mutation(streamed_mutation& sm); diff --git a/scylla/mutation_compactor.hh b/scylla/mutation_compactor.hh deleted file mode 100644 index 6671871..0000000 --- a/scylla/mutation_compactor.hh +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "streamed_mutation.hh" - -static inline bool has_ck_selector(const query::clustering_row_ranges& ranges) { - // Like PK range, an empty row range, should be considered an "exclude all" restriction - return ranges.empty() || std::any_of(ranges.begin(), ranges.end(), [](auto& r) { - return !r.is_full(); - }); -} - -enum class emit_only_live_rows { - no, - yes, -}; - -enum class compact_for_sstables { - no, - yes, -}; - -/* -template -concept bool CompactedMutationsConsumer() { - return requires(T obj, tombstone t, const dht::decorated_key& dk, static_row sr, - clustering_row cr, range_tombstone rt, tombstone current_tombstone, bool is_alive) - { - obj.consume_new_partition(dk); - obj.consume(t); - { obj.consume(std::move(sr), current_tombstone, is_alive) } ->stop_iteration; - { obj.consume(std::move(cr), current_tombstone, is_alive) } ->stop_iteration; - { obj.consume(std::move(rt)) } ->stop_iteration; - { obj.consume_end_of_partition() } ->stop_iteration; - obj.consume_end_of_stream(); - }; -} -*/ -// emit_only_live::yes will cause compact_for_query to emit only live -// static and clustering rows. It doesn't affect the way range tombstones are -// emitted. 
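// A skeleton consumer satisfying the commented-out CompactedMutationsConsumer
// concept above — a sketch against the types used in this file, with the
// argument types inferred from how compact_mutation invokes its consumer
// below; the bodies merely count what flows through.
struct counting_consumer {
    size_t partitions = 0;
    size_t live_rows = 0;

    void consume_new_partition(const dht::decorated_key&) { ++partitions; }
    void consume(tombstone) { }
    stop_iteration consume(static_row&&, tombstone, bool) { return stop_iteration::no; }
    stop_iteration consume(clustering_row&&, row_tombstone, bool is_live) {
        live_rows += is_live;
        return stop_iteration::no;   // yes would make the compactor stop early
    }
    stop_iteration consume(range_tombstone&&) { return stop_iteration::no; }
    stop_iteration consume_end_of_partition() { return stop_iteration::no; }
    size_t consume_end_of_stream() { return live_rows; }
};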
-template -class compact_mutation { - const schema& _schema; - gc_clock::time_point _query_time; - gc_clock::time_point _gc_before; - std::function _get_max_purgeable; - can_gc_fn _can_gc; - api::timestamp_type _max_purgeable = api::missing_timestamp; - const query::partition_slice& _slice; - uint32_t _row_limit{}; - uint32_t _partition_limit{}; - uint32_t _partition_row_limit{}; - - CompactedMutationsConsumer _consumer; - range_tombstone_accumulator _range_tombstones; - - bool _static_row_live{}; - uint32_t _rows_in_current_partition; - uint32_t _current_partition_limit; - bool _empty_partition{}; - const dht::decorated_key* _dk; - bool _has_ck_selector{}; -private: - static constexpr bool only_live() { - return OnlyLive == emit_only_live_rows::yes; - } - static constexpr bool sstable_compaction() { - return SSTableCompaction == compact_for_sstables::yes; - } - - void partition_is_not_empty() { - if (_empty_partition) { - _empty_partition = false; - _consumer.consume_new_partition(*_dk); - auto pt = _range_tombstones.get_partition_tombstone(); - if (pt && !can_purge_tombstone(pt)) { - _consumer.consume(pt); - } - } - } - - bool can_purge_tombstone(const tombstone& t) { - return t.deletion_time < _gc_before && can_gc(t); - }; - - bool can_purge_tombstone(const row_tombstone& t) { - return t.max_deletion_time() < _gc_before && can_gc(t.tomb()); - }; - - bool can_gc(tombstone t) { - if (!sstable_compaction()) { - return true; - } - if (!t) { - return false; - } - if (_max_purgeable == api::missing_timestamp) { - _max_purgeable = _get_max_purgeable(*_dk); - } - return t.timestamp < _max_purgeable; - }; -public: - compact_mutation(compact_mutation&&) = delete; // Because 'this' is captured - - compact_mutation(const schema& s, gc_clock::time_point query_time, const query::partition_slice& slice, uint32_t limit, - uint32_t partition_limit, CompactedMutationsConsumer consumer) - : _schema(s) - , _query_time(query_time) - , _gc_before(saturating_subtract(query_time, s.gc_grace_seconds())) - , _can_gc(always_gc) - , _slice(slice) - , _row_limit(limit) - , _partition_limit(partition_limit) - , _partition_row_limit(_slice.options.contains(query::partition_slice::option::distinct) ? 
1 : slice.partition_row_limit()) - , _consumer(std::move(consumer)) - , _range_tombstones(s, _slice.options.contains(query::partition_slice::option::reversed)) - { - static_assert(!sstable_compaction(), "This constructor cannot be used for sstable compaction."); - } - - compact_mutation(const schema& s, gc_clock::time_point compaction_time, CompactedMutationsConsumer consumer, - std::function get_max_purgeable) - : _schema(s) - , _query_time(compaction_time) - , _gc_before(saturating_subtract(_query_time, s.gc_grace_seconds())) - , _get_max_purgeable(std::move(get_max_purgeable)) - , _can_gc([this] (tombstone t) { return can_gc(t); }) - , _slice(query::full_slice) - , _consumer(std::move(consumer)) - , _range_tombstones(s, false) - { - static_assert(sstable_compaction(), "This constructor can only be used for sstable compaction."); - static_assert(!only_live(), "SSTable compaction cannot be run with emit_only_live_rows::yes."); - } - - void consume_new_partition(const dht::decorated_key& dk) { - auto& pk = dk.key(); - _dk = &dk; - _has_ck_selector = has_ck_selector(_slice.row_ranges(_schema, pk)); - _empty_partition = true; - _rows_in_current_partition = 0; - _static_row_live = false; - _range_tombstones.clear(); - _current_partition_limit = std::min(_row_limit, _partition_row_limit); - _max_purgeable = api::missing_timestamp; - } - - void consume(tombstone t) { - _range_tombstones.set_partition_tombstone(t); - if (!only_live() && !can_purge_tombstone(t)) { - partition_is_not_empty(); - } - } - - stop_iteration consume(static_row&& sr) { - auto current_tombstone = _range_tombstones.get_partition_tombstone(); - bool is_live = sr.cells().compact_and_expire(_schema, column_kind::static_column, - row_tombstone(current_tombstone), - _query_time, _can_gc, _gc_before); - _static_row_live = is_live; - if (is_live || (!only_live() && !sr.empty())) { - partition_is_not_empty(); - return _consumer.consume(std::move(sr), current_tombstone, is_live); - } - return stop_iteration::no; - } - - stop_iteration consume(clustering_row&& cr) { - auto current_tombstone = _range_tombstones.tombstone_for_row(cr.key()); - auto t = cr.tomb(); - if (t.tomb() <= current_tombstone || can_purge_tombstone(t)) { - cr.remove_tombstone(); - } - t.apply(current_tombstone); - bool is_live = cr.marker().compact_and_expire(t.tomb(), _query_time, _can_gc, _gc_before); - is_live |= cr.cells().compact_and_expire(_schema, column_kind::regular_column, t, _query_time, _can_gc, _gc_before); - if (only_live() && is_live) { - partition_is_not_empty(); - auto stop = _consumer.consume(std::move(cr), t, true); - if (++_rows_in_current_partition == _current_partition_limit) { - return stop_iteration::yes; - } - return stop; - } else if (!only_live()) { - if (is_live) { - if (!sstable_compaction() && _rows_in_current_partition == _current_partition_limit) { - return stop_iteration::yes; - } - _rows_in_current_partition++; - } - if (!cr.empty()) { - partition_is_not_empty(); - return _consumer.consume(std::move(cr), t, is_live); - } - } - return stop_iteration::no; - } - - stop_iteration consume(range_tombstone&& rt) { - _range_tombstones.apply(rt); - // FIXME: drop tombstone if it is fully covered by other range tombstones - if (!can_purge_tombstone(rt.tomb) && rt.tomb > _range_tombstones.get_partition_tombstone()) { - partition_is_not_empty(); - return _consumer.consume(std::move(rt)); - } - return stop_iteration::no; - } - - stop_iteration consume_end_of_partition() { - if (!_empty_partition) { - // #589 - Do not add extra row for statics 
unless we did a CK range-less query. - // See comment in query - if (_rows_in_current_partition == 0 && _static_row_live && !_has_ck_selector) { - ++_rows_in_current_partition; - } - - _row_limit -= _rows_in_current_partition; - _partition_limit -= _rows_in_current_partition > 0; - auto stop = _consumer.consume_end_of_partition(); - if (!sstable_compaction()) { - return _row_limit && _partition_limit && stop != stop_iteration::yes - ? stop_iteration::no : stop_iteration::yes; - } - } - return stop_iteration::no; - } - - auto consume_end_of_stream() { - return _consumer.consume_end_of_stream(); - } -}; - -template -struct compact_for_query : compact_mutation { - using compact_mutation::compact_mutation; -}; - -template -struct compact_for_compaction : compact_mutation { - using compact_mutation::compact_mutation; -}; diff --git a/scylla/mutation_partition.cc b/scylla/mutation_partition.cc deleted file mode 100644 index 32a7696..0000000 --- a/scylla/mutation_partition.cc +++ /dev/null @@ -1,2159 +0,0 @@ -/* - * Copyright (C) 2014 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include -#include -#include "mutation_partition.hh" -#include "mutation_partition_applier.hh" -#include "converting_mutation_partition_applier.hh" -#include "partition_builder.hh" -#include "query-result-writer.hh" -#include "atomic_cell_hash.hh" -#include "reversibly_mergeable.hh" -#include "streamed_mutation.hh" -#include "mutation_query.hh" -#include "service/priority_manager.hh" -#include "mutation_compactor.hh" -#include "intrusive_set_external_comparator.hh" -#include "counters.hh" -#include - -template -struct reversal_traits; - -template<> -struct reversal_traits { - template - static auto begin(Container& c) { - return c.begin(); - } - - template - static auto end(Container& c) { - return c.end(); - } - - template - static typename Container::iterator erase_and_dispose(Container& c, - typename Container::iterator begin, - typename Container::iterator end, - Disposer disposer) - { - return c.erase_and_dispose(begin, end, std::move(disposer)); - } - - template - static typename Container::iterator erase_dispose_and_update_end(Container& c, - typename Container::iterator it, Disposer&& disposer, - typename Container::iterator&) - { - return c.erase_and_dispose(it, std::forward(disposer)); - } - - template - static boost::iterator_range maybe_reverse( - Container& c, boost::iterator_range r) - { - return r; - } - - template - static typename Container::iterator maybe_reverse(Container&, typename Container::iterator r) { - return r; - } -}; - -template<> -struct reversal_traits { - template - static auto begin(Container& c) { - return c.rbegin(); - } - - template - static auto end(Container& c) { - return c.rend(); - } - - template - static typename Container::reverse_iterator erase_and_dispose(Container& c, - typename Container::reverse_iterator begin, - typename Container::reverse_iterator 
end,
-                                                          Disposer disposer)
-    {
-        return typename Container::reverse_iterator(
-            c.erase_and_dispose(end.base(), begin.base(), disposer)
-        );
-    }
-
-    // Erases element pointed to by it and makes sure that iterator end is not
-    // invalidated.
-    template <typename Container, typename Disposer>
-    static typename Container::reverse_iterator erase_dispose_and_update_end(Container& c,
-        typename Container::reverse_iterator it, Disposer&& disposer,
-        typename Container::reverse_iterator& end)
-    {
-        auto to_erase = std::next(it).base();
-        bool update_end = end.base() == to_erase;
-        auto ret = typename Container::reverse_iterator(
-            c.erase_and_dispose(to_erase, std::forward<Disposer>(disposer))
-        );
-        if (update_end) {
-            end = ret;
-        }
-        return ret;
-    }
-
-    template <typename Container>
-    static boost::iterator_range<typename Container::reverse_iterator> maybe_reverse(
-        Container& c, boost::iterator_range<typename Container::iterator> r)
-    {
-        using reverse_iterator = typename Container::reverse_iterator;
-        return boost::make_iterator_range(reverse_iterator(r.end()), reverse_iterator(r.begin()));
-    }
-
-    template <typename Container>
-    static typename Container::reverse_iterator maybe_reverse(Container&, typename Container::iterator r) {
-        return typename Container::reverse_iterator(r);
-    }
-};
-
-
-//
-// apply_reversibly_intrusive_set() and revert_intrusive_set() implement ReversiblyMergeable
-// for a rows_type container of ReversiblyMergeable entries.
-//
-// See reversibly_mergeable.hh
-//
-// Requirements:
-//  - entry has distinct key and value states
-//  - entries are ordered only by key in the container
-//  - entry can have an empty value
-//  - presence of an entry with an empty value doesn't affect equality of the containers
-//  - E::empty() returns true iff the value is empty
-//  - E(e.key()) creates an entry with empty value but the same key as that of e.
-//
-// Implementation of ReversiblyMergeable for the entry's value is provided via Apply and Revert functors.
-//
-// ReversiblyMergeable is constructed assuming the following properties of the 'apply' operation
-// on containers:
-//
-//  apply([{k1, v1}], [{k1, v2}]) = [{k1, apply(v1, v2)}]
-//  apply([{k1, v1}], [{k2, v2}]) = [{k1, v1}, {k2, v2}]
-//

-// revert for apply_reversibly_intrusive_set()
-void revert_intrusive_set_range(const schema& s, mutation_partition::rows_type& dst, mutation_partition::rows_type& src,
-    mutation_partition::rows_type::iterator start,
-    mutation_partition::rows_type::iterator end) noexcept
-{
-    auto deleter = current_deleter<rows_entry>();
-    while (start != end) {
-        auto& e = *start;
-        // lower_bound() can allocate if linearization is required but it should have
-        // been already performed by the lower_bound() invocation in apply_reversibly_intrusive_set() and
-        // stored in the linearization context.
-        auto i = dst.find(e, rows_entry::compare(s));
-        assert(i != dst.end());
-        rows_entry& dst_e = *i;
-
-        if (e.erased()) {
-            dst.erase(i);
-            start = src.erase_and_dispose(start, deleter);
-            start = src.insert_before(start, dst_e);
-        } else {
-            dst_e.revert(s, e);
-        }
-
-        ++start;
-    }
-}
-
-void revert_intrusive_set(const schema& s, mutation_partition::rows_type& dst, mutation_partition::rows_type& src) noexcept {
-    revert_intrusive_set_range(s, dst, src, src.begin(), src.end());
-}
-
-// Applies src onto dst. See comment above revert_intrusive_set_range() for more details.
-//
-// Returns an object which upon going out of scope, unless cancel() is called on it,
-// reverts the application by calling revert_intrusive_set(). The references to containers
-// must be stable as long as the returned object is live.
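// A standalone sketch of the guard protocol described above, in plain C++:
// each apply step installs a revert action that runs on scope exit unless
// cancel() commits it. revert_guard is an illustrative name, not the defer()
// helper used by the function below.
#include <cassert>
#include <functional>
#include <vector>

class revert_guard {
    std::function<void()> _revert;
public:
    explicit revert_guard(std::function<void()> r) : _revert(std::move(r)) { }
    ~revert_guard() { if (_revert) _revert(); }
    void cancel() { _revert = nullptr; }
};

int main() {
    std::vector<int> v = {1, 2};
    {
        v.push_back(3);                          // the "apply" step
        revert_guard g([&v] { v.pop_back(); });  // undo if a later step throws
        // ... further apply steps would run here; assume they all succeed ...
        g.cancel();                              // commit: disarm the revert
    }
    assert(v.size() == 3);
}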
-auto apply_reversibly_intrusive_set(const schema& s, mutation_partition::rows_type& dst, mutation_partition::rows_type& src) { - auto src_i = src.begin(); - try { - rows_entry::compare cmp(s); - while (src_i != src.end()) { - rows_entry& src_e = *src_i; - - auto i = dst.lower_bound(src_e, cmp); - if (i == dst.end() || cmp(src_e, *i)) { - // Construct erased entry which will represent missing dst entry for revert. - rows_entry* empty_e = current_allocator().construct(rows_entry::erased_tag{}, src_e); - [&] () noexcept { - src_i = src.erase(src_i); - src_i = src.insert_before(src_i, *empty_e); - dst.insert_before(i, src_e); - }(); - } else { - i->apply_reversibly(s, src_e); - } - ++src_i; - } - return defer([&s, &dst, &src] { revert_intrusive_set(s, dst, src); }); - } catch (...) { - revert_intrusive_set_range(s, dst, src, src.begin(), src_i); - throw; - } -} - -mutation_partition::mutation_partition(const mutation_partition& x) - : _tombstone(x._tombstone) - , _static_row(x._static_row) - , _static_row_continuous(x._static_row_continuous) - , _rows() - , _row_tombstones(x._row_tombstones) { - auto cloner = [] (const auto& x) { - return current_allocator().construct>>(x); - }; - _rows.clone_from(x._rows, cloner, current_deleter()); -} - -mutation_partition::mutation_partition(const mutation_partition& x, const schema& schema, - query::clustering_key_filter_ranges ck_ranges) - : _tombstone(x._tombstone) - , _static_row(x._static_row) - , _static_row_continuous(x._static_row_continuous) - , _rows() - , _row_tombstones(x._row_tombstones, range_tombstone_list::copy_comparator_only()) { - try { - for(auto&& r : ck_ranges) { - for (const rows_entry& e : x.range(schema, r)) { - _rows.insert(_rows.end(), *current_allocator().construct(e), rows_entry::compare(schema)); - } - } - } catch (...) { - _rows.clear_and_dispose(current_deleter()); - throw; - } - - for(auto&& r : ck_ranges) { - for (auto&& rt : x._row_tombstones.slice(schema, r)) { - _row_tombstones.apply(schema, rt); - } - } -} - -mutation_partition::mutation_partition(mutation_partition&& x, const schema& schema, - query::clustering_key_filter_ranges ck_ranges) - : _tombstone(x._tombstone) - , _static_row(std::move(x._static_row)) - , _static_row_continuous(x._static_row_continuous) - , _rows(std::move(x._rows)) - , _row_tombstones(std::move(x._row_tombstones)) -{ - { - auto deleter = current_deleter(); - auto it = _rows.begin(); - for (auto&& range : ck_ranges.ranges()) { - _rows.erase_and_dispose(it, lower_bound(schema, range), deleter); - it = upper_bound(schema, range); - } - _rows.erase_and_dispose(it, _rows.end(), deleter); - } - { - range_tombstone_list::const_iterator it = _row_tombstones.begin(); - for (auto&& range : ck_ranges.ranges()) { - auto rt_range = _row_tombstones.slice(schema, range); - // upper bound for previous range may be after lower bound for the next range - // if both ranges are connected through a range tombstone. In this case the - // erase range would be invalid. 
- if (rt_range.begin() == _row_tombstones.end() || std::next(rt_range.begin()) != it) { - _row_tombstones.erase(it, rt_range.begin()); - } - it = rt_range.end(); - } - _row_tombstones.erase(it, _row_tombstones.end()); - } -} - -mutation_partition::~mutation_partition() { - _rows.clear_and_dispose(current_deleter()); -} - -mutation_partition& -mutation_partition::operator=(const mutation_partition& x) { - mutation_partition n(x); - std::swap(*this, n); - return *this; -} - -mutation_partition& -mutation_partition::operator=(mutation_partition&& x) noexcept { - if (this != &x) { - this->~mutation_partition(); - new (this) mutation_partition(std::move(x)); - } - return *this; -} - -void mutation_partition::ensure_last_dummy(const schema& s) { - if (_rows.empty() || !_rows.rbegin()->position().is_after_all_clustered_rows(s)) { - _rows.insert_before(_rows.end(), - *current_allocator().construct(s, position_in_partition_view::after_all_clustered_rows(), is_dummy::yes, is_continuous::yes)); - } -} - -void -mutation_partition::apply(const schema& s, const mutation_partition& p, const schema& p_schema) { - if (s.version() != p_schema.version()) { - auto p2 = p; - p2.upgrade(p_schema, s); - apply(s, std::move(p2)); - return; - } - - mutation_partition tmp(p); - apply(s, std::move(tmp)); -} - -void -mutation_partition::apply(const schema& s, mutation_partition&& p, const schema& p_schema) { - if (s.version() != p_schema.version()) { - // We can't upgrade p in-place due to exception guarantees - apply(s, p, p_schema); - return; - } - - apply(s, std::move(p)); -} - -void -mutation_partition::apply(const schema& s, mutation_partition&& p) { - auto revert_row_tombstones = _row_tombstones.apply_reversibly(s, p._row_tombstones); - - _static_row.apply_reversibly(s, column_kind::static_column, p._static_row); - auto revert_static_row = defer([&] { - _static_row.revert(s, column_kind::static_column, p._static_row); - }); - - auto revert_rows = apply_reversibly_intrusive_set(s, _rows, p._rows); - - _tombstone.apply(p._tombstone); // noexcept - - revert_rows.cancel(); - revert_row_tombstones.cancel(); - revert_static_row.cancel(); -} - -void -mutation_partition::apply(const schema& s, mutation_partition_view p, const schema& p_schema) { - if (p_schema.version() == s.version()) { - mutation_partition p2(*this, copy_comparators_only{}); - partition_builder b(s, p2); - p.accept(s, b); - apply(s, std::move(p2)); - } else { - mutation_partition p2(*this, copy_comparators_only{}); - partition_builder b(p_schema, p2); - p.accept(p_schema, b); - p2.upgrade(p_schema, s); - apply(s, std::move(p2)); - } -} - -tombstone -mutation_partition::range_tombstone_for_row(const schema& schema, const clustering_key& key) const { - tombstone t = _tombstone; - if (!_row_tombstones.empty()) { - auto found = _row_tombstones.search_tombstone_covering(schema, key); - t.apply(found); - } - return t; -} - -row_tombstone -mutation_partition::tombstone_for_row(const schema& schema, const clustering_key& key) const { - row_tombstone t = row_tombstone(range_tombstone_for_row(schema, key)); - - auto j = _rows.find(key, rows_entry::compare(schema)); - if (j != _rows.end()) { - t.apply(j->row().deleted_at(), j->row().marker()); - } - - return t; -} - -row_tombstone -mutation_partition::tombstone_for_row(const schema& schema, const rows_entry& e) const { - row_tombstone t = e.row().deleted_at(); - t.apply(range_tombstone_for_row(schema, e.key())); - return t; -} - -void -mutation_partition::apply_row_tombstone(const schema& schema, 
clustering_key_prefix prefix, tombstone t) { - assert(!prefix.is_full(schema)); - auto start = prefix; - _row_tombstones.apply(schema, {std::move(start), std::move(prefix), std::move(t)}); -} - -void -mutation_partition::apply_row_tombstone(const schema& schema, range_tombstone rt) { - _row_tombstones.apply(schema, std::move(rt)); -} - -void -mutation_partition::apply_delete(const schema& schema, const clustering_key_prefix& prefix, tombstone t) { - if (prefix.is_empty(schema)) { - apply(t); - } else if (prefix.is_full(schema)) { - clustered_row(schema, prefix).apply(t); - } else { - apply_row_tombstone(schema, prefix, t); - } -} - -void -mutation_partition::apply_delete(const schema& schema, range_tombstone rt) { - if (range_tombstone::is_single_clustering_row_tombstone(schema, rt.start, rt.start_kind, rt.end, rt.end_kind)) { - apply_delete(schema, std::move(rt.start), std::move(rt.tomb)); - return; - } - apply_row_tombstone(schema, std::move(rt)); -} - -void -mutation_partition::apply_delete(const schema& schema, clustering_key&& prefix, tombstone t) { - if (prefix.is_empty(schema)) { - apply(t); - } else if (prefix.is_full(schema)) { - clustered_row(schema, std::move(prefix)).apply(t); - } else { - apply_row_tombstone(schema, std::move(prefix), t); - } -} - -void -mutation_partition::apply_delete(const schema& schema, clustering_key_prefix_view prefix, tombstone t) { - if (prefix.is_empty(schema)) { - apply(t); - } else if (prefix.is_full(schema)) { - clustered_row(schema, prefix).apply(t); - } else { - apply_row_tombstone(schema, prefix, t); - } -} - -void -mutation_partition::apply_insert(const schema& s, clustering_key_view key, api::timestamp_type created_at) { - clustered_row(s, key).apply(row_marker(created_at)); -} - -void mutation_partition::insert_row(const schema& s, const clustering_key& key, deletable_row&& row) { - auto e = current_allocator().construct(key, std::move(row)); - _rows.insert(_rows.end(), *e, rows_entry::compare(s)); -} - -void mutation_partition::insert_row(const schema& s, const clustering_key& key, const deletable_row& row) { - auto e = current_allocator().construct(key, row); - _rows.insert(_rows.end(), *e, rows_entry::compare(s)); -} - -const row* -mutation_partition::find_row(const schema& s, const clustering_key& key) const { - auto i = _rows.find(key, rows_entry::compare(s)); - if (i == _rows.end()) { - return nullptr; - } - return &i->row().cells(); -} - -deletable_row& -mutation_partition::clustered_row(const schema& s, clustering_key&& key) { - auto i = _rows.find(key, rows_entry::compare(s)); - if (i == _rows.end()) { - auto e = current_allocator().construct(std::move(key)); - _rows.insert(i, *e, rows_entry::compare(s)); - return e->row(); - } - return i->row(); -} - -deletable_row& -mutation_partition::clustered_row(const schema& s, const clustering_key& key) { - auto i = _rows.find(key, rows_entry::compare(s)); - if (i == _rows.end()) { - auto e = current_allocator().construct(key); - _rows.insert(i, *e, rows_entry::compare(s)); - return e->row(); - } - return i->row(); -} - -deletable_row& -mutation_partition::clustered_row(const schema& s, clustering_key_view key) { - auto i = _rows.find(key, rows_entry::compare(s)); - if (i == _rows.end()) { - auto e = current_allocator().construct(key); - _rows.insert(i, *e, rows_entry::compare(s)); - return e->row(); - } - return i->row(); -} - -deletable_row& -mutation_partition::clustered_row(const schema& s, position_in_partition_view pos, is_dummy dummy, is_continuous continuous) { - auto i = 
_rows.find(pos, rows_entry::compare(s)); - if (i == _rows.end()) { - auto e = current_allocator().construct(s, pos, dummy, continuous); - _rows.insert(i, *e, rows_entry::compare(s)); - return e->row(); - } - return i->row(); -} - -mutation_partition::rows_type::const_iterator -mutation_partition::lower_bound(const schema& schema, const query::clustering_range& r) const { - auto cmp = rows_entry::key_comparator(clustering_key_prefix::prefix_equality_less_compare(schema)); - return r.lower_bound(_rows, std::move(cmp)); -} - -mutation_partition::rows_type::const_iterator -mutation_partition::upper_bound(const schema& schema, const query::clustering_range& r) const { - auto cmp = rows_entry::key_comparator(clustering_key_prefix::prefix_equality_less_compare(schema)); - return r.upper_bound(_rows, std::move(cmp)); -} - -boost::iterator_range -mutation_partition::range(const schema& schema, const query::clustering_range& r) const { - return boost::make_iterator_range(lower_bound(schema, r), upper_bound(schema, r)); -} - -template -boost::iterator_range -unconst(Container& c, boost::iterator_range r) { - return boost::make_iterator_range( - c.erase(r.begin(), r.begin()), - c.erase(r.end(), r.end()) - ); -} - -template -typename Container::iterator -unconst(Container& c, typename Container::const_iterator i) { - return c.erase(i, i); -} - -boost::iterator_range -mutation_partition::range(const schema& schema, const query::clustering_range& r) { - return unconst(_rows, static_cast(this)->range(schema, r)); -} - -mutation_partition::rows_type::iterator -mutation_partition::lower_bound(const schema& schema, const query::clustering_range& r) { - return unconst(_rows, static_cast(this)->lower_bound(schema, r)); -} - -mutation_partition::rows_type::iterator -mutation_partition::upper_bound(const schema& schema, const query::clustering_range& r) { - return unconst(_rows, static_cast(this)->upper_bound(schema, r)); -} - -template -void mutation_partition::for_each_row(const schema& schema, const query::clustering_range& row_range, bool reversed, Func&& func) const -{ - auto r = range(schema, row_range); - if (!reversed) { - for (const auto& e : r) { - if (func(e) == stop_iteration::yes) { - break; - } - } - } else { - for (const auto& e : r | boost::adaptors::reversed) { - if (func(e) == stop_iteration::yes) { - break; - } - } - } -} - -template -void write_cell(RowWriter& w, const query::partition_slice& slice, ::atomic_cell_view c) { - assert(c.is_live()); - auto wr = w.add().write(); - auto after_timestamp = [&, wr = std::move(wr)] () mutable { - if (slice.options.contains()) { - return std::move(wr).write_timestamp(c.timestamp()); - } else { - return std::move(wr).skip_timestamp(); - } - }(); - auto after_value = [&, wr = std::move(after_timestamp)] () mutable { - if (slice.options.contains() && c.is_live_and_has_ttl()) { - return std::move(wr).write_expiry(c.expiry()); - } else { - return std::move(wr).skip_expiry(); - } - }().write_value(c.value()); - [&, wr = std::move(after_value)] () mutable { - if (slice.options.contains() && c.is_live_and_has_ttl()) { - return std::move(wr).write_ttl(c.ttl()); - } else { - return std::move(wr).skip_ttl(); - } - }().end_qr_cell(); -} - -template -void write_cell(RowWriter& w, const query::partition_slice& slice, const data_type& type, collection_mutation_view v) { - auto ctype = static_pointer_cast(type); - if (slice.options.contains()) { - ctype = map_type_impl::get_instance(ctype->name_comparator(), ctype->value_comparator(), true); - } - 
w.add().write().skip_timestamp()
-        .skip_expiry()
-        .write_value(ctype->to_value(v, slice.cql_format()))
-        .skip_ttl()
-        .end_qr_cell();
-}
-
-template <typename RowWriter>
-void write_counter_cell(RowWriter& w, const query::partition_slice& slice, ::atomic_cell_view c) {
-    assert(c.is_live());
-    auto wr = w.add().write();
-    [&, wr = std::move(wr)] () mutable {
-        if (slice.options.contains<query::partition_slice::option::send_timestamp>()) {
-            return std::move(wr).write_timestamp(c.timestamp());
-        } else {
-            return std::move(wr).skip_timestamp();
-        }
-    }().skip_expiry()
-        .write_value(counter_cell_view::total_value_type()->decompose(counter_cell_view(c).total_value()))
-        .skip_ttl()
-        .end_qr_cell();
-}
-
-// Returns the timestamp of the latest update to the row.
-static api::timestamp_type hash_row_slice(md5_hasher& hasher,
-    const schema& s,
-    column_kind kind,
-    const row& cells,
-    const std::vector<column_id>& columns)
-{
-    api::timestamp_type max = api::missing_timestamp;
-    for (auto id : columns) {
-        const atomic_cell_or_collection* cell = cells.find_cell(id);
-        if (!cell) {
-            continue;
-        }
-        feed_hash(hasher, id);
-        auto&& def = s.column_at(kind, id);
-        if (def.is_atomic()) {
-            feed_hash(hasher, cell->as_atomic_cell(), def);
-            max = std::max(max, cell->as_atomic_cell().timestamp());
-        } else {
-            auto&& cm = cell->as_collection_mutation();
-            feed_hash(hasher, cm, def);
-            auto&& ctype = static_pointer_cast<const collection_type_impl>(def.type);
-            max = std::max(max, ctype->last_update(cm));
-        }
-    }
-    return max;
-}
-
-template <typename RowWriter>
-static void get_compacted_row_slice(const schema& s,
-    const query::partition_slice& slice,
-    column_kind kind,
-    const row& cells,
-    const std::vector<column_id>& columns,
-    RowWriter& writer)
-{
-    for (auto id : columns) {
-        const atomic_cell_or_collection* cell = cells.find_cell(id);
-        if (!cell) {
-            writer.add().skip();
-        } else {
-            auto&& def = s.column_at(kind, id);
-            if (def.is_atomic()) {
-                auto c = cell->as_atomic_cell();
-                if (!c.is_live()) {
-                    writer.add().skip();
-                } else if (def.is_counter()) {
-                    write_counter_cell(writer, slice, cell->as_atomic_cell());
-                } else {
-                    write_cell(writer, slice, cell->as_atomic_cell());
-                }
-            } else {
-                auto&& mut = cell->as_collection_mutation();
-                auto&& ctype = static_pointer_cast<const collection_type_impl>(def.type);
-                if (!ctype->is_any_live(mut)) {
-                    writer.add().skip();
-                } else {
-                    write_cell(writer, slice, def.type, mut);
-                }
-            }
-        }
-    }
-}
-
-bool has_any_live_data(const schema& s, column_kind kind, const row& cells, tombstone tomb = tombstone(),
-                       gc_clock::time_point now = gc_clock::time_point::min()) {
-    bool any_live = false;
-    cells.for_each_cell_until([&] (column_id id, const atomic_cell_or_collection& cell_or_collection) {
-        const column_definition& def = s.column_at(kind, id);
-        if (def.is_atomic()) {
-            auto&& c = cell_or_collection.as_atomic_cell();
-            if (c.is_live(tomb, now, def.is_counter())) {
-                any_live = true;
-                return stop_iteration::yes;
-            }
-        } else {
-            auto&& cell = cell_or_collection.as_collection_mutation();
-            auto&& ctype = static_pointer_cast<const collection_type_impl>(def.type);
-            if (ctype->is_any_live(cell, tomb, now)) {
-                any_live = true;
-                return stop_iteration::yes;
-            }
-        }
-        return stop_iteration::no;
-    });
-    return any_live;
-}
-
-void
-mutation_partition::query_compacted(query::result::partition_writer& pw, const schema& s, uint32_t limit) const {
-    const query::partition_slice& slice = pw.slice();
-
-    if (limit == 0) {
-        pw.retract();
-        return;
-    }
-
-    auto static_cells_wr = pw.start().start_static_row().start_cells();
-
-    if (!slice.static_columns.empty()) {
-        if (pw.requested_result()) {
-            get_compacted_row_slice(s, slice, column_kind::static_column, static_row(), slice.static_columns, static_cells_wr);
-        }
-        if (pw.requested_digest()) {
-            auto pt = partition_tombstone();
-            ::feed_hash(pw.digest(), pt);
-            auto t = hash_row_slice(pw.digest(), s, column_kind::static_column, static_row(), slice.static_columns);
-            pw.last_modified() = std::max({pw.last_modified(), pt.timestamp, t});
-        }
-    }
-
-    auto rows_wr = std::move(static_cells_wr).end_cells()
-        .end_static_row()
-        .start_rows();
-
-    uint32_t row_count = 0;
-
-    auto is_reversed = slice.options.contains(query::partition_slice::option::reversed);
-    auto send_ck = slice.options.contains(query::partition_slice::option::send_clustering_key);
-    for_each_row(s, query::clustering_range::make_open_ended_both_sides(), is_reversed, [&] (const rows_entry& e) {
-        if (e.dummy()) {
-            return stop_iteration::no;
-        }
-        auto& row = e.row();
-        auto row_tombstone = tombstone_for_row(s, e);
-
-        if (pw.requested_digest()) {
-            e.key().feed_hash(pw.digest(), s);
-            ::feed_hash(pw.digest(), row_tombstone);
-            auto t = hash_row_slice(pw.digest(), s, column_kind::regular_column, row.cells(), slice.regular_columns);
-            pw.last_modified() = std::max({pw.last_modified(), row_tombstone.tomb().timestamp, t});
-        }
-
-        if (row.is_live(s)) {
-            if (pw.requested_result()) {
-                auto cells_wr = [&] {
-                    if (send_ck) {
-                        return rows_wr.add().write_key(e.key()).start_cells().start_cells();
-                    } else {
-                        return rows_wr.add().skip_key().start_cells().start_cells();
-                    }
-                }();
-                get_compacted_row_slice(s, slice, column_kind::regular_column, row.cells(), slice.regular_columns, cells_wr);
-                std::move(cells_wr).end_cells().end_cells().end_qr_clustered_row();
-            }
-            ++row_count;
-            if (--limit == 0) {
-                return stop_iteration::yes;
-            }
-        }
-        return stop_iteration::no;
-    });
-
-    // If we got no rows, but have live static columns, we should only
-    // give them back IFF we did not have any CK restrictions.
-    // #589
-    // If CKs exist, and we do a restriction on them, we either have matching
-    // rows, or return nothing, since CQL does not allow "is null".
-    if (row_count == 0
-            && (has_ck_selector(pw.ranges())
-                || !has_any_live_data(s, column_kind::static_column, static_row()))) {
-        pw.retract();
-    } else {
-        pw.row_count() += row_count ?: 1;
-        pw.partition_count() += 1;
-        std::move(rows_wr).end_rows().end_qr_partition();
-    }
-}
-
-std::ostream&
-operator<<(std::ostream& os, const std::pair<column_id, const atomic_cell_or_collection&>& c) {
-    return fprint(os, "{column: %s %s}", c.first, c.second);
-}
-
-std::ostream&
-operator<<(std::ostream& os, const row& r) {
-    sstring cells;
-    switch (r._type) {
-    case row::storage_type::set:
-        cells = ::join(", ", r.get_range_set());
-        break;
-    case row::storage_type::vector:
-        cells = ::join(", ", r.get_range_vector());
-        break;
-    }
-    return fprint(os, "{row: %s}", cells);
-}
-
-std::ostream&
-operator<<(std::ostream& os, const row_marker& rm) {
-    if (rm.is_missing()) {
-        return fprint(os, "{missing row_marker}");
-    } else if (rm._ttl == row_marker::dead) {
-        return fprint(os, "{dead row_marker %s %s}", rm._timestamp, rm._expiry.time_since_epoch().count());
-    } else {
-        return fprint(os, "{row_marker %s %s %s}", rm._timestamp, rm._ttl.count(),
-            rm._ttl != row_marker::no_ttl ?
rm._expiry.time_since_epoch().count() : 0); - } -} - -std::ostream& -operator<<(std::ostream& os, const deletable_row& dr) { - return fprint(os, "{deletable_row: %s %s %s}", dr._marker, dr._deleted_at, dr._cells); -} - -std::ostream& -operator<<(std::ostream& os, const rows_entry& re) { - return fprint(os, "{rows_entry: cont=%d dummy=%d %s %s}", re.continuous(), re.dummy(), re._key, re._row); -} - -std::ostream& -operator<<(std::ostream& os, const mutation_partition& mp) { - return fprint(os, "{mutation_partition: %s (%s) static cont=%d %s clustered %s}", - mp._tombstone, ::join(", ", mp._row_tombstones), mp._static_row_continuous, mp._static_row, - ::join(", ", mp._rows)); -} - -constexpr gc_clock::duration row_marker::no_ttl; -constexpr gc_clock::duration row_marker::dead; - -int compare_row_marker_for_merge(const row_marker& left, const row_marker& right) noexcept { - if (left.timestamp() != right.timestamp()) { - return left.timestamp() > right.timestamp() ? 1 : -1; - } - if (left.is_live() != right.is_live()) { - return left.is_live() ? -1 : 1; - } - if (left.is_live()) { - if (left.is_expiring() != right.is_expiring()) { - // prefer expiring cells. - return left.is_expiring() ? 1 : -1; - } - if (left.is_expiring() && left.expiry() != right.expiry()) { - return left.expiry() < right.expiry() ? -1 : 1; - } - } else { - // Both are deleted - if (left.deletion_time() != right.deletion_time()) { - // Origin compares big-endian serialized deletion time. That's because it - // delegates to AbstractCell.reconcile() which compares values after - // comparing timestamps, which in case of deleted cells will hold - // serialized expiry. - return (uint32_t) left.deletion_time().time_since_epoch().count() - < (uint32_t) right.deletion_time().time_since_epoch().count() ? 
-1 : 1; - } - } - return 0; -} - -bool -deletable_row::equal(column_kind kind, const schema& s, const deletable_row& other, const schema& other_schema) const { - if (_deleted_at != other._deleted_at || _marker != other._marker) { - return false; - } - return _cells.equal(kind, s, other._cells, other_schema); -} - -void deletable_row::apply_reversibly(const schema& s, deletable_row& src) { - _cells.apply_reversibly(s, column_kind::regular_column, src._cells); - _marker.apply_reversibly(src._marker); // noexcept - _deleted_at.apply_reversibly(src._deleted_at, _marker); // noexcept -} - -void deletable_row::revert(const schema& s, deletable_row& src) { - _cells.revert(s, column_kind::regular_column, src._cells); - _deleted_at.revert(src._deleted_at); - _marker.revert(src._marker); -} - -void deletable_row::apply(const schema& s, deletable_row&& src) { - _cells.apply(s, column_kind::regular_column, std::move(src._cells)); - _marker.apply(src._marker); - _deleted_at.apply(src._deleted_at, _marker); -} - -bool -rows_entry::equal(const schema& s, const rows_entry& other) const { - return equal(s, other, s); -} - -bool -rows_entry::equal(const schema& s, const rows_entry& other, const schema& other_schema) const { - position_in_partition::equal_compare eq(s); - return eq(position(), other.position()) - && row().equal(column_kind::regular_column, s, other.row(), other_schema); -} - -bool mutation_partition::equal(const schema& s, const mutation_partition& p) const { - return equal(s, p, s); -} - -bool mutation_partition::equal(const schema& this_schema, const mutation_partition& p, const schema& p_schema) const { - if (_tombstone != p._tombstone) { - return false; - } - - if (!boost::equal(non_dummy_rows(), p.non_dummy_rows(), - [&] (const rows_entry& e1, const rows_entry& e2) { - return e1.equal(this_schema, e2, p_schema); - } - )) { - return false; - } - - if (!std::equal(_row_tombstones.begin(), _row_tombstones.end(), - p._row_tombstones.begin(), p._row_tombstones.end(), - [&] (const range_tombstone& rt1, const range_tombstone& rt2) { return rt1.equal(this_schema, rt2); } - )) { - return false; - } - - return _static_row.equal(column_kind::static_column, this_schema, p._static_row, p_schema); -} - -bool mutation_partition::equal_continuity(const schema& s, const mutation_partition& p) const { - return _static_row_continuous == p._static_row_continuous - && boost::equal(_rows, p._rows, [&] (const rows_entry& e1, const rows_entry& e2) { - position_in_partition::equal_compare eq(s); - return eq(e1.position(), e2.position()) - && e1.continuous() == e2.continuous() - && e1.dummy() == e2.dummy(); - }); -} - -void -apply_reversibly(const column_definition& def, atomic_cell_or_collection& dst, atomic_cell_or_collection& src) { - // Must be run via with_linearized_managed_bytes() context, but assume it is - // provided via an upper layer - if (def.is_atomic()) { - auto&& src_ac = src.as_atomic_cell_ref(); - if (def.is_counter()) { - auto did_apply = counter_cell_view::apply_reversibly(dst, src); - src_ac.set_revert(did_apply); - } else { - if (compare_atomic_cell_for_merge(dst.as_atomic_cell(), src.as_atomic_cell()) < 0) { - std::swap(dst, src); - src_ac.set_revert(true); - } else { - src_ac.set_revert(false); - } - } - } else { - auto ct = static_pointer_cast(def.type); - src = ct->merge(dst.as_collection_mutation(), src.as_collection_mutation()); - std::swap(dst, src); - } -} - -void -revert(const column_definition& def, atomic_cell_or_collection& dst, atomic_cell_or_collection& src) noexcept { - 
static_assert(std::is_nothrow_move_constructible::value - && std::is_nothrow_move_assignable::value, - "for std::swap() to be noexcept"); - if (def.is_atomic()) { - auto&& ac = src.as_atomic_cell_ref(); - if (ac.is_revert_set()) { - ac.set_revert(false); - if (def.is_counter()) { - counter_cell_view::revert_apply(dst, src); - } else { - std::swap(dst, src); - } - } - } else { - std::swap(dst, src); - } -} - -void -row::apply(const column_definition& column, const atomic_cell_or_collection& value) { - atomic_cell_or_collection tmp(value); - apply(column, std::move(tmp)); -} - -void -row::apply(const column_definition& column, atomic_cell_or_collection&& value) { - apply_reversibly(column, value); -} - -template -void row::for_each_cell(Func&& func, Rollback&& rollback) { - static_assert(noexcept(rollback(std::declval(), std::declval())), - "rollback must be noexcept"); - - if (_type == storage_type::vector) { - unsigned i = 0; - try { - for (; i < _storage.vector.v.size(); i++) { - if (_storage.vector.present.test(i)) { - func(i, _storage.vector.v[i]); - } - } - } catch (...) { - while (i) { - --i; - if (_storage.vector.present.test(i)) { - rollback(i, _storage.vector.v[i]); - } - } - throw; - } - } else { - auto i = _storage.set.begin(); - try { - while (i != _storage.set.end()) { - func(i->id(), i->cell()); - ++i; - } - } catch (...) { - while (i != _storage.set.begin()) { - --i; - rollback(i->id(), i->cell()); - } - throw; - } - } -} - -void -row::apply_reversibly(const column_definition& column, atomic_cell_or_collection& value) { - static_assert(std::is_nothrow_move_constructible::value - && std::is_nothrow_move_assignable::value, - "noexcept required for atomicity"); - - // our mutations are not yet immutable - auto id = column.id; - if (_type == storage_type::vector && id < max_vector_size) { - if (id >= _storage.vector.v.size()) { - _storage.vector.v.resize(id); - _storage.vector.v.emplace_back(std::move(value)); - _storage.vector.present.set(id); - _size++; - } else if (!bool(_storage.vector.v[id])) { - _storage.vector.v[id] = std::move(value); - _storage.vector.present.set(id); - _size++; - } else { - ::apply_reversibly(column, _storage.vector.v[id], value); - } - } else { - if (_type == storage_type::vector) { - vector_to_set(); - } - auto i = _storage.set.lower_bound(id, cell_entry::compare()); - if (i == _storage.set.end() || i->id() != id) { - cell_entry* e = current_allocator().construct(id); - std::swap(e->_cell, value); - _storage.set.insert(i, *e); - _size++; - } else { - ::apply_reversibly(column, i->cell(), value); - } - } -} - -void -row::revert(const column_definition& column, atomic_cell_or_collection& src) noexcept { - auto id = column.id; - if (_type == storage_type::vector) { - auto& dst = _storage.vector.v[id]; - if (!src) { - std::swap(dst, src); - _storage.vector.present.reset(id); - --_size; - } else { - ::revert(column, dst, src); - } - } else { - auto i = _storage.set.find(id, cell_entry::compare()); - auto& dst = i->cell(); - if (!src) { - std::swap(dst, src); - _storage.set.erase_and_dispose(i, current_deleter()); - --_size; - } else { - ::revert(column, dst, src); - } - } -} - -void -row::append_cell(column_id id, atomic_cell_or_collection value) { - if (_type == storage_type::vector && id < max_vector_size) { - _storage.vector.v.resize(id); - _storage.vector.v.emplace_back(std::move(value)); - _storage.vector.present.set(id); - } else { - if (_type == storage_type::vector) { - vector_to_set(); - } - auto e = current_allocator().construct(id, 
std::move(value)); - _storage.set.insert(_storage.set.end(), *e); - } - _size++; -} - -const atomic_cell_or_collection* -row::find_cell(column_id id) const { - if (_type == storage_type::vector) { - if (id >= _storage.vector.v.size() || !_storage.vector.present.test(id)) { - return nullptr; - } - return &_storage.vector.v[id]; - } else { - auto i = _storage.set.find(id, cell_entry::compare()); - if (i == _storage.set.end()) { - return nullptr; - } - return &i->cell(); - } -} - -size_t row::external_memory_usage() const { - size_t mem = 0; - if (_type == storage_type::vector) { - mem += _storage.vector.v.external_memory_usage(); - for (auto&& ac_o_c : _storage.vector.v) { - mem += ac_o_c.external_memory_usage(); - } - } else { - for (auto&& ce : _storage.set) { - mem += sizeof(cell_entry) + ce.cell().external_memory_usage(); - } - } - return mem; -} - -template -void mutation_partition::trim_rows(const schema& s, - const std::vector& row_ranges, - Func&& func) -{ - static_assert(std::is_same>::value, "Bad func signature"); - - bool stop = false; - auto last = reversal_traits::begin(_rows); - auto deleter = current_deleter(); - - auto range_begin = [this, &s] (const query::clustering_range& range) { - return reversed ? upper_bound(s, range) : lower_bound(s, range); - }; - - auto range_end = [this, &s] (const query::clustering_range& range) { - return reversed ? lower_bound(s, range) : upper_bound(s, range); - }; - - for (auto&& row_range : row_ranges) { - if (stop) { - break; - } - - last = reversal_traits::erase_and_dispose(_rows, last, - reversal_traits::maybe_reverse(_rows, range_begin(row_range)), deleter); - - auto end = reversal_traits::maybe_reverse(_rows, range_end(row_range)); - while (last != end) { - rows_entry& e = *last; - if (func(e) == stop_iteration::yes) { - stop = true; - break; - } - - if (e.empty()) { - last = reversal_traits::erase_dispose_and_update_end(_rows, last, deleter, end); - } else { - ++last; - } - } - } - - reversal_traits::erase_and_dispose(_rows, last, reversal_traits::end(_rows), deleter); -} - -uint32_t mutation_partition::do_compact(const schema& s, - gc_clock::time_point query_time, - const std::vector& row_ranges, - bool reverse, - uint32_t row_limit, - can_gc_fn& can_gc) -{ - assert(row_limit > 0); - - auto gc_before = saturating_subtract(query_time, s.gc_grace_seconds()); - - auto should_purge_tombstone = [&] (const tombstone& t) { - return t.deletion_time < gc_before && can_gc(t); - }; - auto should_purge_row_tombstone = [&] (const row_tombstone& t) { - return t.max_deletion_time() < gc_before && can_gc(t.tomb()); - }; - - bool static_row_live = _static_row.compact_and_expire(s, column_kind::static_column, row_tombstone(_tombstone), - query_time, can_gc, gc_before); - - uint32_t row_count = 0; - - auto row_callback = [&] (rows_entry& e) { - if (e.dummy()) { - return stop_iteration::no; - } - deletable_row& row = e.row(); - row_tombstone tomb = tombstone_for_row(s, e); - - bool is_live = row.cells().compact_and_expire(s, column_kind::regular_column, tomb, query_time, can_gc, gc_before); - is_live |= row.marker().compact_and_expire(tomb.tomb(), query_time, can_gc, gc_before); - - if (should_purge_row_tombstone(row.deleted_at())) { - row.remove_tombstone(); - } - - // when row_limit is reached, do not exit immediately, - // iterate to the next live_row instead to include trailing - // tombstones in the mutation. 
This is how Origin deals with - // https://issues.apache.org/jira/browse/CASSANDRA-8933 - if (is_live) { - if (row_count == row_limit) { - return stop_iteration::yes; - } - ++row_count; - } - - return stop_iteration::no; - }; - - if (reverse) { - trim_rows(s, row_ranges, row_callback); - } else { - trim_rows(s, row_ranges, row_callback); - } - - // #589 - Do not add extra row for statics unless we did a CK range-less query. - // See comment in query - if (row_count == 0 && static_row_live && !has_ck_selector(row_ranges)) { - ++row_count; - } - - _row_tombstones.erase_where([&] (auto&& rt) { - return should_purge_tombstone(rt.tomb) || rt.tomb.timestamp <= _tombstone.timestamp; - }); - if (should_purge_tombstone(_tombstone)) { - _tombstone = tombstone(); - } - - // FIXME: purge unneeded prefix tombstones based on row_ranges - - return row_count; -} - -uint32_t -mutation_partition::compact_for_query( - const schema& s, - gc_clock::time_point query_time, - const std::vector& row_ranges, - bool reverse, - uint32_t row_limit) -{ - return do_compact(s, query_time, row_ranges, reverse, row_limit, always_gc); -} - -void mutation_partition::compact_for_compaction(const schema& s, - can_gc_fn& can_gc, gc_clock::time_point compaction_time) -{ - static const std::vector all_rows = { - query::clustering_range::make_open_ended_both_sides() - }; - - do_compact(s, compaction_time, all_rows, false, query::max_rows, can_gc); -} - -// Returns true if there is no live data or tombstones. -bool mutation_partition::empty() const -{ - if (_tombstone.timestamp != api::missing_timestamp) { - return false; - } - return !_static_row.size() && _rows.empty() && _row_tombstones.empty(); -} - -bool -deletable_row::is_live(const schema& s, tombstone base_tombstone, gc_clock::time_point query_time) const { - // _created_at corresponds to the row marker cell, present for rows - // created with the 'insert' statement. If row marker is live, we know the - // row is live. Otherwise, a row is considered live if it has any cell - // which is live. - base_tombstone.apply(_deleted_at.tomb()); - return _marker.is_live(base_tombstone, query_time) - || has_any_live_data(s, column_kind::regular_column, _cells, base_tombstone, query_time); -} - -bool -mutation_partition::is_static_row_live(const schema& s, gc_clock::time_point query_time) const { - return has_any_live_data(s, column_kind::static_column, static_row(), _tombstone, query_time); -} - -size_t -mutation_partition::live_row_count(const schema& s, gc_clock::time_point query_time) const { - size_t count = 0; - - for (const rows_entry& e : non_dummy_rows()) { - tombstone base_tombstone = range_tombstone_for_row(s, e.key()); - if (e.row().is_live(s, base_tombstone, query_time)) { - ++count; - } - } - - if (count == 0 && is_static_row_live(s, query_time)) { - return 1; - } - - return count; -} - -rows_entry::rows_entry(rows_entry&& o) noexcept - : _link(std::move(o._link)) - , _key(std::move(o._key)) - , _row(std::move(o._row)) - , _flags(std::move(o._flags)) -{ } - -row::row(const row& o) - : _type(o._type) - , _size(o._size) -{ - if (_type == storage_type::vector) { - new (&_storage.vector) vector_storage(o._storage.vector); - } else { - auto cloner = [] (const auto& x) { - return current_allocator().construct>>(x); - }; - new (&_storage.set) map_type; - try { - _storage.set.clone_from(o._storage.set, cloner, current_deleter()); - } catch (...) 
{ - _storage.set.~map_type(); - throw; - } - } -} - -row::~row() { - if (_type == storage_type::vector) { - _storage.vector.~vector_storage(); - } else { - _storage.set.clear_and_dispose(current_deleter()); - _storage.set.~map_type(); - } -} - -row::cell_entry::cell_entry(const cell_entry& o) - : _id(o._id) - , _cell(o._cell) -{ } - -row::cell_entry::cell_entry(cell_entry&& o) noexcept - : _link() - , _id(o._id) - , _cell(std::move(o._cell)) -{ - using container_type = row::map_type; - container_type::node_algorithms::replace_node(o._link.this_ptr(), _link.this_ptr()); - container_type::node_algorithms::init(o._link.this_ptr()); -} - -const atomic_cell_or_collection& row::cell_at(column_id id) const { - auto&& cell = find_cell(id); - if (!cell) { - throw std::out_of_range(sprint("Column not found for id = %d", id)); - } - return *cell; -} - -void row::vector_to_set() -{ - assert(_type == storage_type::vector); - map_type set; - try { - for (auto i : bitsets::for_each_set(_storage.vector.present)) { - auto& c = _storage.vector.v[i]; - auto e = current_allocator().construct(i, std::move(c)); - set.insert(set.end(), *e); - } - } catch (...) { - set.clear_and_dispose([this, del = current_deleter()] (cell_entry* ce) noexcept { - _storage.vector.v[ce->id()] = std::move(ce->cell()); - del(ce); - }); - throw; - } - _storage.vector.~vector_storage(); - new (&_storage.set) map_type(std::move(set)); - _type = storage_type::set; -} - -void row::reserve(column_id last_column) -{ - if (_type == storage_type::vector && last_column >= internal_count) { - if (last_column >= max_vector_size) { - vector_to_set(); - } else { - _storage.vector.v.reserve(last_column); - } - } -} - -template -auto row::with_both_ranges(const row& other, Func&& func) const { - if (_type == storage_type::vector) { - if (other._type == storage_type::vector) { - return func(get_range_vector(), other.get_range_vector()); - } else { - return func(get_range_vector(), other.get_range_set()); - } - } else { - if (other._type == storage_type::vector) { - return func(get_range_set(), other.get_range_vector()); - } else { - return func(get_range_set(), other.get_range_set()); - } - } -} - -bool row::operator==(const row& other) const { - if (size() != other.size()) { - return false; - } - - auto cells_equal = [] (std::pair c1, - std::pair c2) { - return c1.first == c2.first && c1.second == c2.second; - }; - return with_both_ranges(other, [&] (auto r1, auto r2) { - return boost::equal(r1, r2, cells_equal); - }); -} - -bool row::equal(column_kind kind, const schema& this_schema, const row& other, const schema& other_schema) const { - if (size() != other.size()) { - return false; - } - - auto cells_equal = [&] (std::pair c1, - std::pair c2) { - static_assert(schema::row_column_ids_are_ordered_by_name::value, "Relying on column ids being ordered by name"); - return this_schema.column_at(kind, c1.first).name() == other_schema.column_at(kind, c2.first).name() - && c1.second == c2.second; - }; - return with_both_ranges(other, [&] (auto r1, auto r2) { - return boost::equal(r1, r2, cells_equal); - }); -} - -row::row() { - new (&_storage.vector) vector_storage; -} - -row::row(row&& other) noexcept - : _type(other._type), _size(other._size) { - if (_type == storage_type::vector) { - new (&_storage.vector) vector_storage(std::move(other._storage.vector)); - } else { - new (&_storage.set) map_type(std::move(other._storage.set)); - } -} - -row& row::operator=(row&& other) noexcept { - if (this != &other) { - this->~row(); - new (this) 
row(std::move(other)); - } - return *this; -} - -void row::apply_reversibly(const schema& s, column_kind kind, row& other) { - if (other.empty()) { - return; - } - if (other._type == storage_type::vector) { - reserve(other._storage.vector.v.size() - 1); - } else { - reserve(other._storage.set.rbegin()->id()); - } - other.for_each_cell([&] (column_id id, atomic_cell_or_collection& cell) { - apply_reversibly(s.column_at(kind, id), cell); - }, [&] (column_id id, atomic_cell_or_collection& cell) noexcept { - revert(s.column_at(kind, id), cell); - }); -} - -void row::apply(const schema& s, column_kind kind, const row& other) { - if (other.empty()) { - return; - } - if (other._type == storage_type::vector) { - reserve(other._storage.vector.v.size() - 1); - } else { - reserve(other._storage.set.rbegin()->id()); - } - other.for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) { - apply(s.column_at(kind, id), cell); - }); -} - -void row::apply(const schema& s, column_kind kind, row&& other) { - if (other.empty()) { - return; - } - if (other._type == storage_type::vector) { - reserve(other._storage.vector.v.size() - 1); - } else { - reserve(other._storage.set.rbegin()->id()); - } - other.for_each_cell([&] (column_id id, atomic_cell_or_collection& cell) { - apply(s.column_at(kind, id), std::move(cell)); - }); -} - -void row::revert(const schema& s, column_kind kind, row& other) noexcept { - other.for_each_cell([&] (column_id id, atomic_cell_or_collection& cell) noexcept { - revert(s.column_at(kind, id), cell); - }); -} - -bool row::compact_and_expire(const schema& s, column_kind kind, row_tombstone tomb, gc_clock::time_point query_time, - can_gc_fn& can_gc, gc_clock::time_point gc_before) -{ - bool any_live = false; - remove_if([&] (column_id id, atomic_cell_or_collection& c) { - bool erase = false; - const column_definition& def = s.column_at(kind, id); - if (def.is_atomic()) { - atomic_cell_view cell = c.as_atomic_cell(); - auto can_erase_cell = [&] { - return cell.deletion_time() < gc_before && can_gc(tombstone(cell.timestamp(), cell.deletion_time())); - }; - - if (cell.is_covered_by(tomb.regular(), def.is_counter())) { - erase = true; - } else if (cell.has_expired(query_time)) { - erase = can_erase_cell(); - if (!erase) { - c = atomic_cell::make_dead(cell.timestamp(), cell.deletion_time()); - } - } else if (!cell.is_live()) { - erase = can_erase_cell(); - } else if (cell.is_covered_by(tomb.shadowable().tomb(), def.is_counter())) { - erase = true; - } else { - any_live = true; - } - } else { - auto&& cell = c.as_collection_mutation(); - auto&& ctype = static_pointer_cast(def.type); - auto m_view = ctype->deserialize_mutation_form(cell); - collection_type_impl::mutation m = m_view.materialize(); - any_live |= m.compact_and_expire(tomb, query_time, can_gc, gc_before); - if (m.cells.empty() && m.tomb <= tomb.tomb()) { - erase = true; - } else { - c = ctype->serialize_mutation_form(m); - } - } - return erase; - }); - return any_live; -} - -deletable_row deletable_row::difference(const schema& s, column_kind kind, const deletable_row& other) const -{ - deletable_row dr; - if (_deleted_at > other._deleted_at) { - dr.apply(_deleted_at); - } - if (compare_row_marker_for_merge(_marker, other._marker) > 0) { - dr.apply(_marker); - } - dr._cells = _cells.difference(s, kind, other._cells); - return dr; -} - -row row::difference(const schema& s, column_kind kind, const row& other) const -{ - row r; - with_both_ranges(other, [&] (auto this_range, auto other_range) { - auto it = 
other_range.begin(); - for (auto&& c : this_range) { - while (it != other_range.end() && it->first < c.first) { - ++it; - } - auto& cdef = s.column_at(kind, c.first); - if (it == other_range.end() || it->first != c.first) { - r.append_cell(c.first, c.second); - } else if (cdef.is_counter()) { - auto cell = counter_cell_view::difference(c.second.as_atomic_cell(), it->second.as_atomic_cell()); - if (cell) { - r.append_cell(c.first, std::move(*cell)); - } - } else if (s.column_at(kind, c.first).is_atomic()) { - if (compare_atomic_cell_for_merge(c.second.as_atomic_cell(), it->second.as_atomic_cell()) > 0) { - r.append_cell(c.first, c.second); - } - } else { - auto ct = static_pointer_cast(s.column_at(kind, c.first).type); - auto diff = ct->difference(c.second.as_collection_mutation(), it->second.as_collection_mutation()); - if (!ct->is_empty(diff)) { - r.append_cell(c.first, std::move(diff)); - } - } - } - }); - return r; -} - -mutation_partition mutation_partition::difference(schema_ptr s, const mutation_partition& other) const -{ - mutation_partition mp(s); - if (_tombstone > other._tombstone) { - mp.apply(_tombstone); - } - mp._static_row = _static_row.difference(*s, column_kind::static_column, other._static_row); - - mp._row_tombstones = _row_tombstones.difference(*s, other._row_tombstones); - - auto it_r = other._rows.begin(); - rows_entry::compare cmp_r(*s); - for (auto&& r : _rows) { - if (r.dummy()) { - continue; - } - while (it_r != other._rows.end() && (it_r->dummy() || cmp_r(*it_r, r))) { - ++it_r; - } - if (it_r == other._rows.end() || !it_r->key().equal(*s, r.key())) { - mp.insert_row(*s, r.key(), r.row()); - } else { - auto dr = r.row().difference(*s, column_kind::regular_column, it_r->row()); - if (!dr.empty()) { - mp.insert_row(*s, r.key(), std::move(dr)); - } - } - } - return mp; -} - -void mutation_partition::accept(const schema& s, mutation_partition_visitor& v) const { - v.accept_partition_tombstone(_tombstone); - _static_row.for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) { - const column_definition& def = s.static_column_at(id); - if (def.is_atomic()) { - v.accept_static_cell(id, cell.as_atomic_cell()); - } else { - v.accept_static_cell(id, cell.as_collection_mutation()); - } - }); - for (const range_tombstone& rt : _row_tombstones) { - v.accept_row_tombstone(rt); - } - for (const rows_entry& e : _rows) { - const deletable_row& dr = e.row(); - v.accept_row(e.position(), dr.deleted_at(), dr.marker(), e.dummy(), e.continuous()); - dr.cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) { - const column_definition& def = s.regular_column_at(id); - if (def.is_atomic()) { - v.accept_row_cell(id, cell.as_atomic_cell()); - } else { - v.accept_row_cell(id, cell.as_collection_mutation()); - } - }); - } -} - -void -mutation_partition::upgrade(const schema& old_schema, const schema& new_schema) { - // We need to copy to provide strong exception guarantees. - mutation_partition tmp(new_schema.shared_from_this()); - converting_mutation_partition_applier v(old_schema.get_column_mapping(), new_schema, tmp); - accept(old_schema, v); - *this = std::move(tmp); -} - -void row_marker::apply_reversibly(row_marker& rm) noexcept { - if (compare_row_marker_for_merge(*this, rm) < 0) { - std::swap(*this, rm); - } else { - rm = *this; - } -} - -void row_marker::revert(row_marker& rm) noexcept { - std::swap(*this, rm); -} - -// Adds mutation to query::result. 
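// A short sketch (not part of the original source; purely illustrative) of the
// consumer shape that the result builders below (query_result_builder,
// reconcilable_result_builder) implement. The flattened-mutation reader drives
// such an object partition by partition, and each stop_iteration return value
// lets the consumer end the page early:
//
//     struct consumer_shape {
//         void consume_new_partition(const dht::decorated_key&);
//         void consume(tombstone);                       // partition tombstone
//         stop_iteration consume(static_row&&, tombstone, bool is_alive);
//         stop_iteration consume(clustering_row&&, row_tombstone, bool is_alive);
//         stop_iteration consume(range_tombstone&&);
//         stop_iteration consume_end_of_partition();
//         void consume_end_of_stream();
//     };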
-class mutation_querier { - const schema& _schema; - query::result_memory_accounter& _memory_accounter; - query::result::partition_writer& _pw; - ser::qr_partition__static_row__cells _static_cells_wr; - bool _live_data_in_static_row{}; - uint32_t _live_clustering_rows = 0; - stdx::optional> _rows_wr; - bool _short_reads_allowed; -private: - void query_static_row(const row& r, tombstone current_tombstone); - void prepare_writers(); -public: - mutation_querier(const schema& s, query::result::partition_writer& pw, - query::result_memory_accounter& memory_accounter); - void consume(tombstone) { } - // Requires that sr.has_any_live_data() - stop_iteration consume(static_row&& sr, tombstone current_tombstone); - // Requires that cr.has_any_live_data() - stop_iteration consume(clustering_row&& cr, row_tombstone current_tombstone); - stop_iteration consume(range_tombstone&&) { return stop_iteration::no; } - uint32_t consume_end_of_stream(); -}; - -mutation_querier::mutation_querier(const schema& s, query::result::partition_writer& pw, - query::result_memory_accounter& memory_accounter) - : _schema(s) - , _memory_accounter(memory_accounter) - , _pw(pw) - , _static_cells_wr(pw.start().start_static_row().start_cells()) - , _short_reads_allowed(pw.slice().options.contains()) -{ -} - -void mutation_querier::query_static_row(const row& r, tombstone current_tombstone) -{ - const query::partition_slice& slice = _pw.slice(); - if (!slice.static_columns.empty()) { - if (_pw.requested_result()) { - auto start = _static_cells_wr._out.size(); - get_compacted_row_slice(_schema, slice, column_kind::static_column, - r, slice.static_columns, _static_cells_wr); - _memory_accounter.update(_static_cells_wr._out.size() - start); - } else if (_short_reads_allowed) { - seastar::measuring_output_stream stream; - ser::qr_partition__static_row__cells out(stream, { }); - get_compacted_row_slice(_schema, slice, column_kind::static_column, - r, slice.static_columns, _static_cells_wr); - _memory_accounter.update(stream.size()); - } - if (_pw.requested_digest()) { - ::feed_hash(_pw.digest(), current_tombstone); - auto t = hash_row_slice(_pw.digest(), _schema, column_kind::static_column, - r, slice.static_columns); - _pw.last_modified() = std::max({_pw.last_modified(), current_tombstone.timestamp, t}); - } - } - _rows_wr.emplace(std::move(_static_cells_wr).end_cells().end_static_row().start_rows()); -} - -stop_iteration mutation_querier::consume(static_row&& sr, tombstone current_tombstone) { - query_static_row(sr.cells(), current_tombstone); - _live_data_in_static_row = true; - return stop_iteration::no; -} - -void mutation_querier::prepare_writers() { - if (!_rows_wr) { - row empty_row; - query_static_row(empty_row, { }); - _live_data_in_static_row = false; - } -} - -stop_iteration mutation_querier::consume(clustering_row&& cr, row_tombstone current_tombstone) { - prepare_writers(); - - const query::partition_slice& slice = _pw.slice(); - - if (_pw.requested_digest()) { - cr.key().feed_hash(_pw.digest(), _schema); - ::feed_hash(_pw.digest(), current_tombstone); - auto t = hash_row_slice(_pw.digest(), _schema, column_kind::regular_column, cr.cells(), slice.regular_columns); - _pw.last_modified() = std::max({_pw.last_modified(), current_tombstone.tomb().timestamp, t}); - } - - auto write_row = [&] (auto& rows_writer) { - auto cells_wr = [&] { - if (slice.options.contains(query::partition_slice::option::send_clustering_key)) { - return rows_writer.add().write_key(cr.key()).start_cells().start_cells(); - } else { - return 
rows_writer.add().skip_key().start_cells().start_cells();
-            }
-        }();
-        get_compacted_row_slice(_schema, slice, column_kind::regular_column, cr.cells(), slice.regular_columns, cells_wr);
-        std::move(cells_wr).end_cells().end_cells().end_qr_clustered_row();
-    };
-
-    auto stop = stop_iteration::no;
-    if (_pw.requested_result()) {
-        auto start = _rows_wr->_out.size();
-        write_row(*_rows_wr);
-        stop = _memory_accounter.update_and_check(_rows_wr->_out.size() - start);
-    } else if (_short_reads_allowed) {
-        seastar::measuring_output_stream stream;
-        ser::qr_partition__rows out(stream, { });
-        write_row(out);
-        stop = _memory_accounter.update_and_check(stream.size());
-    }
-
-    _live_clustering_rows++;
-    return stop && stop_iteration(_short_reads_allowed);
-}
-
-uint32_t mutation_querier::consume_end_of_stream() {
-    prepare_writers();
-
-    // If we got no rows, but have live static columns, we should only
-    // give them back IFF we did not have any CK restrictions.
-    // #589
-    // If CKs exist, and we do a restriction on them, we either have matching
-    // rows, or return nothing, since CQL does not allow "is null".
-    if (!_live_clustering_rows
-            && (has_ck_selector(_pw.ranges()) || !_live_data_in_static_row)) {
-        _pw.retract();
-        return 0;
-    } else {
-        auto live_rows = std::max(_live_clustering_rows, uint32_t(1));
-        _pw.row_count() += live_rows;
-        _pw.partition_count() += 1;
-        std::move(*_rows_wr).end_rows().end_qr_partition();
-        return live_rows;
-    }
-}
-
-class query_result_builder {
-    const schema& _schema;
-    query::result::builder& _rb;
-    stdx::optional<query::result::partition_writer> _pw;
-    stdx::optional<mutation_querier> _mutation_consumer;
-    stop_iteration _stop;
-    stop_iteration _short_read_allowed;
-public:
-    query_result_builder(const schema& s, query::result::builder& rb)
-        : _schema(s), _rb(rb)
-        , _short_read_allowed(_rb.slice().options.contains<query::partition_slice::option::allow_short_read>())
-    { }
-
-    void consume_new_partition(const dht::decorated_key& dk) {
-        _pw.emplace(_rb.add_partition(_schema, dk.key()));
-        _mutation_consumer.emplace(mutation_querier(_schema, *_pw, _rb.memory_accounter()));
-    }
-
-    void consume(tombstone t) {
-        _mutation_consumer->consume(t);
-    }
-    stop_iteration consume(static_row&& sr, tombstone t, bool) {
-        _stop = _mutation_consumer->consume(std::move(sr), t) && _short_read_allowed;
-        return _stop;
-    }
-    stop_iteration consume(clustering_row&& cr, row_tombstone t, bool) {
-        _stop = _mutation_consumer->consume(std::move(cr), t) && _short_read_allowed;
-        return _stop;
-    }
-    stop_iteration consume(range_tombstone&& rt) {
-        _stop = _mutation_consumer->consume(std::move(rt)) && _short_read_allowed;
-        return _stop;
-    }
-
-    stop_iteration consume_end_of_partition() {
-        auto live_rows_in_partition = _mutation_consumer->consume_end_of_stream();
-        if (_short_read_allowed && live_rows_in_partition > 0 && !_stop) {
-            _stop = _rb.memory_accounter().check();
-        }
-        if (_stop) {
-            _rb.mark_as_short_read();
-        }
-        return _stop;
-    }
-
-    void consume_end_of_stream() {
-    }
-};
-
-future<> data_query(
-        schema_ptr s,
-        const mutation_source& source,
-        const dht::partition_range& range,
-        const query::partition_slice& slice,
-        uint32_t row_limit,
-        uint32_t partition_limit,
-        gc_clock::time_point query_time,
-        query::result::builder& builder,
-        tracing::trace_state_ptr trace_ptr)
-{
-    if (row_limit == 0 || slice.partition_row_limit() == 0 || partition_limit == 0) {
-        return make_ready_future<>();
-    }
-
-    auto is_reversed = slice.options.contains(query::partition_slice::option::reversed);
-
-    auto qrb = query_result_builder(*s, builder);
-    auto cfq =
make_stable_flattened_mutations_consumer>( - *s, query_time, slice, row_limit, partition_limit, std::move(qrb)); - - auto reader = source(s, range, slice, service::get_local_sstable_query_read_priority(), std::move(trace_ptr)); - return consume_flattened(std::move(reader), std::move(cfq), is_reversed); -} - -class reconcilable_result_builder { - const schema& _schema; - const query::partition_slice& _slice; - - std::vector _result; - uint32_t _live_rows; - - bool _has_ck_selector{}; - bool _static_row_is_alive{}; - uint32_t _total_live_rows = 0; - query::result_memory_accounter _memory_accounter; - stop_iteration _stop; - bool _short_read_allowed; - stdx::optional _mutation_consumer; -public: - reconcilable_result_builder(const schema& s, const query::partition_slice& slice, - query::result_memory_accounter&& accounter) - : _schema(s), _slice(slice) - , _memory_accounter(std::move(accounter)) - , _short_read_allowed(slice.options.contains()) - { } - - void consume_new_partition(const dht::decorated_key& dk) { - _has_ck_selector = has_ck_selector(_slice.row_ranges(_schema, dk.key())); - _static_row_is_alive = false; - _live_rows = 0; - auto is_reversed = _slice.options.contains(query::partition_slice::option::reversed); - _mutation_consumer.emplace(streamed_mutation_freezer(_schema, dk.key(), is_reversed)); - } - - void consume(tombstone t) { - _mutation_consumer->consume(t); - } - stop_iteration consume(static_row&& sr, tombstone, bool is_alive) { - _static_row_is_alive = is_alive; - _memory_accounter.update(sr.memory_usage()); - return _mutation_consumer->consume(std::move(sr)); - } - stop_iteration consume(clustering_row&& cr, row_tombstone, bool is_alive) { - _live_rows += is_alive; - auto stop = _memory_accounter.update_and_check(cr.memory_usage()); - if (is_alive) { - // We are considering finishing current read only after consuming a - // live clustering row. While sending a single live row is enough to - // guarantee progress, not ending the result on a live row would - // mean that the next page fetch will read all tombstones after the - // last live row again. - _stop = stop && stop_iteration(_short_read_allowed); - } - return _mutation_consumer->consume(std::move(cr)) || _stop; - } - stop_iteration consume(range_tombstone&& rt) { - _memory_accounter.update(rt.memory_usage()); - return _mutation_consumer->consume(std::move(rt)); - } - - stop_iteration consume_end_of_partition() { - if (_live_rows == 0 && _static_row_is_alive && !_has_ck_selector) { - ++_live_rows; - // Normally we count only live clustering rows, to guarantee that - // the next page fetch won't ask for the same range. However, - // if we return just a single static row we can stop the result as - // well. Next page fetch will ask for the next partition and if we - // don't do that we could end up with an unbounded number of - // partitions with only a static row. 
- _stop = _stop || (_memory_accounter.check() && stop_iteration(_short_read_allowed)); - } - _total_live_rows += _live_rows; - _result.emplace_back(partition { _live_rows, _mutation_consumer->consume_end_of_stream() }); - return _stop; - } - - reconcilable_result consume_end_of_stream() { - return reconcilable_result(_total_live_rows, std::move(_result), - query::short_read(bool(_stop)), - std::move(_memory_accounter).done()); - } -}; - -future -static do_mutation_query(schema_ptr s, - mutation_source source, - const dht::partition_range& range, - const query::partition_slice& slice, - uint32_t row_limit, - uint32_t partition_limit, - gc_clock::time_point query_time, - query::result_memory_accounter&& accounter, - tracing::trace_state_ptr trace_ptr) -{ - if (row_limit == 0 || slice.partition_row_limit() == 0 || partition_limit == 0) { - return make_ready_future(reconcilable_result()); - } - - auto is_reversed = slice.options.contains(query::partition_slice::option::reversed); - - auto rrb = reconcilable_result_builder(*s, slice, std::move(accounter)); - auto cfq = make_stable_flattened_mutations_consumer>( - *s, query_time, slice, row_limit, partition_limit, std::move(rrb)); - - auto reader = source(s, range, slice, service::get_local_sstable_query_read_priority(), std::move(trace_ptr)); - return consume_flattened(std::move(reader), std::move(cfq), is_reversed); -} - -static thread_local auto mutation_query_stage = seastar::make_execution_stage("mutation_query", do_mutation_query); - -future -mutation_query(schema_ptr s, - mutation_source source, - const dht::partition_range& range, - const query::partition_slice& slice, - uint32_t row_limit, - uint32_t partition_limit, - gc_clock::time_point query_time, - query::result_memory_accounter&& accounter, - tracing::trace_state_ptr trace_ptr) -{ - return mutation_query_stage(std::move(s), std::move(source), seastar::cref(range), seastar::cref(slice), - row_limit, partition_limit, query_time, std::move(accounter), std::move(trace_ptr)); -} - -deletable_row::deletable_row(clustering_row&& cr) - : _deleted_at(cr.tomb()) - , _marker(std::move(cr.marker())) - , _cells(std::move(cr.cells())) -{ } - -class counter_write_query_result_builder { - const schema& _schema; - mutation_opt _mutation; -public: - counter_write_query_result_builder(const schema& s) : _schema(s) { } - void consume_new_partition(const dht::decorated_key& dk) { - _mutation = mutation(dk, _schema.shared_from_this()); - } - void consume(tombstone) { } - stop_iteration consume(static_row&& sr, tombstone, bool) { - _mutation->partition().static_row() = std::move(sr.cells()); - return stop_iteration::no; - } - stop_iteration consume(clustering_row&& cr, row_tombstone, bool) { - _mutation->partition().insert_row(_schema, cr.key(), deletable_row(std::move(cr))); - return stop_iteration::no; - } - stop_iteration consume(range_tombstone&& rt) { - return stop_iteration::no; - } - stop_iteration consume_end_of_partition() { - return stop_iteration::no; - } - mutation_opt consume_end_of_stream() { - return std::move(_mutation); - } -}; - -mutation_partition::mutation_partition(mutation_partition::incomplete_tag, const schema& s, tombstone t) - : _tombstone(t) - , _static_row_continuous(!s.has_static_columns()) - , _rows() - , _row_tombstones(s) -{ - _rows.insert_before(_rows.end(), - *current_allocator().construct(s, position_in_partition_view::after_all_clustered_rows(), is_dummy::yes, is_continuous::no)); -} - -bool mutation_partition::is_fully_continuous() const { - if 
(!_static_row_continuous) { - return false; - } - for (auto&& row : _rows) { - if (!row.continuous()) { - return false; - } - } - return true; -} - -void mutation_partition::make_fully_continuous() { - _static_row_continuous = true; - auto i = _rows.begin(); - while (i != _rows.end()) { - if (i->dummy()) { - i = _rows.erase_and_dispose(i, alloc_strategy_deleter()); - } else { - i->set_continuous(true); - ++i; - } - } -} - -future counter_write_query(schema_ptr s, const mutation_source& source, - const dht::decorated_key& dk, - const query::partition_slice& slice, - tracing::trace_state_ptr trace_ptr) -{ - return do_with(dht::partition_range::make_singular(dk), [&] (auto& prange) { - auto cwqrb = counter_write_query_result_builder(*s); - auto cfq = make_stable_flattened_mutations_consumer>( - *s, gc_clock::now(), slice, query::max_rows, query::max_rows, std::move(cwqrb)); - auto reader = source(s, prange, slice, - service::get_local_sstable_query_read_priority(), std::move(trace_ptr)); - return consume_flattened(std::move(reader), std::move(cfq), false); - }); -} diff --git a/scylla/mutation_partition.hh b/scylla/mutation_partition.hh deleted file mode 100644 index daa8b11..0000000 --- a/scylla/mutation_partition.hh +++ /dev/null @@ -1,1063 +0,0 @@ -/* - * Copyright (C) 2014 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include - -#include "schema.hh" -#include "tombstone.hh" -#include "keys.hh" -#include "position_in_partition.hh" -#include "atomic_cell_or_collection.hh" -#include "query-result.hh" -#include "mutation_partition_view.hh" -#include "mutation_partition_visitor.hh" -#include "utils/managed_vector.hh" -#include "hashing_partition_visitor.hh" -#include "range_tombstone_list.hh" -#include "clustering_key_filter.hh" -#include "intrusive_set_external_comparator.hh" -#include "utils/with_relational_operators.hh" - -// -// Container for cells of a row. Cells are identified by column_id. -// -// All cells must belong to a single column_kind. The kind is not stored -// for space-efficiency reasons. Whenever a method accepts a column_kind, -// the caller must always supply the same column_kind. -// -// Can be used as a range of row::cell_entry. 
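The class below keeps a small row's cells in a dense vector guarded by a presence bitset and falls back to an intrusive set once a column id no longer fits under max_vector_size. A rough standalone sketch of that strategy (all names here are hypothetical, with std::map standing in for the intrusive set):

#include <bitset>
#include <cassert>
#include <map>
#include <vector>

constexpr size_t max_vector_size = 32;

struct small_row {
    std::bitset<max_vector_size> present;
    std::vector<int> vec;        // indexed directly by column id
    std::map<size_t, int> set;   // used once an id would not fit
    bool vector_mode = true;

    void put(size_t id, int cell) {
        if (vector_mode && id >= max_vector_size) {
            // migrate existing cells, as row::vector_to_set() does
            for (size_t i = 0; i < vec.size(); ++i) {
                if (present.test(i)) set.emplace(i, vec[i]);
            }
            vector_mode = false;
        }
        if (vector_mode) {
            if (vec.size() <= id) vec.resize(id + 1);
            vec[id] = cell;
            present.set(id);
        } else {
            set[id] = cell;
        }
    }
};

int main() {
    small_row r;
    r.put(3, 42);   // stays in vector mode
    r.put(40, 7);   // forces migration to the map
    assert(!r.vector_mode && r.set.at(3) == 42 && r.set.at(40) == 7);
}

The dual representation presumably pays off because most rows touch only a few columns, so the common case avoids per-cell tree nodes entirely.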
-// -class row { - class cell_entry { - boost::intrusive::set_member_hook<> _link; - column_id _id; - atomic_cell_or_collection _cell; - friend class row; - public: - cell_entry(column_id id, atomic_cell_or_collection cell) - : _id(id) - , _cell(std::move(cell)) - { } - cell_entry(column_id id) - : _id(id) - { } - cell_entry(cell_entry&&) noexcept; - cell_entry(const cell_entry&); - - column_id id() const { return _id; } - const atomic_cell_or_collection& cell() const { return _cell; } - atomic_cell_or_collection& cell() { return _cell; } - - struct compare { - bool operator()(const cell_entry& e1, const cell_entry& e2) const { - return e1._id < e2._id; - } - bool operator()(column_id id1, const cell_entry& e2) const { - return id1 < e2._id; - } - bool operator()(const cell_entry& e1, column_id id2) const { - return e1._id < id2; - } - }; - }; - - using size_type = std::make_unsigned_t; - - enum class storage_type { - vector, - set, - }; - storage_type _type = storage_type::vector; - size_type _size = 0; - - using map_type = boost::intrusive::set, &cell_entry::_link>, - boost::intrusive::compare, boost::intrusive::constant_time_size>; -public: - static constexpr size_t max_vector_size = 32; - static constexpr size_t internal_count = (sizeof(map_type) + sizeof(cell_entry)) / sizeof(atomic_cell_or_collection); -private: - using vector_type = managed_vector; - - struct vector_storage { - std::bitset present; - vector_type v; - }; - - union storage { - storage() { } - ~storage() { } - map_type set; - vector_storage vector; - } _storage; -public: - row(); - ~row(); - row(const row&); - row(row&& other) noexcept; - row& operator=(row&& other) noexcept; - size_t size() const { return _size; } - bool empty() const { return _size == 0; } - - void reserve(column_id); - - const atomic_cell_or_collection& cell_at(column_id id) const; - - // Returns a pointer to cell's value or nullptr if column is not set. - const atomic_cell_or_collection* find_cell(column_id id) const; -private: - template - void remove_if(Func&& func) { - if (_type == storage_type::vector) { - for (unsigned i = 0; i < _storage.vector.v.size(); i++) { - if (!_storage.vector.present.test(i)) { - continue; - } - auto& c = _storage.vector.v[i]; - if (func(i, c)) { - c = atomic_cell_or_collection(); - _storage.vector.present.reset(i); - _size--; - } - } - } else { - for (auto it = _storage.set.begin(); it != _storage.set.end();) { - if (func(it->id(), it->cell())) { - auto& entry = *it; - it = _storage.set.erase(it); - current_allocator().destroy(&entry); - _size--; - } else { - ++it; - } - } - } - } - -private: - auto get_range_vector() const { - auto id_range = boost::irange(0, _storage.vector.v.size()); - return boost::combine(id_range, _storage.vector.v) - | boost::adaptors::filtered([this] (const boost::tuple& t) { - return _storage.vector.present.test(t.get<0>()); - }) | boost::adaptors::transformed([] (const boost::tuple& t) { - return std::pair(t.get<0>(), t.get<1>()); - }); - } - auto get_range_set() const { - auto range = boost::make_iterator_range(_storage.set.begin(), _storage.set.end()); - return range | boost::adaptors::transformed([] (const cell_entry& c) { - return std::pair(c.id(), c.cell()); - }); - } - template - auto with_both_ranges(const row& other, Func&& func) const; - - void vector_to_set(); - - // Calls Func(column_id, atomic_cell_or_collection&) for each cell in this row. - // - // Func() is allowed to modify the cell. Emptying a cell makes it still - // visible to for_each(). 
- // - // In case of exception, calls Rollback(column_id, atomic_cell_or_collection&) on - // all cells on which Func() was successfully invoked in reverse order. - // - template - void for_each_cell(Func&&, Rollback&&); -public: - // Calls Func(column_id, atomic_cell_or_collection&) for each cell in this row. - // noexcept if Func doesn't throw. - template - void for_each_cell(Func&& func) { - if (_type == storage_type::vector) { - for (auto i : bitsets::for_each_set(_storage.vector.present)) { - func(i, _storage.vector.v[i]); - } - } else { - for (auto& cell : _storage.set) { - func(cell.id(), cell.cell()); - } - } - } - - template - void for_each_cell(Func&& func) const { - for_each_cell_until([func = std::forward(func)] (column_id id, const atomic_cell_or_collection& c) { - func(id, c); - return stop_iteration::no; - }); - } - - template - void for_each_cell_until(Func&& func) const { - if (_type == storage_type::vector) { - for (auto i : bitsets::for_each_set(_storage.vector.present)) { - auto& cell = _storage.vector.v[i]; - if (func(i, cell) == stop_iteration::yes) { - break; - } - } - } else { - for (auto& cell : _storage.set) { - const auto& c = cell.cell(); - if (func(cell.id(), c) == stop_iteration::yes) { - break; - } - } - } - } - - // Merges cell's value into the row. - void apply(const column_definition& column, const atomic_cell_or_collection& cell); - - // - // Merges cell's value into the row. - // - // In case of exception the current object is left with a value equivalent to the original state. - // - // The external cell is left in a valid state, such that it will commute with - // current object to the same value should the exception had not occurred. - // - void apply(const column_definition& column, atomic_cell_or_collection&& cell); - - // Equivalent to calling apply_reversibly() with a row containing only given cell. - // See reversibly_mergeable.hh - void apply_reversibly(const column_definition& column, atomic_cell_or_collection& cell); - // See reversibly_mergeable.hh - void revert(const column_definition& column, atomic_cell_or_collection& cell) noexcept; - - // Adds cell to the row. The column must not be already set. - void append_cell(column_id id, atomic_cell_or_collection cell); - - void apply(const schema&, column_kind, const row& src); - void apply(const schema&, column_kind, row&& src); - - // See reversibly_mergeable.hh - void apply_reversibly(const schema&, column_kind, row& src); - // See reversibly_mergeable.hh - void revert(const schema&, column_kind, row& src) noexcept; - - // Expires cells based on query_time. Expires tombstones based on gc_before - // and max_purgeable. Removes cells covered by tomb. - // Returns true iff there are any live cells left. 
- bool compact_and_expire(const schema& s, column_kind kind, row_tombstone tomb, gc_clock::time_point query_time, - can_gc_fn&, gc_clock::time_point gc_before); - - row difference(const schema&, column_kind, const row& other) const; - - // Assumes the other row has the same schema - // Consistent with feed_hash() - bool operator==(const row&) const; - - bool equal(column_kind kind, const schema& this_schema, const row& other, const schema& other_schema) const; - - size_t external_memory_usage() const; - - friend std::ostream& operator<<(std::ostream& os, const row& r); -}; - -std::ostream& operator<<(std::ostream& os, const std::pair& c); - -class row_marker; -int compare_row_marker_for_merge(const row_marker& left, const row_marker& right) noexcept; - -class row_marker { - static constexpr gc_clock::duration no_ttl { 0 }; - static constexpr gc_clock::duration dead { -1 }; - api::timestamp_type _timestamp = api::missing_timestamp; - gc_clock::duration _ttl = no_ttl; - gc_clock::time_point _expiry; -public: - row_marker() = default; - explicit row_marker(api::timestamp_type created_at) : _timestamp(created_at) { } - row_marker(api::timestamp_type created_at, gc_clock::duration ttl, gc_clock::time_point expiry) - : _timestamp(created_at), _ttl(ttl), _expiry(expiry) - { } - explicit row_marker(tombstone deleted_at) - : _timestamp(deleted_at.timestamp), _ttl(dead), _expiry(deleted_at.deletion_time) - { } - bool is_missing() const { - return _timestamp == api::missing_timestamp; - } - bool is_live() const { - return !is_missing() && _ttl != dead; - } - bool is_live(tombstone t, gc_clock::time_point now) const { - if (is_missing() || _ttl == dead) { - return false; - } - if (_ttl != no_ttl && _expiry < now) { - return false; - } - return _timestamp > t.timestamp; - } - // Can be called only when !is_missing(). - bool is_dead(gc_clock::time_point now) const { - if (_ttl == dead) { - return true; - } - return _ttl != no_ttl && _expiry < now; - } - // Can be called only when is_live(). - bool is_expiring() const { - return _ttl != no_ttl; - } - // Can be called only when is_expiring(). - gc_clock::duration ttl() const { - return _ttl; - } - // Can be called only when is_expiring(). - gc_clock::time_point expiry() const { - return _expiry; - } - // Can be called only when is_dead(). - gc_clock::time_point deletion_time() const { - return _ttl == dead ? _expiry : _expiry - _ttl; - } - api::timestamp_type timestamp() const { - return _timestamp; - } - void apply(const row_marker& rm) { - if (compare_row_marker_for_merge(*this, rm) < 0) { - *this = rm; - } - } - // See reversibly_mergeable.hh - void apply_reversibly(row_marker& rm) noexcept; - // See reversibly_mergeable.hh - void revert(row_marker& rm) noexcept; - // Expires cells and tombstones. Removes items covered by higher level - // tombstones. - // Returns true if row marker is live. 
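The liveness arithmetic above condenses into a small standalone model, using plain std::chrono in place of gc_clock, a hypothetical is_live helper, and ignoring the missing-timestamp case:

#include <cassert>
#include <chrono>

using clk = std::chrono::system_clock;

bool is_live(long write_ts, clk::duration ttl, clk::time_point expiry,
             long tomb_ts, clk::time_point now) {
    if (ttl != clk::duration::zero() && expiry < now) {
        return false;           // TTL elapsed
    }
    return write_ts > tomb_ts;  // a tombstone at or after the write kills it
}

int main() {
    auto t0 = clk::now();
    auto ttl = std::chrono::seconds(10);
    auto expiry = t0 + ttl;
    assert(is_live(5, ttl, expiry, 0, t0 + std::chrono::seconds(5)));
    assert(!is_live(5, ttl, expiry, 0, t0 + std::chrono::seconds(11))); // expired
    assert(!is_live(5, ttl, expiry, 7, t0));                            // shadowed
}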
- bool compact_and_expire(tombstone tomb, gc_clock::time_point now, - can_gc_fn& can_gc, gc_clock::time_point gc_before) { - if (is_missing()) { - return false; - } - if (_timestamp <= tomb.timestamp) { - _timestamp = api::missing_timestamp; - return false; - } - if (_ttl > no_ttl && _expiry < now) { - _expiry -= _ttl; - _ttl = dead; - } - if (_ttl == dead && _expiry < gc_before && can_gc(tombstone(_timestamp, _expiry))) { - _timestamp = api::missing_timestamp; - } - return !is_missing() && _ttl != dead; - } - // Consistent with feed_hash() - bool operator==(const row_marker& other) const { - if (_timestamp != other._timestamp) { - return false; - } - if (is_missing()) { - return true; - } - if (_ttl != other._ttl) { - return false; - } - return _ttl == no_ttl || _expiry == other._expiry; - } - bool operator!=(const row_marker& other) const { - return !(*this == other); - } - // Consistent with operator==() - template - void feed_hash(Hasher& h) const { - ::feed_hash(h, _timestamp); - if (!is_missing()) { - ::feed_hash(h, _ttl); - if (_ttl != no_ttl) { - ::feed_hash(h, _expiry); - } - } - } - friend std::ostream& operator<<(std::ostream& os, const row_marker& rm); -}; - -template<> -struct appending_hash { - template - void operator()(Hasher& h, const row_marker& m) const { - m.feed_hash(h); - } -}; - -class clustering_row; - -class shadowable_tombstone : public with_relational_operators { - tombstone _tomb; -public: - - explicit shadowable_tombstone(api::timestamp_type timestamp, gc_clock::time_point deletion_time) - : _tomb(timestamp, deletion_time) { - } - - explicit shadowable_tombstone(tombstone tomb = tombstone()) - : _tomb(std::move(tomb)) { - } - - int compare(const shadowable_tombstone& t) const { - return _tomb.compare(t._tomb); - } - - explicit operator bool() const { - return bool(_tomb); - } - - const tombstone& tomb() const { - return _tomb; - } - - // A shadowable row tombstone is valid only if the row has no live marker. In other words, - // the row tombstone is only valid as long as no newer insert is done (thus setting a - // live row marker; note that if the row timestamp set is lower than the tombstone's, - // then the tombstone remains in effect as usual). If a row has a shadowable tombstone - // with timestamp Ti and that row is updated with a timestamp Tj, such that Tj > Ti - // (and that update sets the row marker), then the shadowable tombstone is shadowed by - // that update. A concrete consequence is that if the update has cells with timestamp - // lower than Ti, then those cells are preserved (since the deletion is removed), and - // this is contrary to a regular, non-shadowable row tombstone where the tombstone is - // preserved and such cells are removed. 
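A reduced standalone model of that rule, with timestamps as plain integers and hypothetical types: a newer live marker cancels the shadowable deletion, while an older one leaves it in force.

#include <cassert>

constexpr long no_tombstone = -1;

struct marker { long timestamp; bool live; };

struct shadowable {
    long timestamp;

    bool is_shadowed_by(const marker& m) const {
        return m.live && m.timestamp > timestamp;
    }
    void maybe_shadow(long regular, const marker& m) {
        if (is_shadowed_by(m)) {
            timestamp = regular;  // fall back to the regular tombstone
        }
    }
};

int main() {
    shadowable t{100};            // MV update deleted the row at Ti = 100
    marker insert{200, true};     // newer insert sets a live marker, Tj = 200
    t.maybe_shadow(no_tombstone, insert);
    assert(t.timestamp == no_tombstone);  // deletion gone; older cells survive

    shadowable t2{100};
    marker old_insert{50, true};  // an insert older than Ti...
    t2.maybe_shadow(no_tombstone, old_insert);
    assert(t2.timestamp == 100);  // ...leaves the tombstone in effect
}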
- bool is_shadowed_by(const row_marker& marker) const { - return marker.is_live() && marker.timestamp() > _tomb.timestamp; - } - - void maybe_shadow(tombstone t, row_marker marker) noexcept { - if (is_shadowed_by(marker)) { - _tomb = std::move(t); - } - } - - void apply(tombstone t) noexcept { - _tomb.apply(t); - } - - void apply(shadowable_tombstone t) noexcept { - _tomb.apply(t._tomb); - } - - friend std::ostream& operator<<(std::ostream& out, const shadowable_tombstone& t) { - if (t) { - return out << "{shadowable tombstone: timestamp=" << t.tomb().timestamp - << ", deletion_time=" << t.tomb().deletion_time.time_since_epoch().count() - << "}"; - } else { - return out << "{shadowable tombstone: none}"; - } - } -}; - -template<> -struct appending_hash { - template - void operator()(Hasher& h, const shadowable_tombstone& t) const { - feed_hash(h, t.tomb()); - } -}; - -/* -The rules for row_tombstones are as follows: - - The shadowable tombstone is always >= than the regular one; - - The regular tombstone works as expected; - - The shadowable tombstone doesn't erase or compact away the regular - row tombstone, nor dead cells; - - The shadowable tombstone can erase live cells, but only provided they - can be recovered (e.g., by including all cells in a MV update, both - updated cells and pre-existing ones); - - The shadowable tombstone can be erased or compacted away by a newer - row marker. -*/ -class row_tombstone : public with_relational_operators { - tombstone _regular; - shadowable_tombstone _shadowable; // _shadowable is always >= _regular -public: - explicit row_tombstone(tombstone regular, shadowable_tombstone shadowable) - : _regular(std::move(regular)) - , _shadowable(std::move(shadowable)) { - } - - explicit row_tombstone(tombstone regular) - : row_tombstone(regular, shadowable_tombstone(regular)) { - } - - row_tombstone() = default; - - int compare(const row_tombstone& t) const { - return _shadowable.compare(t._shadowable); - } - - explicit operator bool() const { - return bool(_shadowable); - } - - const tombstone& tomb() const { - return _shadowable.tomb(); - } - - const gc_clock::time_point max_deletion_time() const { - return std::max(_regular.deletion_time, _shadowable.tomb().deletion_time); - } - - const tombstone& regular() const { - return _regular; - } - - const shadowable_tombstone& shadowable() const { - return _shadowable; - } - - bool is_shadowable() const { - return _shadowable.tomb() > _regular; - } - - void maybe_shadow(const row_marker& marker) noexcept { - _shadowable.maybe_shadow(_regular, marker); - } - - void apply(tombstone regular) noexcept { - _shadowable.apply(regular); - _regular.apply(regular); - } - - void apply(shadowable_tombstone shadowable, row_marker marker) noexcept { - _shadowable.apply(shadowable.tomb()); - _shadowable.maybe_shadow(_regular, marker); - } - - void apply(row_tombstone t, row_marker marker) noexcept { - _regular.apply(t._regular); - _shadowable.apply(t._shadowable); - _shadowable.maybe_shadow(_regular, marker); - } - - // See reversibly_mergeable.hh - void apply_reversibly(row_tombstone& t, row_marker marker) noexcept { - std::swap(*this, t); - apply(t, marker); - } - - // See reversibly_mergeable.hh - void revert(row_tombstone& t) noexcept { - std::swap(*this, t); - } - - friend std::ostream& operator<<(std::ostream& out, const row_tombstone& t) { - if (t) { - return out << "{row_tombstone: " << t._regular << (t.is_shadowable() ? 
t._shadowable : shadowable_tombstone()) << "}"; - } else { - return out << "{row_tombstone: none}"; - } - } -}; - -template<> -struct appending_hash { - template - void operator()(Hasher& h, const row_tombstone& t) const { - feed_hash(h, t.regular()); - if (t.is_shadowable()) { - feed_hash(h, t.shadowable()); - } - } -}; - -class deletable_row final { - row_tombstone _deleted_at; - row_marker _marker; - row _cells; -public: - deletable_row() {} - explicit deletable_row(clustering_row&&); - deletable_row(row_tombstone tomb, const row_marker& marker, const row& cells) - : _deleted_at(tomb), _marker(marker), _cells(cells) - {} - - void apply(tombstone deleted_at) { - _deleted_at.apply(deleted_at); - } - - void apply(shadowable_tombstone deleted_at) { - _deleted_at.apply(deleted_at, _marker); - } - - void apply(row_tombstone deleted_at) { - _deleted_at.apply(deleted_at, _marker); - } - - void apply(const row_marker& rm) { - _marker.apply(rm); - _deleted_at.maybe_shadow(_marker); - } - - void remove_tombstone() { - _deleted_at = {}; - } - - // See reversibly_mergeable.hh - void apply_reversibly(const schema& s, deletable_row& src); - // See reversibly_mergeable.hh - void revert(const schema& s, deletable_row& src); - - // Weak exception guarantees. After exception, both src and this will commute to the same value as - // they would should the exception not happen. - void apply(const schema& s, deletable_row&& src); -public: - row_tombstone deleted_at() const { return _deleted_at; } - api::timestamp_type created_at() const { return _marker.timestamp(); } - row_marker& marker() { return _marker; } - const row_marker& marker() const { return _marker; } - const row& cells() const { return _cells; } - row& cells() { return _cells; } - friend std::ostream& operator<<(std::ostream& os, const deletable_row& dr); - bool equal(column_kind, const schema& s, const deletable_row& other, const schema& other_schema) const; - bool is_live(const schema& s, tombstone base_tombstone = tombstone(), gc_clock::time_point query_time = gc_clock::time_point::min()) const; - bool empty() const { return !_deleted_at && _marker.is_missing() && !_cells.size(); } - deletable_row difference(const schema&, column_kind, const deletable_row& other) const; -}; - -class rows_entry { - intrusive_set_external_comparator_member_hook _link; - clustering_key _key; - deletable_row _row; - struct flags { - bool _continuous : 1; // See doc of is_continuous. - bool _dummy : 1; - bool _last : 1; - bool _erased : 1; // Used only temporarily during apply_reversibly(). Refs #2012. 
- flags() : _continuous(true), _dummy(false), _last(false), _erased(false) { } - } _flags{}; - friend class mutation_partition; -public: - struct erased_tag {}; - rows_entry(erased_tag, const rows_entry& e) - : _key(e._key) - { - _flags._erased = true; - _flags._last = e._flags._last; - } - explicit rows_entry(clustering_key&& key) - : _key(std::move(key)) - { } - explicit rows_entry(const clustering_key& key) - : _key(key) - { } - rows_entry(const schema& s, position_in_partition_view pos, is_dummy dummy, is_continuous continuous) - : _key(pos.key()) - { - if (!pos.is_clustering_row()) { - assert(bool(dummy)); - assert(pos.is_after_all_clustered_rows(s)); // FIXME: Support insertion at any position - _flags._last = true; - } - _flags._dummy = bool(dummy); - _flags._continuous = bool(continuous); - } - rows_entry(const clustering_key& key, deletable_row&& row) - : _key(key), _row(std::move(row)) - { } - rows_entry(const clustering_key& key, const deletable_row& row) - : _key(key), _row(row) - { } - rows_entry(const clustering_key& key, row_tombstone tomb, const row_marker& marker, const row& row) - : _key(key), _row(tomb, marker, row) - { } - rows_entry(rows_entry&& o) noexcept; - rows_entry(const rows_entry& e) - : _key(e._key) - , _row(e._row) - , _flags(e._flags) - { } - // Valid only if !dummy() - clustering_key& key() { - return _key; - } - // Valid only if !dummy() - const clustering_key& key() const { - return _key; - } - deletable_row& row() { - return _row; - } - const deletable_row& row() const { - return _row; - } - position_in_partition_view position() const { - if (_flags._last) { - return position_in_partition_view::after_all_clustered_rows(); - } else { - return position_in_partition_view( - position_in_partition_view::clustering_row_tag_t(), _key); - } - } - - is_continuous continuous() const { return is_continuous(_flags._continuous); } - void set_continuous(bool value) { _flags._continuous = value; } - void set_continuous(is_continuous value) { set_continuous(bool(value)); } - is_dummy dummy() const { return is_dummy(_flags._dummy); } - void apply(row_tombstone t) { - _row.apply(t); - } - // See reversibly_mergeable.hh - void apply_reversibly(const schema& s, rows_entry& e) { - _row.apply_reversibly(s, e._row); - } - // See reversibly_mergeable.hh - void revert(const schema& s, rows_entry& e) noexcept { - _row.revert(s, e._row); - } - bool empty() const { - return _row.empty(); - } - bool erased() const { - return _flags._erased; - } - struct tri_compare { - position_in_partition::tri_compare _c; - explicit tri_compare(const schema& s) : _c(s) {} - int operator()(const rows_entry& e1, const rows_entry& e2) const { - return _c(e1.position(), e2.position()); - } - int operator()(const clustering_key& key, const rows_entry& e) const { - return _c(position_in_partition_view::for_key(key), e.position()); - } - int operator()(const rows_entry& e, const clustering_key& key) const { - return _c(e.position(), position_in_partition_view::for_key(key)); - } - int operator()(const rows_entry& e, position_in_partition_view p) const { - return _c(e.position(), p); - } - int operator()(position_in_partition_view p, const rows_entry& e) const { - return _c(p, e.position()); - } - int operator()(position_in_partition_view p1, position_in_partition_view p2) const { - return _c(p1, p2); - } - }; - struct compare { - tri_compare _c; - explicit compare(const schema& s) : _c(s) {} - bool operator()(const rows_entry& e1, const rows_entry& e2) const { - return _c(e1, e2) < 0; - } - bool 
operator()(const clustering_key& key, const rows_entry& e) const { - return _c(key, e) < 0; - } - bool operator()(const rows_entry& e, const clustering_key& key) const { - return _c(e, key) < 0; - } - bool operator()(const clustering_key_view& key, const rows_entry& e) const { - return _c(key, e) < 0; - } - bool operator()(const rows_entry& e, const clustering_key_view& key) const { - return _c(e, key) < 0; - } - bool operator()(const rows_entry& e, position_in_partition_view p) const { - return _c(e.position(), p) < 0; - } - bool operator()(position_in_partition_view p, const rows_entry& e) const { - return _c(p, e.position()) < 0; - } - }; - template - struct delegating_compare { - Comparator _c; - delegating_compare(Comparator&& c) : _c(std::move(c)) {} - template - bool operator()(const Comparable& v, const rows_entry& e) const { - if (e._flags._last) { - return true; - } - return _c(v, e._key); - } - template - bool operator()(const rows_entry& e, const Comparable& v) const { - if (e._flags._last) { - return false; - } - return _c(e._key, v); - } - }; - template - static auto key_comparator(Comparator&& c) { - return delegating_compare(std::move(c)); - } - friend std::ostream& operator<<(std::ostream& os, const rows_entry& re); - bool equal(const schema& s, const rows_entry& other) const; - bool equal(const schema& s, const rows_entry& other, const schema& other_schema) const; -}; - -// Represents a set of writes made to a single partition. -// -// The object is schema-dependent. Each instance is governed by some -// specific schema version. Accessors require a reference to the schema object -// of that version. -// -// There is an operation of addition defined on mutation_partition objects -// (also called "apply"), which gives as a result an object representing the -// sum of writes contained in the addends. For instances governed by the same -// schema, addition is commutative and associative. -// -// In addition to representing writes, the object supports specifying a set of -// partition elements called "continuity". This set can be used to represent -// lack of information about certain parts of the partition. It can be -// specified which ranges of clustering keys belong to that set. We say that a -// key range is continuous if all keys in that range belong to the continuity -// set, and discontinuous otherwise. By default everything is continuous. -// The static row may be also continuous or not. -// Partition tombstone is always continuous. -// -// Continuity is ignored by instance equality. It's also transient, not -// preserved by serialization. -// -// Continuity is represented internally using flags on row entries. The key -// range between two consecutive entries (both ends exclusive) is continuous -// if and only if rows_entry::continuous() is true for the later entry. The -// range starting after the last entry is assumed to be continuous. The range -// corresponding to the key of the entry is continuous if and only if -// rows_entry::dummy() is false. -// -// Adding two fully-continuous instances gives a fully-continuous instance. -// Continuity doesn't affect how the write part is added. -// -// Addition of continuity is not commutative in general, but is associative. -// Continuity flags on objects representing the same thing (e.g. rows_entry -// with the same key) are merged such that the information stored in the left- -// hand operand wins. Flags on objects which are present only in one of the -// operands are transferred as-is. 
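Those flag rules can be made concrete in a small standalone model (hypothetical types; the real entries are the rows_entry objects kept in the intrusive set):

#include <algorithm>
#include <cassert>
#include <vector>

struct entry {
    int key;
    bool continuous; // gap (previous key, this key), both ends exclusive
    bool dummy;      // if set, information about 'key' itself is missing
};

// Is the single key k covered by the continuity set?
bool key_is_continuous(const std::vector<entry>& sorted, int k) {
    auto it = std::lower_bound(sorted.begin(), sorted.end(), k,
        [](const entry& e, int key) { return e.key < key; });
    if (it != sorted.end() && it->key == k) {
        return !it->dummy;       // the entry's own key: dummy() decides
    }
    if (it == sorted.end()) {
        return true;             // past the last entry: assumed continuous
    }
    return it->continuous;       // inside a gap: the later entry decides
}

int main() {
    std::vector<entry> rows = {{10, true, false}, {20, false, true}};
    assert(key_is_continuous(rows, 5));   // gap before 10 is continuous
    assert(key_is_continuous(rows, 10));  // entry present, not dummy
    assert(!key_is_continuous(rows, 15)); // gap (10,20) is discontinuous
    assert(!key_is_continuous(rows, 20)); // dummy entry: key itself missing
    assert(key_is_continuous(rows, 25));  // after last entry: continuous
}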
Such merging rules are useful for layering -// information in MVCC, where newer versions specify continuity with respect -// to the combined set of rows in all prior versions, not just in their -// versions. -class mutation_partition final { -public: - using rows_type = intrusive_set_external_comparator; - friend class rows_entry; - friend class size_calculator; -private: - tombstone _tombstone; - row _static_row; - bool _static_row_continuous = true; - rows_type _rows; - // Contains only strict prefixes so that we don't have to lookup full keys - // in both _row_tombstones and _rows. - range_tombstone_list _row_tombstones; - - friend class mutation_partition_applier; - friend class converting_mutation_partition_applier; -public: - struct copy_comparators_only {}; - struct incomplete_tag {}; - // Constructs an empty instance which is fully discontinuous except for the partition tombstone. - mutation_partition(incomplete_tag, const schema& s, tombstone); - static mutation_partition make_incomplete(const schema& s, tombstone t = {}) { - return mutation_partition(incomplete_tag(), s, t); - } - mutation_partition(schema_ptr s) - : _rows() - , _row_tombstones(*s) - { } - mutation_partition(mutation_partition& other, copy_comparators_only) - : _rows() - , _row_tombstones(other._row_tombstones, range_tombstone_list::copy_comparator_only()) - { } - mutation_partition(mutation_partition&&) = default; - mutation_partition(const mutation_partition&); - mutation_partition(const mutation_partition&, const schema&, query::clustering_key_filter_ranges); - mutation_partition(mutation_partition&&, const schema&, query::clustering_key_filter_ranges); - ~mutation_partition(); - mutation_partition& operator=(const mutation_partition& x); - mutation_partition& operator=(mutation_partition&& x) noexcept; - bool equal(const schema&, const mutation_partition&) const; - bool equal(const schema& this_schema, const mutation_partition& p, const schema& p_schema) const; - bool equal_continuity(const schema&, const mutation_partition&) const; - // Consistent with equal() - template - void feed_hash(Hasher& h, const schema& s) const { - hashing_partition_visitor v(h, s); - accept(s, v); - } - friend std::ostream& operator<<(std::ostream& os, const mutation_partition& mp); -public: - // Makes sure there is a dummy entry after all clustered rows. Doesn't affect continuity. - // Doesn't invalidate iterators. - void ensure_last_dummy(const schema&); - bool static_row_continuous() const { return _static_row_continuous; } - void set_static_row_continuous(bool value) { _static_row_continuous = value; } - bool is_fully_continuous() const; - void make_fully_continuous(); - void apply(tombstone t) { _tombstone.apply(t); } - void apply_delete(const schema& schema, const clustering_key_prefix& prefix, tombstone t); - void apply_delete(const schema& schema, range_tombstone rt); - void apply_delete(const schema& schema, clustering_key_prefix&& prefix, tombstone t); - void apply_delete(const schema& schema, clustering_key_prefix_view prefix, tombstone t); - // Equivalent to applying a mutation with an empty row, created with given timestamp - void apply_insert(const schema& s, clustering_key_view, api::timestamp_type created_at); - // prefix must not be full - void apply_row_tombstone(const schema& schema, clustering_key_prefix prefix, tombstone t); - void apply_row_tombstone(const schema& schema, range_tombstone rt); - // - // Applies p to current object. - // - // Commutative when this_schema == p_schema. 
If schemas differ, data in p which - // is not representable in this_schema is dropped, thus apply() loses commutativity. - // - // Strong exception guarantees. - void apply(const schema& this_schema, const mutation_partition& p, const schema& p_schema); - // - // Applies p to current object. - // - // Commutative when this_schema == p_schema. If schemas differ, data in p which - // is not representable in this_schema is dropped, thus apply() loses commutativity. - // - // If exception is thrown, this object will be left in a state equivalent to the entry state - // and p will be left in a state which will commute with current object to the same value - // should the exception had not occurred. - void apply(const schema& this_schema, mutation_partition&& p, const schema& p_schema); - // Use in case this instance and p share the same schema. - // Same guarantees as apply(const schema&, mutation_partition&&, const schema&); - void apply(const schema& s, mutation_partition&& p); - // Same guarantees and constraints as for apply(const schema&, const mutation_partition&, const schema&). - void apply(const schema& this_schema, mutation_partition_view p, const schema& p_schema); - - // Converts partition to the new schema. When succeeds the partition should only be accessed - // using the new schema. - // - // Strong exception guarantees. - void upgrade(const schema& old_schema, const schema& new_schema); -private: - void insert_row(const schema& s, const clustering_key& key, deletable_row&& row); - void insert_row(const schema& s, const clustering_key& key, const deletable_row& row); - - uint32_t do_compact(const schema& s, - gc_clock::time_point now, - const std::vector& row_ranges, - bool reverse, - uint32_t row_limit, - can_gc_fn&); - - // Calls func for each row entry inside row_ranges until func returns stop_iteration::yes. - // Removes all entries for which func didn't return stop_iteration::no or wasn't called at all. - // Removes all entries that are empty, check rows_entry::empty(). - // If reversed is true, func will be called on entries in reverse order. In that case row_ranges - // must be already in reverse order. - template - void trim_rows(const schema& s, - const std::vector& row_ranges, - Func&& func); -public: - // Performs the following: - // - throws out data which doesn't belong to row_ranges - // - expires cells and tombstones based on query_time - // - drops cells covered by higher-level tombstones (compaction) - // - leaves at most row_limit live rows - // - // Note: a partition with a static row which has any cell live but no - // clustered rows still counts as one row, according to the CQL row - // counting rules. - // - // Returns the count of CQL rows which remained. If the returned number is - // smaller than the row_limit it means that there was no more data - // satisfying the query left. - // - // The row_limit parameter must be > 0. - // - uint32_t compact_for_query(const schema& s, gc_clock::time_point query_time, - const std::vector& row_ranges, bool reversed, uint32_t row_limit); - - // Performs the following: - // - expires cells based on compaction_time - // - drops cells covered by higher-level tombstones - // - drops expired tombstones which timestamp is before max_purgeable - void compact_for_compaction(const schema& s, can_gc_fn&, - gc_clock::time_point compaction_time); - - // Returns the minimal mutation_partition that when applied to "other" will - // create a mutation_partition equal to the sum of other and this one. 
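That contract can be checked mechanically in a toy model: with last-write-wins merging over integer timestamps (all types hypothetical), applying difference(a, b) to b must give the same result as applying a to b.

#include <algorithm>
#include <cassert>
#include <map>

using part = std::map<int, long>; // key -> write timestamp

// apply: timestamp-wise max-merge (commutative, associative, idempotent)
void apply(part& dst, const part& src) {
    for (auto& [k, ts] : src) {
        auto& cur = dst[k];
        cur = std::max(cur, ts);
    }
}

// difference: minimal delta d such that apply(b, d) == apply(b, a)
part difference(const part& a, const part& other) {
    part d;
    for (auto& [k, ts] : a) {
        auto it = other.find(k);
        if (it == other.end() || it->second < ts) {
            d[k] = ts; // a carries newer information for k
        }
    }
    return d;
}

int main() {
    part a{{1, 10}, {2, 5}};
    part b{{1, 7}, {3, 4}};
    part lhs = b, rhs = b;
    apply(lhs, difference(a, b));
    apply(rhs, a);
    assert(lhs == rhs); // the minimal delta reproduces the full sum
}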
- // This and other must both be governed by the same schema s. - mutation_partition difference(schema_ptr s, const mutation_partition& other) const; - - // Returns true if there is no live data or tombstones. - bool empty() const; -public: - deletable_row& clustered_row(const schema& s, const clustering_key& key); - deletable_row& clustered_row(const schema& s, clustering_key&& key); - deletable_row& clustered_row(const schema& s, clustering_key_view key); - deletable_row& clustered_row(const schema& s, position_in_partition_view pos, is_dummy, is_continuous); -public: - tombstone partition_tombstone() const { return _tombstone; } - row& static_row() { return _static_row; } - const row& static_row() const { return _static_row; } - // return a set of rows_entry where each entry represents a CQL row sharing the same clustering key. - const rows_type& clustered_rows() const { return _rows; } - const range_tombstone_list& row_tombstones() const { return _row_tombstones; } - rows_type& clustered_rows() { return _rows; } - range_tombstone_list& row_tombstones() { return _row_tombstones; } - const row* find_row(const schema& s, const clustering_key& key) const; - tombstone range_tombstone_for_row(const schema& schema, const clustering_key& key) const; - row_tombstone tombstone_for_row(const schema& schema, const clustering_key& key) const; - // Can be called only for non-dummy entries - row_tombstone tombstone_for_row(const schema& schema, const rows_entry& e) const; - boost::iterator_range range(const schema& schema, const query::clustering_range& r) const; - rows_type::const_iterator lower_bound(const schema& schema, const query::clustering_range& r) const; - rows_type::const_iterator upper_bound(const schema& schema, const query::clustering_range& r) const; - rows_type::iterator lower_bound(const schema& schema, const query::clustering_range& r); - rows_type::iterator upper_bound(const schema& schema, const query::clustering_range& r); - boost::iterator_range range(const schema& schema, const query::clustering_range& r); - // Returns an iterator range of rows_entry, with only non-dummy entries. - auto non_dummy_rows() const { - return boost::make_iterator_range(_rows.begin(), _rows.end()) - | boost::adaptors::filtered([] (const rows_entry& e) { return bool(!e.dummy()); }); - } - // Writes this partition using supplied query result writer. - // The partition should be first compacted with compact_for_query(), otherwise - // results may include data which is deleted/expired. - // At most row_limit CQL rows will be written and digested. - void query_compacted(query::result::partition_writer& pw, const schema& s, uint32_t row_limit) const; - void accept(const schema&, mutation_partition_visitor&) const; - - // Returns the number of live CQL rows in this partition. - // - // Note: If no regular rows are live, but there's something live in the - // static row, the static row counts as one row. If there is at least one - // regular row live, static row doesn't count. 
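A quick worked illustration of the counting rule (hypothetical partitions):

    3 live clustered rows, live static row   -> live_row_count() == 3
    0 live clustered rows, live static row   -> live_row_count() == 1
    0 live clustered rows, no live data      -> live_row_count() == 0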
- // - size_t live_row_count(const schema&, - gc_clock::time_point query_time = gc_clock::time_point::min()) const; - - bool is_static_row_live(const schema&, - gc_clock::time_point query_time = gc_clock::time_point::min()) const; -private: - template - void for_each_row(const schema& schema, const query::clustering_range& row_range, bool reversed, Func&& func) const; - friend class counter_write_query_result_builder; -}; diff --git a/scylla/mutation_partition_applier.hh b/scylla/mutation_partition_applier.hh deleted file mode 100644 index 2414150..0000000 --- a/scylla/mutation_partition_applier.hh +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "mutation_partition_view.hh" -#include "schema.hh" - -// Mutation partition visitor which applies visited data into -// existing mutation_partition. -class mutation_partition_applier : public mutation_partition_visitor { - const schema& _schema; - mutation_partition& _p; - deletable_row* _current_row; -public: - mutation_partition_applier(const schema& s, mutation_partition& target) - : _schema(s), _p(target) { } - - virtual void accept_partition_tombstone(tombstone t) override { - _p.apply(t); - } - - virtual void accept_static_cell(column_id id, atomic_cell_view cell) override { - _p._static_row.apply(_schema.column_at(column_kind::static_column, id), atomic_cell_or_collection(cell)); - } - - virtual void accept_static_cell(column_id id, collection_mutation_view collection) override { - _p._static_row.apply(_schema.column_at(column_kind::static_column, id), atomic_cell_or_collection(collection)); - } - - virtual void accept_row_tombstone(const range_tombstone& rt) override { - _p.apply_row_tombstone(_schema, rt); - } - - virtual void accept_row(position_in_partition_view key, const row_tombstone& deleted_at, const row_marker& rm, is_dummy dummy, is_continuous continuous) override { - deletable_row& r = _p.clustered_row(_schema, key, dummy, continuous); - r.apply(rm); - r.apply(deleted_at); - _current_row = &r; - } - - virtual void accept_row_cell(column_id id, atomic_cell_view cell) override { - _current_row->cells().apply(_schema.column_at(column_kind::regular_column, id), atomic_cell_or_collection(cell)); - } - - virtual void accept_row_cell(column_id id, collection_mutation_view collection) override { - _current_row->cells().apply(_schema.column_at(column_kind::regular_column, id), atomic_cell_or_collection(collection)); - } -}; diff --git a/scylla/mutation_partition_serializer.cc b/scylla/mutation_partition_serializer.cc deleted file mode 100644 index 7788e0f..0000000 --- a/scylla/mutation_partition_serializer.cc +++ /dev/null @@ -1,243 +0,0 @@ - -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. 
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "mutation_partition_serializer.hh" -#include "mutation_partition.hh" - -#include "counters.hh" -#include "utils/UUID.hh" -#include "serializer.hh" -#include "idl/uuid.dist.hh" -#include "idl/keys.dist.hh" -#include "idl/mutation.dist.hh" -#include "serializer_impl.hh" -#include "serialization_visitors.hh" -#include "idl/uuid.dist.impl.hh" -#include "idl/keys.dist.impl.hh" -#include "idl/mutation.dist.impl.hh" -#include "service/storage_service.hh" - -using namespace db; - -namespace { - -template -auto write_live_cell(Writer&& writer, atomic_cell_view c) -{ - return std::move(writer).write_created_at(c.timestamp()) - .write_value(c.value()) - .end_live_cell(); -} - -template -auto write_counter_cell(Writer&& writer, atomic_cell_view c) -{ - auto value = std::move(writer).write_created_at(c.timestamp()); - return [&c, value = std::move(value)] () mutable { - if (c.is_counter_update()) { - auto delta = c.counter_update_value(); - return std::move(value).start_value_counter_cell_update() - .write_delta(delta) - .end_counter_cell_update(); - } else { - counter_cell_view ccv(c); - auto shards = std::move(value).start_value_counter_cell_full() - .start_shards(); - for (auto csv : ccv.shards()) { - shards.add_shards(counter_shard(csv)); - } - return std::move(shards).end_shards().end_counter_cell_full(); - } - }().end_counter_cell(); -} - -template -auto write_expiring_cell(Writer&& writer, atomic_cell_view c) -{ - return std::move(writer).write_ttl(c.ttl()) - .write_expiry(c.expiry()) - .start_c() - .write_created_at(c.timestamp()) - .write_value(c.value()) - .end_c() - .end_expiring_cell(); -} - -template -auto write_dead_cell(Writer&& writer, atomic_cell_view c) -{ - return std::move(writer).start_tomb() - .write_timestamp(c.timestamp()) - .write_deletion_time(c.deletion_time()) - .end_tomb() - .end_dead_cell(); -} - -template -auto write_collection_cell(Writer&& collection_writer, collection_mutation_view cmv, const column_definition& def) -{ - auto&& ctype = static_pointer_cast(def.type); - auto m_view = ctype->deserialize_mutation_form(cmv); - auto cells_writer = std::move(collection_writer).write_tomb(m_view.tomb).start_elements(); - for (auto&& c : m_view.cells) { - auto cell_writer = cells_writer.add().write_key(c.first); - if (!c.second.is_live()) { - write_dead_cell(std::move(cell_writer).start_value_dead_cell(), c.second).end_collection_element(); - } else if (c.second.is_live_and_has_ttl()) { - write_expiring_cell(std::move(cell_writer).start_value_expiring_cell(), c.second).end_collection_element(); - } else { - write_live_cell(std::move(cell_writer).start_value_live_cell(), c.second).end_collection_element(); - } - } - return std::move(cells_writer).end_elements().end_collection_cell(); -} - -template -auto write_row_cells(Writer&& writer, const row& r, const schema& s, column_kind kind) -{ - auto column_writer = std::move(writer).start_columns(); - 
r.for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) { - auto& def = s.column_at(kind, id); - auto cell_or_collection_writer = column_writer.add().write_id(id); - if (def.is_atomic()) { - auto&& c = cell.as_atomic_cell(); - auto cell_writer = std::move(cell_or_collection_writer).start_c_variant(); - if (!c.is_live()) { - write_dead_cell(std::move(cell_writer).start_variant_dead_cell(), c).end_variant().end_column(); - } else if (def.is_counter()) { - write_counter_cell(std::move(cell_writer).start_variant_counter_cell(), c).end_variant().end_column(); - } else if (c.is_live_and_has_ttl()) { - write_expiring_cell(std::move(cell_writer).start_variant_expiring_cell(), c).end_variant().end_column(); - } else { - write_live_cell(std::move(cell_writer).start_variant_live_cell(), c).end_variant().end_column(); - } - } else { - write_collection_cell(std::move(cell_or_collection_writer).start_c_collection_cell(), cell.as_collection_mutation(), def).end_column(); - } - }); - return std::move(column_writer).end_columns(); -} - -template -auto write_row_marker(Writer&& writer, const row_marker& marker) -{ - if (marker.is_missing()) { - return std::move(writer).start_marker_no_marker().end_no_marker(); - } else if (!marker.is_live()) { - return std::move(writer).start_marker_dead_marker() - .start_tomb() - .write_timestamp(marker.timestamp()) - .write_deletion_time(marker.deletion_time()) - .end_tomb() - .end_dead_marker(); - } else if (marker.is_expiring()) { - return std::move(writer).start_marker_expiring_marker() - .start_lm() - .write_created_at(marker.timestamp()) - .end_lm() - .write_ttl(marker.ttl()) - .write_expiry(marker.expiry()) - .end_expiring_marker(); - } else { - return std::move(writer).start_marker_live_marker() - .write_created_at(marker.timestamp()) - .end_live_marker(); - } -} - -} - -template -static void write_tombstones(const schema& s, RowTombstones& row_tombstones, const range_tombstone_list& rt_list) -{ - for (auto&& rt : rt_list) { - row_tombstones.add().write_start(rt.start).write_tomb(rt.tomb).write_start_kind(rt.start_kind) - .write_end(rt.end).write_end_kind(rt.end_kind).end_range_tombstone(); - } -} - -template -static auto write_tombstone(Writer&& writer, const tombstone& t) { - return std::move(writer).write_timestamp(t.timestamp).write_deletion_time(t.deletion_time); -} - -template -static auto write_row(Writer&& writer, const schema& s, const clustering_key_prefix& key, const row& cells, const row_marker& m, const row_tombstone& t) { - auto marker_writer = std::move(writer).write_key(key); - auto deleted_at_writer = write_row_marker(std::move(marker_writer), m).start_deleted_at(); - auto row_writer = write_tombstone(std::move(deleted_at_writer), t.regular()).end_deleted_at().start_cells(); - auto shadowable_deleted_at_writer = write_row_cells(std::move(row_writer), cells, s, column_kind::regular_column).end_cells().start_shadowable_deleted_at(); - return write_tombstone(std::move(shadowable_deleted_at_writer), t.shadowable().tomb()).end_shadowable_deleted_at().end_deletable_row(); -} - -template -void mutation_partition_serializer::write_serialized(Writer&& writer, const schema& s, const mutation_partition& mp) -{ - auto srow_writer = std::move(writer).write_tomb(mp.partition_tombstone()).start_static_row(); - auto row_tombstones = write_row_cells(std::move(srow_writer), mp.static_row(), s, column_kind::static_column).end_static_row().start_range_tombstones(); - write_tombstones(s, row_tombstones, mp.row_tombstones()); - auto clustering_rows = 
std::move(row_tombstones).end_range_tombstones().start_rows(); - for (auto&& cr : mp.non_dummy_rows()) { - write_row(clustering_rows.add(), s, cr.key(), cr.row().cells(), cr.row().marker(), cr.row().deleted_at()); - } - std::move(clustering_rows).end_rows().end_mutation_partition(); -} - -mutation_partition_serializer::mutation_partition_serializer(const schema& schema, const mutation_partition& p) - : _schema(schema), _p(p) -{ } - -void -mutation_partition_serializer::write(bytes_ostream& out) const { - write(ser::writer_of_mutation_partition(out)); -} - -void mutation_partition_serializer::write(ser::writer_of_mutation_partition&& wr) const -{ - write_serialized(std::move(wr), _schema, _p); -} - -void serialize_mutation_fragments(const schema& s, tombstone partition_tombstone, - stdx::optional sr, range_tombstone_list rts, - std::deque crs, ser::writer_of_mutation_partition&& wr) -{ - auto srow_writer = std::move(wr).write_tomb(partition_tombstone).start_static_row(); - auto row_tombstones = [&] { - if (sr) { - return write_row_cells(std::move(srow_writer), sr->cells(), s, column_kind::static_column).end_static_row().start_range_tombstones(); - } else { - return std::move(srow_writer).start_columns().end_columns().end_static_row().start_range_tombstones(); - } - }(); - sr = { }; - - write_tombstones(s, row_tombstones, rts); - rts.clear(); - - auto clustering_rows = std::move(row_tombstones).end_range_tombstones().start_rows(); - while (!crs.empty()) { - auto& cr = crs.front(); - write_row(clustering_rows.add(), s, cr.key(), cr.cells(), cr.marker(), cr.tomb()); - crs.pop_front(); - } - std::move(clustering_rows).end_rows().end_mutation_partition(); -} diff --git a/scylla/mutation_partition_serializer.hh b/scylla/mutation_partition_serializer.hh deleted file mode 100644 index f1dae36..0000000 --- a/scylla/mutation_partition_serializer.hh +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "utils/data_input.hh" -#include "utils/data_output.hh" -#include "database_fwd.hh" -#include "mutation_partition_view.hh" -#include "bytes_ostream.hh" -#include "streamed_mutation.hh" - -namespace ser { -template -class writer_of_mutation_partition; -} - -class mutation_partition_serializer { - static size_t size(const schema&, const mutation_partition&); -public: - using size_type = uint32_t; -private: - const schema& _schema; - const mutation_partition& _p; -private: - template - static void write_serialized(Writer&& out, const schema&, const mutation_partition&); -public: - using count_type = uint32_t; - mutation_partition_serializer(const schema&, const mutation_partition&); -public: - void write(bytes_ostream&) const; - void write(ser::writer_of_mutation_partition&&) const; -}; - -void serialize_mutation_fragments(const schema& s, tombstone partition_tombstone, - stdx::optional sr, range_tombstone_list range_tombstones, - std::deque clustering_rows, ser::writer_of_mutation_partition&&); diff --git a/scylla/mutation_partition_view.cc b/scylla/mutation_partition_view.cc deleted file mode 100644 index d2d555c..0000000 --- a/scylla/mutation_partition_view.cc +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include - -#include "mutation_partition_view.hh" -#include "schema.hh" -#include "atomic_cell.hh" -#include "utils/data_input.hh" -#include "mutation_partition_serializer.hh" -#include "mutation_partition.hh" -#include "counters.hh" - -#include "utils/UUID.hh" -#include "serializer.hh" -#include "idl/uuid.dist.hh" -#include "idl/keys.dist.hh" -#include "idl/mutation.dist.hh" -#include "serializer_impl.hh" -#include "serialization_visitors.hh" -#include "idl/uuid.dist.impl.hh" -#include "idl/keys.dist.impl.hh" -#include "idl/mutation.dist.impl.hh" - -using namespace db; - -namespace { - -using atomic_cell_variant = boost::variant; - -atomic_cell read_atomic_cell(atomic_cell_variant cv) -{ - struct atomic_cell_visitor : boost::static_visitor { - atomic_cell operator()(ser::live_cell_view& lcv) const { - return atomic_cell::make_live(lcv.created_at(), lcv.value()); - } - atomic_cell operator()(ser::expiring_cell_view& ecv) const { - return atomic_cell::make_live(ecv.c().created_at(), ecv.c().value(), ecv.expiry(), ecv.ttl()); - } - atomic_cell operator()(ser::dead_cell_view& dcv) const { - return atomic_cell::make_dead(dcv.tomb().timestamp(), dcv.tomb().deletion_time()); - } - atomic_cell operator()(ser::counter_cell_view& ccv) const { - class counter_cell_visitor : public boost::static_visitor { - api::timestamp_type _created_at; - public: - explicit counter_cell_visitor(api::timestamp_type ts) - : _created_at(ts) { } - - atomic_cell operator()(ser::counter_cell_full_view& ccv) const { - // TODO: a lot of copying for something called view - counter_cell_builder ccb; // we know the final number of shards - for (auto csv : ccv.shards()) { - ccb.add_shard(counter_shard(csv)); - } - return ccb.build(_created_at); - } - atomic_cell operator()(ser::counter_cell_update_view& ccv) const { - return atomic_cell::make_live_counter_update(_created_at, ccv.delta()); - } - atomic_cell operator()(ser::unknown_variant_type&) const { - throw std::runtime_error("Trying to deserialize counter cell in unknown state"); - } - }; - auto v = ccv.value(); - return boost::apply_visitor(counter_cell_visitor(ccv.created_at()), v); - } - atomic_cell operator()(ser::unknown_variant_type&) const { - throw std::runtime_error("Trying to deserialize cell in unknown state"); - } - }; - return boost::apply_visitor(atomic_cell_visitor(), cv); -} - -collection_mutation read_collection_cell(ser::collection_cell_view cv) -{ - collection_type_impl::mutation mut; - mut.tomb = cv.tomb(); - auto&& elements = cv.elements(); - mut.cells.reserve(elements.size()); - for (auto&& e : elements) { - mut.cells.emplace_back(e.key(), read_atomic_cell(e.value())); - } - return collection_type_impl::serialize_mutation_form(mut); -} - -template -void read_and_visit_row(ser::row_view rv, const column_mapping& cm, column_kind kind, Visitor&& visitor) -{ - for (auto&& cv : rv.columns()) { - auto id = cv.id(); - auto& col = cm.column_at(kind, id); - - class atomic_cell_or_collection_visitor : public boost::static_visitor<> { - Visitor& _visitor; - column_id _id; - const column_mapping_entry& _col; - public: - explicit atomic_cell_or_collection_visitor(Visitor& v, column_id id, const column_mapping_entry& col) - : _visitor(v), _id(id), _col(col) { } - - void operator()(atomic_cell_variant& acv) const { - if (!_col.type()->is_atomic()) { - throw std::runtime_error("A collection expected, got an atomic cell"); - } - // FIXME: Pass view to cell to avoid copy - auto&& outer = current_allocator(); - with_allocator(standard_allocator(), [&] { - auto 
cell = read_atomic_cell(acv); - with_allocator(outer, [&] { - _visitor.accept_atomic_cell(_id, cell); - }); - }); - } - void operator()(ser::collection_cell_view& ccv) const { - if (_col.type()->is_atomic()) { - throw std::runtime_error("An atomic cell expected, got a collection"); - } - // FIXME: Pass view to cell to avoid copy - auto&& outer = current_allocator(); - with_allocator(standard_allocator(), [&] { - auto cell = read_collection_cell(ccv); - with_allocator(outer, [&] { - _visitor.accept_collection(_id, cell); - }); - }); - } - void operator()(ser::unknown_variant_type&) const { - throw std::runtime_error("Trying to deserialize unknown cell type"); - } - }; - auto&& cell = cv.c(); - boost::apply_visitor(atomic_cell_or_collection_visitor(visitor, id, col), cell); - } -} - -row_marker read_row_marker(boost::variant rmv) -{ - struct row_marker_visitor : boost::static_visitor { - row_marker operator()(ser::live_marker_view& lmv) const { - return row_marker(lmv.created_at()); - } - row_marker operator()(ser::expiring_marker_view& emv) const { - return row_marker(emv.lm().created_at(), emv.ttl(), emv.expiry()); - } - row_marker operator()(ser::dead_marker_view& dmv) const { - return row_marker(dmv.tomb()); - } - row_marker operator()(ser::no_marker_view&) const { - return row_marker(); - } - row_marker operator()(ser::unknown_variant_type&) const { - throw std::runtime_error("Trying to deserialize unknown row marker type"); - } - }; - return boost::apply_visitor(row_marker_visitor(), rmv); -} - -} - -void -mutation_partition_view::accept(const schema& s, mutation_partition_visitor& visitor) const { - accept(s.get_column_mapping(), visitor); -} - -void -mutation_partition_view::accept(const column_mapping& cm, mutation_partition_visitor& visitor) const { - auto in = _in; - auto mpv = ser::deserialize(in, boost::type()); - - visitor.accept_partition_tombstone(mpv.tomb()); - - struct static_row_cell_visitor { - mutation_partition_visitor& _visitor; - - void accept_atomic_cell(column_id id, const atomic_cell& ac) const { - _visitor.accept_static_cell(id, ac); - } - void accept_collection(column_id id, const collection_mutation& cm) const { - _visitor.accept_static_cell(id, cm); - } - }; - read_and_visit_row(mpv.static_row(), cm, column_kind::static_column, static_row_cell_visitor{visitor}); - - for (auto&& rt : mpv.range_tombstones()) { - visitor.accept_row_tombstone(rt); - } - - for (auto&& cr : mpv.rows()) { - auto t = row_tombstone(cr.deleted_at(), shadowable_tombstone(cr.shadowable_deleted_at())); - visitor.accept_row(position_in_partition_view::for_key(cr.key()), t, read_row_marker(cr.marker())); - - struct cell_visitor { - mutation_partition_visitor& _visitor; - - void accept_atomic_cell(column_id id, const atomic_cell& ac) const { - _visitor.accept_row_cell(id, ac); - } - void accept_collection(column_id id, const collection_mutation& cm) const { - _visitor.accept_row_cell(id, cm); - } - }; - read_and_visit_row(cr.cells(), cm, column_kind::regular_column, cell_visitor{visitor}); - } -} - -mutation_partition_view mutation_partition_view::from_view(ser::mutation_partition_view v) -{ - return { v.v }; -} diff --git a/scylla/mutation_partition_view.hh b/scylla/mutation_partition_view.hh deleted file mode 100644 index 27a91be..0000000 --- a/scylla/mutation_partition_view.hh +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. 
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "database_fwd.hh" -#include "mutation_partition_visitor.hh" -#include "utils/input_stream.hh" - -namespace ser { -class mutation_partition_view; -} - -// View on serialized mutation partition. See mutation_partition_serializer. -class mutation_partition_view { - utils::input_stream _in; -private: - mutation_partition_view(utils::input_stream v) - : _in(v) - { } -public: - static mutation_partition_view from_stream(utils::input_stream v) { - return { v }; - } - static mutation_partition_view from_view(ser::mutation_partition_view v); - void accept(const schema& schema, mutation_partition_visitor& visitor) const; - void accept(const column_mapping&, mutation_partition_visitor& visitor) const; -}; diff --git a/scylla/mutation_partition_visitor.hh b/scylla/mutation_partition_visitor.hh deleted file mode 100644 index 9a7c5ac..0000000 --- a/scylla/mutation_partition_visitor.hh +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "atomic_cell.hh" -#include "tombstone.hh" -#include "range_tombstone.hh" -#include "keys.hh" - -class row_marker; -class row_tombstone; - -// When used on an entry, marks the range between this entry and the previous -// one as continuous or discontinuous, excluding the keys of both entries. -// This information doesn't apply to continuity of the entries themselves, -// that is specified by is_dummy flag. -// See class doc of mutation_partition. -using is_continuous = bool_class; - -// Dummy entry is an entry which is incomplete. -// Typically used for marking bounds of continuity range. -// See class doc of mutation_partition. 
-class dummy_tag {};
-using is_dummy = bool_class<dummy_tag>;
-
-// Guarantees:
-//
-// - any tombstones which affect cell's liveness are visited before that cell
-//
-// - rows are visited in ascending order with respect to their keys
-//
-// - row header (accept_row) is visited before that row's cells
-//
-// - row tombstones are visited in ascending order with respect to their key prefixes
-//
-// - cells in given row are visited in ascending order with respect to their column IDs
-//
-// - static row is visited before any clustered row
-//
-// - for each column in a row only one variant of accept_(static|row)_cell() is called, appropriate
-//   for column's kind (atomic or collection).
-//
-class mutation_partition_visitor {
-public:
-    virtual void accept_partition_tombstone(tombstone) = 0;
-
-    virtual void accept_static_cell(column_id, atomic_cell_view) = 0;
-
-    virtual void accept_static_cell(column_id, collection_mutation_view) = 0;
-
-    virtual void accept_row_tombstone(const range_tombstone&) = 0;
-
-    virtual void accept_row(position_in_partition_view key, const row_tombstone& deleted_at, const row_marker& rm,
-                            is_dummy = is_dummy::no, is_continuous = is_continuous::yes) = 0;
-
-    virtual void accept_row_cell(column_id id, atomic_cell_view) = 0;
-
-    virtual void accept_row_cell(column_id id, collection_mutation_view) = 0;
-};
diff --git a/scylla/mutation_query.cc b/scylla/mutation_query.cc
deleted file mode 100644
index ff6379d..0000000
--- a/scylla/mutation_query.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */ - -#include "mutation_query.hh" -#include "gc_clock.hh" -#include "mutation_partition_serializer.hh" -#include "service/priority_manager.hh" -#include "query-result-writer.hh" - -reconcilable_result::~reconcilable_result() {} - -reconcilable_result::reconcilable_result() - : _row_count(0) -{ } - -reconcilable_result::reconcilable_result(uint32_t row_count, std::vector p, query::short_read short_read, - query::result_memory_tracker memory_tracker) - : _row_count(row_count) - , _short_read(short_read) - , _memory_tracker(std::move(memory_tracker)) - , _partitions(std::move(p)) -{ } - -const std::vector& reconcilable_result::partitions() const { - return _partitions; -} - -std::vector& reconcilable_result::partitions() { - return _partitions; -} - -bool -reconcilable_result::operator==(const reconcilable_result& other) const { - return boost::equal(_partitions, other._partitions); -} - -bool reconcilable_result::operator!=(const reconcilable_result& other) const { - return !(*this == other); -} - -query::result -to_data_query_result(const reconcilable_result& r, schema_ptr s, const query::partition_slice& slice, uint32_t max_rows, uint32_t max_partitions) { - query::result::builder builder(slice, query::result_request::only_result, { }); - for (const partition& p : r.partitions()) { - if (builder.row_count() >= max_rows || builder.partition_count() >= max_partitions) { - break; - } - // Also enforces the per-partition limit. - p.mut().unfreeze(s).query(builder, slice, gc_clock::time_point::min(), max_rows - builder.row_count()); - } - if (r.is_short_read()) { - builder.mark_as_short_read(); - } - return builder.build(); -} - -std::ostream& operator<<(std::ostream& out, const reconcilable_result::printer& pr) { - out << "{rows=" << pr.self.row_count() << ", short_read=" - << pr.self.is_short_read() << ", ["; - bool first = true; - for (const partition& p : pr.self.partitions()) { - if (!first) { - out << ", "; - } - first = false; - out << "{rows=" << p.row_count() << ", "; - out << p._m.pretty_printer(pr.schema); - out << "}"; - } - out << "]}"; - return out; -} - -reconcilable_result::printer reconcilable_result::pretty_printer(schema_ptr s) const { - return { *this, std::move(s) }; -} diff --git a/scylla/mutation_query.hh b/scylla/mutation_query.hh deleted file mode 100644 index 9522b97..0000000 --- a/scylla/mutation_query.hh +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "query-request.hh" -#include "query-result.hh" -#include "mutation_reader.hh" -#include "frozen_mutation.hh" - -class reconcilable_result; -class frozen_reconcilable_result; - -// Can be read by other cores after publishing. -struct partition { - uint32_t _row_count; - frozen_mutation _m; // FIXME: We don't need cf UUID, which frozen_mutation includes. 
- - partition(uint32_t row_count, frozen_mutation m) - : _row_count(row_count) - , _m(std::move(m)) - { } - - uint32_t row_count() const { - return _row_count; - } - - const frozen_mutation& mut() const { - return _m; - } - - frozen_mutation& mut() { - return _m; - } - - - bool operator==(const partition& other) const { - return _row_count == other._row_count && _m.representation() == other._m.representation(); - } - - bool operator!=(const partition& other) const { - return !(*this == other); - } -}; - -// The partitions held by this object are ordered according to dht::decorated_key ordering and non-overlapping. -// Each mutation must have different key. -// -// Can be read by other cores after publishing. -class reconcilable_result { - uint32_t _row_count; - query::short_read _short_read; - query::result_memory_tracker _memory_tracker; - std::vector _partitions; -public: - ~reconcilable_result(); - reconcilable_result(); - reconcilable_result(reconcilable_result&&) = default; - reconcilable_result& operator=(reconcilable_result&&) = default; - reconcilable_result(uint32_t row_count, std::vector partitions, query::short_read short_read, - query::result_memory_tracker memory_tracker = { }); - - const std::vector& partitions() const; - std::vector& partitions(); - - uint32_t row_count() const { - return _row_count; - } - - query::short_read is_short_read() const { - return _short_read; - } - - size_t memory_usage() const { - return _memory_tracker.used_memory(); - } - - bool operator==(const reconcilable_result& other) const; - bool operator!=(const reconcilable_result& other) const; - - struct printer { - const reconcilable_result& self; - schema_ptr schema; - friend std::ostream& operator<<(std::ostream&, const printer&); - }; - - printer pretty_printer(schema_ptr) const; -}; - -query::result to_data_query_result(const reconcilable_result&, schema_ptr, const query::partition_slice&, uint32_t row_limit, uint32_t partition_limit); - -// Performs a query on given data source returning data in reconcilable form. -// -// Reads at most row_limit rows. If less rows are returned, the data source -// didn't have more live data satisfying the query. -// -// Any cells which have expired according to query_time are returned as -// deleted cells and do not count towards live data. The mutations are -// compact, meaning that any cell which is covered by higher-level tombstone -// is absent in the results. -// -// 'source' doesn't have to survive deferring. -future mutation_query( - schema_ptr, - mutation_source source, - const dht::partition_range& range, - const query::partition_slice& slice, - uint32_t row_limit, - uint32_t partition_limit, - gc_clock::time_point query_time, - query::result_memory_accounter&& accounter = { }, - tracing::trace_state_ptr trace_ptr = nullptr); - -future<> data_query( - schema_ptr s, - const mutation_source& source, - const dht::partition_range& range, - const query::partition_slice& slice, - uint32_t row_limit, - uint32_t partition_limit, - gc_clock::time_point query_time, - query::result::builder& builder, - tracing::trace_state_ptr trace_ptr = nullptr); - -// Performs a query for counter updates. 
-future counter_write_query(schema_ptr, const mutation_source&, - const dht::decorated_key& dk, - const query::partition_slice& slice, - tracing::trace_state_ptr trace_ptr); - diff --git a/scylla/mutation_reader.cc b/scylla/mutation_reader.cc deleted file mode 100644 index 8eb1cc8..0000000 --- a/scylla/mutation_reader.cc +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include -#include -#include - -#include "mutation_reader.hh" -#include "core/future-util.hh" -#include "utils/move.hh" -#include "stdx.hh" - -template -T move_and_clear(T& obj) { - T x = std::move(obj); - obj = T(); - return x; -} - -future<> combined_mutation_reader::prepare_next() { - return parallel_for_each(_next, [this] (mutation_reader* mr) { - return (*mr)().then([this, mr] (streamed_mutation_opt next) { - if (next) { - _ptables.emplace_back(mutation_and_reader { std::move(*next), mr }); - boost::range::push_heap(_ptables, &heap_compare); - } - }); - }).then([this] { - _next.clear(); - }); -} - -future combined_mutation_reader::next() { - if (_current.empty() && !_next.empty()) { - return prepare_next().then([this] { return next(); }); - } - if (_ptables.empty()) { - return make_ready_future(); - }; - - while (!_ptables.empty()) { - boost::range::pop_heap(_ptables, &heap_compare); - auto& candidate = _ptables.back(); - streamed_mutation& m = candidate.m; - - if (!_current.empty() && !_current.back().decorated_key().equal(*m.schema(), m.decorated_key())) { - // key has changed, so emit accumulated mutation - boost::range::push_heap(_ptables, &heap_compare); - return make_ready_future(merge_mutations(move_and_clear(_current))); - } - - _current.emplace_back(std::move(m)); - _next.emplace_back(candidate.read); - _ptables.pop_back(); - } - return make_ready_future(merge_mutations(move_and_clear(_current))); -} - -void combined_mutation_reader::init_mutation_reader_set(std::vector readers) -{ - _all_readers = std::move(readers); - _next.assign(_all_readers.begin(), _all_readers.end()); - _ptables.reserve(_all_readers.size()); -} - -future<> combined_mutation_reader::fast_forward_to(std::vector to_add, std::vector to_remove, const dht::partition_range& pr) -{ - _ptables.clear(); - - std::vector new_readers; - boost::range::sort(_all_readers); - boost::range::sort(to_remove); - boost::range::set_difference(_all_readers, to_remove, std::back_inserter(new_readers)); - _all_readers = std::move(new_readers); - return parallel_for_each(_all_readers, [this, &pr] (mutation_reader* mr) { - return mr->fast_forward_to(pr); - }).then([this, to_add = std::move(to_add)] { - _all_readers.insert(_all_readers.end(), to_add.begin(), to_add.end()); - _next.assign(_all_readers.begin(), _all_readers.end()); - }); -} - -combined_mutation_reader::combined_mutation_reader(std::vector readers) - : _readers(std::move(readers)) -{ - _next.reserve(_readers.size()); - 
_current.reserve(_readers.size()); - _ptables.reserve(_readers.size()); - - for (auto&& r : _readers) { - _next.emplace_back(&r); - } - _all_readers.assign(_next.begin(), _next.end()); -} - -future<> combined_mutation_reader::fast_forward_to(const dht::partition_range& pr) { - _ptables.clear(); - _next.assign(_all_readers.begin(), _all_readers.end()); - return parallel_for_each(_next, [this, &pr] (mutation_reader* mr) { - return mr->fast_forward_to(pr); - }); -} - -future combined_mutation_reader::operator()() { - return next(); -} - -mutation_reader -make_combined_reader(std::vector readers) { - return make_mutation_reader(std::move(readers)); -} - -mutation_reader -make_combined_reader(mutation_reader&& a, mutation_reader&& b) { - std::vector v; - v.reserve(2); - v.push_back(std::move(a)); - v.push_back(std::move(b)); - return make_combined_reader(std::move(v)); -} - -class reader_returning final : public mutation_reader::impl { - streamed_mutation _m; - bool _done = false; -public: - reader_returning(streamed_mutation m) : _m(std::move(m)) { - } - virtual future operator()() override { - if (_done) { - return make_ready_future(); - } else { - _done = true; - return make_ready_future(std::move(_m)); - } - } -}; - -mutation_reader make_reader_returning(mutation m, streamed_mutation::forwarding fwd) { - return make_mutation_reader(streamed_mutation_from_mutation(std::move(m), std::move(fwd))); -} - -mutation_reader make_reader_returning(streamed_mutation m) { - return make_mutation_reader(std::move(m)); -} - -class reader_returning_many final : public mutation_reader::impl { - std::vector _m; - dht::partition_range _pr; -public: - reader_returning_many(std::vector m, const dht::partition_range& pr) : _m(std::move(m)), _pr(pr) { - boost::range::reverse(_m); - } - virtual future operator()() override { - while (!_m.empty()) { - auto& sm = _m.back(); - dht::ring_position_comparator cmp(*sm.schema()); - if (_pr.before(sm.decorated_key(), cmp)) { - _m.pop_back(); - } else if (_pr.after(sm.decorated_key(), cmp)) { - break; - } else { - auto m = std::move(sm); - _m.pop_back(); - return make_ready_future(std::move(m)); - } - } - return make_ready_future(); - } - virtual future<> fast_forward_to(const dht::partition_range& pr) override { - _pr = pr; - return make_ready_future<>(); - } -}; - -mutation_reader make_reader_returning_many(std::vector mutations, const query::partition_slice& slice, streamed_mutation::forwarding fwd) { - std::vector streamed_mutations; - streamed_mutations.reserve(mutations.size()); - for (auto& m : mutations) { - auto ck_ranges = query::clustering_key_filter_ranges::get_ranges(*m.schema(), slice, m.key()); - auto mp = mutation_partition(std::move(m.partition()), *m.schema(), std::move(ck_ranges)); - auto sm = streamed_mutation_from_mutation(mutation(m.schema(), m.decorated_key(), std::move(mp)), fwd); - streamed_mutations.emplace_back(std::move(sm)); - } - return make_mutation_reader(std::move(streamed_mutations), query::full_partition_range); -} - -mutation_reader make_reader_returning_many(std::vector mutations, const dht::partition_range& pr) { - std::vector streamed_mutations; - boost::range::transform(mutations, std::back_inserter(streamed_mutations), [] (auto& m) { - return streamed_mutation_from_mutation(std::move(m)); - }); - return make_mutation_reader(std::move(streamed_mutations), pr); -} - -mutation_reader make_reader_returning_many(std::vector mutations) { - return make_mutation_reader(std::move(mutations), query::full_partition_range); -} - -class 
empty_reader final : public mutation_reader::impl { -public: - virtual future operator()() override { - return make_ready_future(); - } - virtual future<> fast_forward_to(const dht::partition_range&) override { - return make_ready_future<>(); - } -}; - -mutation_reader make_empty_reader() { - return make_mutation_reader(); -} - - -class restricting_mutation_reader : public mutation_reader::impl { - const restricted_mutation_reader_config& _config; - unsigned _weight = 0; - bool _waited = false; - mutation_reader _base; -public: - restricting_mutation_reader(const restricted_mutation_reader_config& config, unsigned weight, mutation_reader&& base) - : _config(config), _weight(weight), _base(std::move(base)) { - if (_config.sem->waiters() >= _config.max_queue_length) { - _config.raise_queue_overloaded_exception(); - } - } - ~restricting_mutation_reader() { - if (_waited) { - _config.sem->signal(_weight); - } - } - future operator()() override { - // FIXME: we should defer freeing until the mutation is freed, perhaps, - // rather than just returned - if (_waited) { - return _base(); - } - auto waited = _config.timeout.count() != 0 - ? _config.sem->wait(_config.timeout, _weight) - : _config.sem->wait(_weight); - return waited.then([this] { - _waited = true; - return _base(); - }); - } - - virtual future<> fast_forward_to(const dht::partition_range& pr) override { - return _base.fast_forward_to(pr); - } -}; - -mutation_reader -make_restricted_reader(const restricted_mutation_reader_config& config, unsigned weight, mutation_reader&& base) { - return make_mutation_reader(config, weight, std::move(base)); -} - -class multi_range_mutation_reader : public mutation_reader::impl { -public: - using ranges_vector = dht::partition_range_vector; -private: - const ranges_vector& _ranges; - ranges_vector::const_iterator _current_range; - mutation_reader _reader; -public: - multi_range_mutation_reader(schema_ptr s, mutation_source source, const ranges_vector& ranges, - const query::partition_slice& slice, const io_priority_class& pc, - tracing::trace_state_ptr trace_state, streamed_mutation::forwarding fwd, - mutation_reader::forwarding fwd_mr) - : _ranges(ranges) - , _current_range(_ranges.begin()) - , _reader(source(s, *_current_range, slice, pc, trace_state, fwd, - _ranges.size() > 1 ? mutation_reader::forwarding::yes : fwd_mr)) - { - } - - virtual future operator()() override { - return repeat_until_value([this] { - return _reader().then([this] (streamed_mutation_opt smopt) { - if (smopt) { - return make_ready_future>(std::move(smopt)); - } - ++_current_range; - if (_current_range == _ranges.end()) { - return make_ready_future>(streamed_mutation_opt()); - } - return _reader.fast_forward_to(*_current_range).then([] { - return make_ready_future>(); - }); - }); - }); - } - - virtual future<> fast_forward_to(const dht::partition_range& pr) override { - // When end of pr is reached, this reader will increment _current_range - // and notice that it now points to _ranges.end(). 
- _current_range = std::prev(_ranges.end()); - return _reader.fast_forward_to(pr); - } -}; - -mutation_reader -make_multi_range_reader(schema_ptr s, mutation_source source, const dht::partition_range_vector& ranges, - const query::partition_slice& slice, const io_priority_class& pc, - tracing::trace_state_ptr trace_state, streamed_mutation::forwarding fwd, - mutation_reader::forwarding fwd_mr) -{ - return make_mutation_reader(std::move(s), std::move(source), ranges, - slice, pc, std::move(trace_state), fwd, fwd_mr); -} - -snapshot_source make_empty_snapshot_source() { - return snapshot_source([] { - return make_empty_mutation_source(); - }); -} - -mutation_source make_empty_mutation_source() { - return mutation_source([](schema_ptr s, - const dht::partition_range& pr, - const query::partition_slice& slice, - const io_priority_class& pc, - tracing::trace_state_ptr tr, - streamed_mutation::forwarding fwd) { - return make_empty_reader(); - }); -} - -mutation_source make_combined_mutation_source(std::vector addends) { - return mutation_source([addends = std::move(addends)] (schema_ptr s, - const dht::partition_range& pr, - const query::partition_slice& slice, - const io_priority_class& pc, - tracing::trace_state_ptr tr, - streamed_mutation::forwarding fwd) { - std::vector rd; - rd.reserve(addends.size()); - for (auto&& ms : addends) { - rd.emplace_back(ms(s, pr, slice, pc, tr, fwd)); - } - return make_combined_reader(std::move(rd)); - }); -} diff --git a/scylla/mutation_reader.hh b/scylla/mutation_reader.hh deleted file mode 100644 index cee317a..0000000 --- a/scylla/mutation_reader.hh +++ /dev/null @@ -1,511 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -#include "mutation.hh" -#include "clustering_key_filter.hh" -#include "core/future.hh" -#include "core/future-util.hh" -#include "core/do_with.hh" -#include "tracing/trace_state.hh" - -// A mutation_reader is an object which allows iterating on mutations: invoke -// the function to get a future for the next mutation, with an unset optional -// marking the end of iteration. After calling mutation_reader's operator(), -// caller must keep the object alive until the returned future is fulfilled. -// -// streamed_mutation object emitted by mutation_reader remains valid after the -// destruction of the mutation_reader. -// -// Asking mutation_reader for another streamed_mutation (i.e. invoking -// mutation_reader::operator()) invalidates all streamed_mutation objects -// previously produced by that reader. -// -// The mutations returned have strictly monotonically increasing keys. Two -// consecutive mutations never have equal keys. -// -// TODO: When iterating over mutations, we don't need a schema_ptr for every -// single one as it is normally the same for all of them. So "mutation" might -// not be the optimal object to use here. 
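//
// A minimal usage sketch of the protocol described above (illustrative only,
// not part of the original file; it assumes the seastar do_with()/repeat()
// helpers already used throughout this tree):
//
//   future<size_t> count_partitions(mutation_reader rd) {
//       return do_with(std::move(rd), size_t(0), [] (mutation_reader& rd, size_t& n) {
//           return repeat([&rd, &n] {
//               // The reader must stay alive until the future returned by rd() resolves.
//               return rd().then([&n] (streamed_mutation_opt smo) {
//                   if (!smo) {
//                       return stop_iteration::yes; // disengaged optional: end of stream
//                   }
//                   ++n;
//                   return stop_iteration::no;
//               });
//           }).then([&n] { return n; });
//       });
//   }
//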
-class mutation_reader final {
-public:
-    // mutation_reader::forwarding determines whether fast_forward_to() may
-    // be used on the mutation reader to change the partition range being
-    // read. Enabling forwarding also changes read policy: forwarding::no
-    // means we will stop reading from disk at the end of the given range,
-    // but with forwarding::yes we may read ahead, anticipating the user to
-    // make a small skip with fast_forward_to() and continuing to read.
-    //
-    // Note that mutation_reader::forwarding is similarly named but different
-    // from streamed_mutation::forwarding - the former is about skipping to
-    // a different partition range, while the latter is about skipping
-    // inside a large partition.
-    class forwarding_tag;
-    using forwarding = bool_class<forwarding_tag>;
-
-    class impl {
-    public:
-        virtual ~impl() {}
-        virtual future<streamed_mutation_opt> operator()() = 0;
-        virtual future<> fast_forward_to(const dht::partition_range&) {
-            throw std::bad_function_call();
-        }
-    };
-private:
-    class null_impl final : public impl {
-    public:
-        virtual future<streamed_mutation_opt> operator()() override { throw std::bad_function_call(); }
-    };
-private:
-    std::unique_ptr<impl> _impl;
-public:
-    mutation_reader(std::unique_ptr<impl> impl) noexcept : _impl(std::move(impl)) {}
-    mutation_reader() : mutation_reader(std::make_unique<null_impl>()) {}
-    mutation_reader(mutation_reader&&) = default;
-    mutation_reader(const mutation_reader&) = delete;
-    mutation_reader& operator=(mutation_reader&&) = default;
-    mutation_reader& operator=(const mutation_reader&) = delete;
-    future<streamed_mutation_opt> operator()() { return _impl->operator()(); }
-
-    // Changes the range of partitions to pr. The range can only be moved
-    // forwards. pr.begin() needs to be larger than pr.end() of the previously
-    // used range (i.e. either the initial one passed to the constructor or a
-    // previous fast forward target).
-    // pr needs to be valid until the reader is destroyed or fast_forward_to()
-    // is called again.
-    future<> fast_forward_to(const dht::partition_range& pr) { return _impl->fast_forward_to(pr); }
-};
-
-// Impl: derived from mutation_reader::impl; Args/args: arguments for Impl's constructor
-template <typename Impl, typename... Args>
-inline
-mutation_reader
-make_mutation_reader(Args&&... args) {
-    return mutation_reader(std::make_unique<Impl>(std::forward<Args>(args)...));
-}
-
-// Combines multiple mutation_readers into one.
-class combined_mutation_reader : public mutation_reader::impl {
-    std::vector<mutation_reader> _readers;
-    std::vector<mutation_reader*> _all_readers;
-
-    struct mutation_and_reader {
-        streamed_mutation m;
-        mutation_reader* read;
-
-        bool operator<(const mutation_and_reader& other) const {
-            return read < other.read;
-        }
-
-        struct less_compare {
-            bool operator()(const mutation_and_reader& a, mutation_reader* b) const {
-                return a.read < b;
-            }
-            bool operator()(mutation_reader* a, const mutation_and_reader& b) const {
-                return a < b.read;
-            }
-            bool operator()(const mutation_and_reader& a, const mutation_and_reader& b) const {
-                return a < b;
-            }
-        };
-    };
-    std::vector<mutation_and_reader> _ptables;
-    // comparison function for std::make_heap()/std::push_heap()
-    static bool heap_compare(const mutation_and_reader& a, const mutation_and_reader& b) {
-        auto&& s = a.m.schema();
-        // order of comparison is inverted, because heaps produce greatest value first
-        return b.m.decorated_key().less_compare(*s, a.m.decorated_key());
-    }
-    std::vector<streamed_mutation> _current;
-    std::vector<mutation_reader*> _next;
-private:
-    future<> prepare_next();
-    // Produces next mutation or disengaged optional if there are no more.
-    future<streamed_mutation_opt> next();
-protected:
-    combined_mutation_reader() = default;
-    void init_mutation_reader_set(std::vector<mutation_reader*>);
-    future<> fast_forward_to(std::vector<mutation_reader*> to_add, std::vector<mutation_reader*> to_remove, const dht::partition_range& pr);
-public:
-    combined_mutation_reader(std::vector<mutation_reader> readers);
-    virtual future<streamed_mutation_opt> operator()() override;
-    virtual future<> fast_forward_to(const dht::partition_range& pr) override;
-};
-
-// Creates a mutation reader which combines data returned by supplied readers.
-// Returns mutations of the same schema only when all readers return mutations
-// of the same schema.
-mutation_reader make_combined_reader(std::vector<mutation_reader>);
-mutation_reader make_combined_reader(mutation_reader&& a, mutation_reader&& b);
-// reads from the input readers, in order
-mutation_reader make_reader_returning(mutation, streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no);
-mutation_reader make_reader_returning(streamed_mutation);
-mutation_reader make_reader_returning_many(std::vector<mutation>,
-                                           const query::partition_slice& slice = query::full_slice,
-                                           streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no);
-mutation_reader make_reader_returning_many(std::vector<mutation>, const dht::partition_range&);
-mutation_reader make_reader_returning_many(std::vector<streamed_mutation>);
-mutation_reader make_empty_reader();
-
-struct restricted_mutation_reader_config {
-    semaphore* sem = nullptr;
-    std::chrono::nanoseconds timeout = {};
-    size_t max_queue_length = std::numeric_limits<size_t>::max();
-    std::function<void()> raise_queue_overloaded_exception = default_raise_queue_overloaded_exception;
-
-    static void default_raise_queue_overloaded_exception() {
-        throw std::runtime_error("restricted mutation reader queue overload");
-    }
-};
-
-// Restricts a given `mutation_reader` to a concurrency limit according to the settings in
-// a restricted_mutation_reader_config. These settings include a semaphore for limiting the number
-// of active concurrent readers, a timeout for inactive readers, and a maximum queue size for
-// inactive readers.
-mutation_reader make_restricted_reader(const restricted_mutation_reader_config& config, unsigned weight, mutation_reader&& base);
-
-/*
-template<typename T>
-concept bool StreamedMutationFilter() {
-    return requires(T t, const streamed_mutation& sm) {
-        { t(sm) } -> bool;
-    };
-}
-*/
-template <typename MutationFilter>
-class filtering_reader : public mutation_reader::impl {
-    mutation_reader _rd;
-    MutationFilter _filter;
-    streamed_mutation_opt _current;
-    static_assert(std::is_same<bool, std::result_of_t<MutationFilter(const streamed_mutation&)>>::value, "bad MutationFilter signature");
-public:
-    filtering_reader(mutation_reader rd, MutationFilter&& filter)
-        : _rd(std::move(rd)), _filter(std::forward<MutationFilter>(filter)) {
-    }
-    virtual future<streamed_mutation_opt> operator()() override {
-        return repeat([this] {
-            return _rd().then([this] (streamed_mutation_opt&& mo) mutable {
-                if (!mo) {
-                    _current = std::move(mo);
-                    return stop_iteration::yes;
-                } else {
-                    if (_filter(*mo)) {
-                        _current = std::move(mo);
-                        return stop_iteration::yes;
-                    }
-                    return stop_iteration::no;
-                }
-            });
-        }).then([this] {
-            return make_ready_future<streamed_mutation_opt>(std::move(_current));
-        });
-    }
-    virtual future<> fast_forward_to(const dht::partition_range& pr) override {
-        return _rd.fast_forward_to(pr);
-    }
-};
-
-// Creates a mutation_reader wrapper which creates a new stream of mutations
-// with some mutations removed from the original stream.
-// MutationFilter is a callable which decides which mutations are dropped. It
-// accepts streamed_mutation const& and returns a bool. The mutation stays in the
-// stream if and only if the filter returns true.
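//
// A hypothetical usage sketch (not from the original file): keep only the
// partitions accepted by a predicate; is_wanted_key() below is an assumed
// stand-in for real filtering logic:
//
//   mutation_reader filtered = make_filtering_reader(std::move(rd),
//       [] (const streamed_mutation& sm) {
//           return is_wanted_key(sm.decorated_key()); // true means keep
//       });
//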
-template <typename MutationFilter>
-mutation_reader make_filtering_reader(mutation_reader rd, MutationFilter&& filter) {
-    return make_mutation_reader<filtering_reader<MutationFilter>>(std::move(rd), std::forward<MutationFilter>(filter));
-}
-
-// Calls the consumer for each element of the reader's stream until end of stream
-// is reached or the consumer requests iteration to stop by returning stop_iteration::yes.
-// The consumer should accept mutation as the argument and return stop_iteration.
-// The returned future<> resolves when consumption ends.
-template <typename Consumer>
-inline
-future<> consume(mutation_reader& reader, Consumer consumer) {
-    static_assert(std::is_same<future<stop_iteration>, futurize_t<std::result_of_t<Consumer(mutation&&)>>>::value, "bad Consumer signature");
-    using futurator = futurize<std::result_of_t<Consumer(mutation&&)>>;
-
-    return do_with(std::move(consumer), [&reader] (Consumer& c) -> future<> {
-        return repeat([&reader, &c] () {
-            return reader().then([] (auto sm) {
-                return mutation_from_streamed_mutation(std::move(sm));
-            }).then([&c] (mutation_opt&& mo) -> future<stop_iteration> {
-                if (!mo) {
-                    return make_ready_future<stop_iteration>(stop_iteration::yes);
-                }
-                return futurator::apply(c, std::move(*mo));
-            });
-        });
-    });
-}
-
-// mutation_source represents a source of data in mutation form. The data source
-// can be queried multiple times and in parallel. For each query it returns an
-// independent mutation_reader.
-// The reader returns mutations having all the same schema, the one passed
-// when invoking the source.
-class mutation_source {
-    using partition_range = const dht::partition_range&;
-    using io_priority = const io_priority_class&;
-    using func_type = std::function<mutation_reader(schema_ptr, partition_range, const query::partition_slice&, io_priority, tracing::trace_state_ptr, streamed_mutation::forwarding, mutation_reader::forwarding)>;
-    // We could have our own version of std::function<> that is nothrow
-    // move constructible and save some indirection and allocation.
-    // Probably not worth the effort though.
-    lw_shared_ptr<func_type> _fn;
-private:
-    mutation_source() = default;
-    explicit operator bool() const { return bool(_fn); }
-    friend class optimized_optional<mutation_source>;
-public:
-    mutation_source(func_type fn) : _fn(make_lw_shared(std::move(fn))) {}
-    // For sources which don't care about the mutation_reader::forwarding flag (always fast forwardable)
-    mutation_source(std::function<mutation_reader(schema_ptr, partition_range, const query::partition_slice&, io_priority, tracing::trace_state_ptr, streamed_mutation::forwarding)> fn)
-        : _fn(make_lw_shared([fn = std::move(fn)] (schema_ptr s, partition_range range, const query::partition_slice& slice, io_priority pc, tracing::trace_state_ptr tr, streamed_mutation::forwarding fwd, mutation_reader::forwarding) {
-            return fn(s, range, slice, pc, std::move(tr), fwd);
-        })) {}
-    mutation_source(std::function<mutation_reader(schema_ptr, partition_range, const query::partition_slice&, io_priority)> fn)
-        : _fn(make_lw_shared([fn = std::move(fn)] (schema_ptr s, partition_range range, const query::partition_slice& slice, io_priority pc, tracing::trace_state_ptr, streamed_mutation::forwarding fwd, mutation_reader::forwarding) {
-            assert(!fwd);
-            return fn(s, range, slice, pc);
-        })) {}
-    mutation_source(std::function<mutation_reader(schema_ptr, partition_range, const query::partition_slice&)> fn)
-        : _fn(make_lw_shared([fn = std::move(fn)] (schema_ptr s, partition_range range, const query::partition_slice& slice, io_priority, tracing::trace_state_ptr, streamed_mutation::forwarding fwd, mutation_reader::forwarding) {
-            assert(!fwd);
-            return fn(s, range, slice);
-        })) {}
-    mutation_source(std::function<mutation_reader(schema_ptr, partition_range)> fn)
-        : _fn(make_lw_shared([fn = std::move(fn)] (schema_ptr s, partition_range range, const query::partition_slice&, io_priority, tracing::trace_state_ptr, streamed_mutation::forwarding fwd, mutation_reader::forwarding) {
-            assert(!fwd);
-            return fn(s, range);
-        })) {}
-
-    mutation_source(const mutation_source& other) = default;
-    mutation_source& operator=(const mutation_source& other) = default;
-    mutation_source(mutation_source&&) = default;
-    mutation_source& operator=(mutation_source&&) = default;
-
-    // Creates a new reader.
-    //
-    // All parameters captured by reference must remain live as long as returned
-    // mutation_reader or streamed_mutation obtained through it are alive.
-    mutation_reader operator()(schema_ptr s,
-        partition_range range = query::full_partition_range,
-        const query::partition_slice& slice = query::full_slice,
-        io_priority pc = default_priority_class(),
-        tracing::trace_state_ptr trace_state = nullptr,
-        streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no,
-        mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes) const
-    {
-        return (*_fn)(std::move(s), range, slice, pc, std::move(trace_state), fwd, fwd_mr);
-    }
-};
-
-// Returns a mutation_source which is the sum of given mutation_sources.
-//
-// Adding two mutation sources gives a mutation source which contains
-// the sum of writes contained in the addends.
-mutation_source make_combined_mutation_source(std::vector<mutation_source>);
-
-// Represents a mutation_source which can be snapshotted.
-class snapshot_source {
-private:
-    std::function<mutation_source()> _func;
-public:
-    snapshot_source(std::function<mutation_source()> func)
-        : _func(std::move(func))
-    { }
-
-    // Creates a new snapshot.
-    // The returned mutation_source represents all earlier writes and only those.
-    // Note though that the mutations in the snapshot may get compacted over time.
-    mutation_source operator()() {
-        return _func();
-    }
-};
-
-mutation_source make_empty_mutation_source();
-snapshot_source make_empty_snapshot_source();
-
-template<>
-struct move_constructor_disengages<mutation_source> {
-    enum { value = true };
-};
-using mutation_source_opt = optimized_optional<mutation_source>;
-
-
-/// A partition_presence_checker quickly returns whether a key is known not to exist
-/// in a data source (it may return false positives, but not false negatives).
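//
// A minimal sketch (illustrative only) of such a checker backed by a
// per-sstable filter; bloom_filter and its contains() are hypothetical
// stand-ins for whatever filter structure a data source keeps:
//
//   partition_presence_checker make_filter_checker(lw_shared_ptr<bloom_filter> filter) {
//       return [filter] (const dht::decorated_key& key) {
//           return filter->contains(key)
//               ? partition_presence_checker_result::maybe_exists
//               : partition_presence_checker_result::definitely_doesnt_exist;
//       };
//   }
//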
-enum class partition_presence_checker_result {
-    definitely_doesnt_exist,
-    maybe_exists
-};
-using partition_presence_checker = std::function<partition_presence_checker_result (const dht::decorated_key& key)>;
-
-inline
-partition_presence_checker make_default_partition_presence_checker() {
-    return [] (const dht::decorated_key&) { return partition_presence_checker_result::maybe_exists; };
-}
-
-template <typename Consumer>
-future<stop_iteration> do_consume_streamed_mutation_flattened(streamed_mutation& sm, Consumer& c)
-{
-    do {
-        if (sm.is_buffer_empty()) {
-            if (sm.is_end_of_stream()) {
-                break;
-            }
-            auto f = sm.fill_buffer();
-            if (!f.available()) {
-                return f.then([&] { return do_consume_streamed_mutation_flattened(sm, c); });
-            }
-            f.get();
-        } else {
-            if (sm.pop_mutation_fragment().consume(c) == stop_iteration::yes) {
-                break;
-            }
-        }
-    } while (true);
-    return make_ready_future<stop_iteration>(c.consume_end_of_partition());
-}
-
-/*
-template<typename T>
-concept bool FlattenedConsumer() {
-    return StreamedMutationConsumer<T>() && requires(T obj, const dht::decorated_key& dk) {
-        obj.consume_new_partition(dk);
-        obj.consume_end_of_partition();
-    };
-}
-*/
-template <typename FlattenedConsumer>
-auto consume_flattened(mutation_reader mr, FlattenedConsumer&& c, bool reverse_mutations = false)
-{
-    return do_with(std::move(mr), std::move(c), stdx::optional<streamed_mutation>(), [reverse_mutations] (auto& mr, auto& c, auto& sm) {
-        return repeat([&, reverse_mutations] {
-            return mr().then([&, reverse_mutations] (auto smopt) {
-                if (!smopt) {
-                    return make_ready_future<stop_iteration>(stop_iteration::yes);
-                }
-                if (!reverse_mutations) {
-                    sm.emplace(std::move(*smopt));
-                } else {
-                    sm.emplace(reverse_streamed_mutation(std::move(*smopt)));
-                }
-                c.consume_new_partition(sm->decorated_key());
-                if (sm->partition_tombstone()) {
-                    c.consume(sm->partition_tombstone());
-                }
-                return do_consume_streamed_mutation_flattened(*sm, c);
-            });
-        }).then([&] {
-            return c.consume_end_of_stream();
-        });
-    });
-}
-
-/*
-template<typename T>
-concept bool StreamedMutationFilter() {
-    return requires(T obj, const streamed_mutation& sm) {
-        { obj(sm) } -> bool;
-    };
-}
-*/
-// This version of consume_flattened() must be run inside a thread and
-// guarantees that all FlattenedConsumer functions will also be called in the same thread
-// context.
-template <typename FlattenedConsumer, typename StreamedMutationFilter>
-auto consume_flattened_in_thread(mutation_reader& mr, FlattenedConsumer& c, StreamedMutationFilter&& filter)
-{
-    while (true) {
-        auto smopt = mr().get0();
-        if (!smopt) {
-            break;
-        }
-        auto& sm = *smopt;
-        if (!filter(sm)) {
-            continue;
-        }
-        c.consume_new_partition(sm.decorated_key());
-        if (sm.partition_tombstone()) {
-            c.consume(sm.partition_tombstone());
-        }
-        do {
-            if (sm.is_buffer_empty()) {
-                if (sm.is_end_of_stream()) {
-                    break;
-                }
-                sm.fill_buffer().get0();
-            } else {
-                if (sm.pop_mutation_fragment().consume(c) == stop_iteration::yes) {
-                    break;
-                }
-            }
-        } while (true);
-        if (c.consume_end_of_partition() == stop_iteration::yes) {
-            break;
-        }
-    }
-    return c.consume_end_of_stream();
-}
-
-template <typename FlattenedConsumer>
-auto consume_flattened_in_thread(mutation_reader& mr, FlattenedConsumer& c)
-{
-    return consume_flattened_in_thread(mr, c, [] (auto&&) { return true; });
-}
-
-// Adapts a non-movable FlattenedConsumer to a movable one.
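//
// For reference, a minimal sketch (illustrative only) of the FlattenedConsumer
// shape that consume_flattened() above and the adapter below expect; this toy
// consumer simply counts clustering rows:
//
//   struct row_counter {
//       size_t rows = 0;
//       void consume_new_partition(const dht::decorated_key&) {}
//       stop_iteration consume(tombstone) { return stop_iteration::no; }
//       stop_iteration consume(static_row&&) { return stop_iteration::no; }
//       stop_iteration consume(clustering_row&&) { ++rows; return stop_iteration::no; }
//       stop_iteration consume(range_tombstone&&) { return stop_iteration::no; }
//       stop_iteration consume_end_of_partition() { return stop_iteration::no; }
//       size_t consume_end_of_stream() { return rows; }
//   };
//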
-template -class stable_flattened_mutations_consumer { - std::unique_ptr _ptr; -public: - stable_flattened_mutations_consumer(std::unique_ptr ptr) : _ptr(std::move(ptr)) {} - auto consume_new_partition(const dht::decorated_key& dk) { return _ptr->consume_new_partition(dk); } - auto consume(tombstone t) { return _ptr->consume(t); } - auto consume(static_row&& sr) { return _ptr->consume(std::move(sr)); } - auto consume(clustering_row&& cr) { return _ptr->consume(std::move(cr)); } - auto consume(range_tombstone&& rt) { return _ptr->consume(std::move(rt)); } - auto consume_end_of_partition() { return _ptr->consume_end_of_partition(); } - auto consume_end_of_stream() { return _ptr->consume_end_of_stream(); } -}; - -template -stable_flattened_mutations_consumer make_stable_flattened_mutations_consumer(Args&&... args) { - return { std::make_unique(std::forward(args)...) }; -} - -// Requires ranges to be sorted and disjoint. -mutation_reader -make_multi_range_reader(schema_ptr s, mutation_source source, const dht::partition_range_vector& ranges, - const query::partition_slice& slice, const io_priority_class& pc = default_priority_class(), - tracing::trace_state_ptr trace_state = nullptr, streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no, - mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes); diff --git a/scylla/noexcept_traits.hh b/scylla/noexcept_traits.hh deleted file mode 100644 index 15ebcbf..0000000 --- a/scylla/noexcept_traits.hh +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include -#include -#include - -#pragma once - -// -// Utility for adapting types which are not nothrow move constructible into such -// by wrapping them if necessary. 
-//
-// Example usage:
-//
-//   T val{};
-//   using traits = noexcept_movable<T>;
-//   auto f = make_ready_future<noexcept_movable_t<T>>(traits::wrap(std::move(val)));
-//   T val2 = traits::unwrap(f.get0());
-//
-
-template <typename T, typename Enable = void>
-struct noexcept_movable;
-
-template <typename T>
-struct noexcept_movable<T, std::enable_if_t<std::is_nothrow_move_constructible<T>::value>> {
-    using type = T;
-
-    static type wrap(T&& v) {
-        return std::move(v);
-    }
-
-    static future<T> wrap(future<T>&& v) {
-        return std::move(v);
-    }
-
-    static T unwrap(type&& v) {
-        return std::move(v);
-    }
-
-    static future<T> unwrap(future<T>&& v) {
-        return std::move(v);
-    }
-};
-
-template <typename T>
-struct noexcept_movable<T, std::enable_if_t<!std::is_nothrow_move_constructible<T>::value>> {
-    using type = std::unique_ptr<T>;
-
-    static type wrap(T&& v) {
-        return std::make_unique<T>(std::move(v));
-    }
-
-    static T unwrap(type&& v) {
-        return std::move(*v);
-    }
-};
-
-template <typename T>
-using noexcept_movable_t = typename noexcept_movable<T>::type;
diff --git a/scylla/nway_merger.hh b/scylla/nway_merger.hh
deleted file mode 100644
index 0c3abdf..0000000
--- a/scylla/nway_merger.hh
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * Copyright (C) 2013 ScyllaDB
- *
- * This work is open source software, licensed under the terms of the
- * BSD license as described in the LICENSE file in the top-level directory.
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-/** @file nway_merger.hh
- * Implementation of an osv::nway_merger class.
- * nway_merger::merge() function merges N sorted containers with the complexity
- * of O(M*log(N)), where M is a total number of elements in all merged
- * containers.
- */
-
-#ifndef _NWAY_MERGE_HH
-#define _NWAY_MERGE_HH
-
-#include
-#include
-#include
-#include
-
-/** @class Ptr
- * Compares two containers by the element at the head
- */
-template <class Ptr>
-class std_ptr_front_comparator {
-public:
-    /**
-     * We want our heap to sort the elements in an increasing order.
-     * @param a
-     * @param b
-     *
-     * @return
-     */
-    bool operator()(const Ptr a, const Ptr b)
-    {
-        return a->front() > b->front();
-    }
-};
-
-/** @class nway_merger "nway_merger.hh"
- * Merge N containers S sorted in an increasing order into an iterator as a
- * sorted sequence in an increasing order. The containers collection is passed
- * to the method merge() in a container C.
- *
- * @note
- * In order to invert the ordering of the elements in the output stream one may
- * invert the semantics in the operator>(const S::value_type) to return "<="
- * result and sort the input streams in a decreasing order. The resulting
- * stream will have a decreasing order then.
- *
- * #### Algorithm:
- * merge() method implements the "ideal merge" algorithm as described at
- * http://xlinux.nist.gov/dads//HTML/idealmerge.html
- *
- * @note
- * std::priority_queue (heap) as we defined it will hold the "smallest" element
- * at the "top".
- *
- * 1. Input containers should be sorted in an increasing order.
- * 2. Push all the containers into the heap sorting by their front() elements.
- * 3. Remove the container from the top of the heap. It'll have the smallest
- *    element among all containers.
- * 4. Remove the front() element from this container and push it into the output
- *    iterator.
- * 5. If this container still has elements, push it back to the heap.
- * 6. Repeat steps (3)-(5) while there are containers in the heap.
- *
- * #### Complexity:
- * O(M*log(N)), where M is a total number of elements in all merged containers
- * (provided the complexity of a comparison between two S values is constant).
- *
- * @note S::value_type must implement operator>().
- */
-template <class C, class Comp = std_ptr_front_comparator<typename C::value_type>>
-class nway_merger {
-public:
-    nway_merger(Comp comp = Comp()) : _comp(comp), _heads_heap(comp) {}
-    /**
-     * Merges the containers and outputs the resulting stream into the output
-     * iterator res (see class description for more details).
-     *
-     * The input (sorted) container should implement:
-     *  - front() - returns the HEAD element.
-     *  - empty() - returns true if there are no more elements left
-     *  - begin() - returns the iterator pointing to the HEAD element.
-     *  - pop_front() - deletes the element returned by front().
-     *
-     * Output iterator should implement:
-     *  - Required operators to implement the *it = xx in order to consume the
-     *    xx value.
-     *  - operator++() - to move to the next output position.
-     *
-     * @param sorted_lists collection of the pointers to the sorted collections
-     *                     to merge
-     * @param res Output stream for the results of an nway_merge
-     */
-    template <class OutputIt>
-    void merge(const C& sorted_lists, OutputIt res)
-    {
-        create_heap(sorted_lists);
-
-        while (!_heads_heap.empty()) {
-            SPtr t = _heads_heap.top();
-            _heads_heap.pop();
-            auto t_it = t->begin();
-
-            /* Get the element from the "HEAD" of the container */
-            *res = *t_it;
-            ++res;
-
-            /* Erase the "HEAD" */
-            t->pop_front();
-
-            if (!t->empty()) {
-                _heads_heap.push(t);
-            }
-        }
-    }
-
-    /**
-     * Pops the "smallest" element from the merged stream and pushes it into the
-     * output stream.
-     *
-     * The input ordering requirements are the same as described in
-     * merge() above.
-     * This function performs a single step of the nway_merge algorithm:
-     * 1) Sorts the front elements.
-     * 2) Pushes the least among them into the output iterator.
-     *
-     * This function is convenient when you want to merge the input streams that
-     * are sometimes empty in a step-by-step manner.
-     *
-     * @param sorted_lists
-     * @param res
-     *
-     * @return true if the element has been popped and false if there was nothing
-     *         to pop (all input sequences were empty).
-     */
-    template <class OutputIt>
-    bool pop(OutputIt res)
-    {
-        refill_heap();
-
-        if (!_heads_heap.empty()) {
-            SPtr t = _heads_heap.top();
-            _heads_heap.pop();
-            auto t_it = t->begin();
-
-            /* Get the element from the "HEAD" of the container */
-            *res = *t_it;
-            ++res;
-
-            /* Erase the "HEAD" */
-            t->pop_front();
-
-            if (!t->empty()) {
-                _heads_heap.push(t);
-            } else {
-                _empty_lists.emplace_back(t);
-            }
-
-            return true;
-        } else {
-            return false;
-        }
-    }
-
-    void clear() { _heads_heap = heap_type(_comp); }
-
-    /**
-     * Create a new heap from the sorted sequences.
-     * @param sorted_lists
-     */
-    void create_heap(const C& sorted_lists) {
-
-        clear();
-
-        /* Create a heap */
-        for (SPtr c : sorted_lists) {
-            if (!c->empty()) {
-                _heads_heap.emplace(c);
-            } else {
-                _empty_lists.emplace_back(c);
-            }
-        }
-    }
-
-    /**
-     * Push back all sequences from the _empty_list back to the heap.
-     *
-     * TODO:
-     * Come up with something better than walking the whole list and checking
-     * each list. One option is to use a bitfield array and then use a
-     * count_leading_zeros() based function to efficiently get the next set bit
-     * which may represent the non-empty list.
-     *
-     * This inefficiency may count in case of VMs with a large number of vCPUs
-     * when most of the queues would be empty.
-     */
-    void refill_heap() {
-        /* TODO: Improve this by iterating only on those that are not empty */
-        auto it = _empty_lists.begin();
-        while (it != _empty_lists.end()) {
-            if (!(*it)->empty()) {
-                _heads_heap.emplace(*it);
-
-                auto tmp_it = it;
-                ++it;
-
-                _empty_lists.erase(tmp_it);
-            } else {
-                ++it;
-            }
-        }
-    }
-
-    template <class EmptyChecker>
-    bool empty(EmptyChecker checker) const {
-        return checker();
-    }
-
-    // A stupid implementation of an empty_checker()
-    bool silly_empty_checker() const {
-        if (!_heads_heap.empty()) {
-            return false;
-        }
-
-        for (SPtr c : _empty_lists) {
-            if (!c->empty()) {
-                return false;
-            }
-        }
-
-        return true;
-    }
-
-private:
-    typedef typename C::value_type SPtr;
-    typedef std::priority_queue<SPtr, std::vector<SPtr>, Comp> heap_type;
-
-    Comp _comp;
-    heap_type _heads_heap;
-    C* _sorted_lists;
-    std::list<SPtr> _empty_lists;
-};
-
-#endif /* _NWAY_MERGE_HH */
diff --git a/scylla/partition_builder.hh b/scylla/partition_builder.hh
deleted file mode 100644
index e04ce2f..0000000
--- a/scylla/partition_builder.hh
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "mutation_partition.hh"
-#include "mutation_partition_view.hh"
-
-// Partition visitor which builds mutation_partition corresponding to the data it's fed with.
-class partition_builder : public mutation_partition_visitor {
-private:
-    const schema& _schema;
-    mutation_partition& _partition;
-    deletable_row* _current_row;
-public:
-    // @p will hold the result of building.
-    // @p must be empty.
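//
// A minimal sketch of the intended use (illustrative only; my_schema and
// my_view are hypothetical stand-ins for a schema_ptr and a serialized
// mutation_partition_view):
//
//   mutation_partition p(my_schema);          // freshly constructed, i.e. empty
//   partition_builder builder(*my_schema, p);
//   my_view.accept(*my_schema, builder);      // replays the view into p
//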
- partition_builder(const schema& s, mutation_partition& p) - : _schema(s) - , _partition(p) - { } - - virtual void accept_partition_tombstone(tombstone t) override { - _partition.apply(t); - } - - virtual void accept_static_cell(column_id id, atomic_cell_view cell) override { - row& r = _partition.static_row(); - r.append_cell(id, atomic_cell_or_collection(cell)); - } - - virtual void accept_static_cell(column_id id, collection_mutation_view collection) override { - row& r = _partition.static_row(); - r.append_cell(id, atomic_cell_or_collection(collection)); - } - - virtual void accept_row_tombstone(const range_tombstone& rt) override { - _partition.apply_row_tombstone(_schema, rt); - } - - virtual void accept_row(position_in_partition_view key, const row_tombstone& deleted_at, const row_marker& rm, is_dummy dummy, is_continuous continuous) override { - deletable_row& r = _partition.clustered_row(_schema, key, dummy, continuous); - r.apply(rm); - r.apply(deleted_at); - _current_row = &r; - } - - virtual void accept_row_cell(column_id id, atomic_cell_view cell) override { - row& r = _current_row->cells(); - r.append_cell(id, atomic_cell_or_collection(cell)); - } - - virtual void accept_row_cell(column_id id, collection_mutation_view collection) override { - row& r = _current_row->cells(); - r.append_cell(id, atomic_cell_or_collection(collection)); - } -}; diff --git a/scylla/partition_range_compat.hh b/scylla/partition_range_compat.hh deleted file mode 100644 index 952e9ff..0000000 --- a/scylla/partition_range_compat.hh +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - - -#pragma once - -#include -#include -#include "range.hh" -#include "dht/i_partitioner.hh" -#include "query-request.hh" -#include "stdx.hh" - -namespace compat { - -using wrapping_partition_range = wrapping_range; - - -// unwraps a vector of wrapping ranges into a vector of nonwrapping ranges -// if the vector happens to be sorted by the left bound, it remains sorted -template -std::vector> -unwrap(std::vector>&& v, Comparator&& cmp) { - std::vector> ret; - ret.reserve(v.size() + 1); - for (auto&& wr : v) { - if (wr.is_wrap_around(cmp)) { - auto&& p = std::move(wr).unwrap(); - ret.insert(ret.begin(), nonwrapping_range(std::move(p.first))); - ret.emplace_back(std::move(p.second)); - } else { - ret.emplace_back(std::move(wr)); - } - } - return ret; -} - -// unwraps a vector of wrapping ranges into a vector of nonwrapping ranges -// if the vector happens to be sorted by the left bound, it remains sorted -template -std::vector> -unwrap(const std::vector>& v, Comparator&& cmp) { - std::vector> ret; - ret.reserve(v.size() + 1); - for (auto&& wr : v) { - if (wr.is_wrap_around(cmp)) { - auto&& p = wr.unwrap(); - ret.insert(ret.begin(), nonwrapping_range(p.first)); - ret.emplace_back(p.second); - } else { - ret.emplace_back(wr); - } - } - return ret; -} - -template -std::vector> -wrap(const std::vector>& v) { - // re-wrap (-inf,x) ... (y, +inf) into (y, x): - if (v.size() >= 2 && !v.front().start() && !v.back().end()) { - auto ret = std::vector>(); - ret.reserve(v.size() - 1); - std::copy(v.begin() + 1, v.end() - 1, std::back_inserter(ret)); - ret.emplace_back(v.back().start(), v.front().end()); - return ret; - } - return boost::copy_range>>(v); -} - -template -std::vector> -wrap(std::vector>&& v) { - // re-wrap (-inf,x) ... (y, +inf) into (y, x): - if (v.size() >= 2 && !v.front().start() && !v.back().end()) { - auto ret = std::vector>(); - ret.reserve(v.size() - 1); - std::move(v.begin() + 1, v.end() - 1, std::back_inserter(ret)); - ret.emplace_back(std::move(v.back()).start(), std::move(v.front()).end()); - return ret; - } - // want boost::adaptor::moved ... 
-    return boost::copy_range<std::vector<wrapping_range<T>>>(v);
-}
-
-inline
-dht::token_range_vector
-unwrap(const std::vector<wrapping_range<dht::token>>& v) {
-    return unwrap(v, dht::token_comparator());
-}
-
-inline
-dht::token_range_vector
-unwrap(std::vector<wrapping_range<dht::token>>&& v) {
-    return unwrap(std::move(v), dht::token_comparator());
-}
-
-
-class one_or_two_partition_ranges : public std::pair<dht::partition_range, stdx::optional<dht::partition_range>> {
-    using pair = std::pair<dht::partition_range, stdx::optional<dht::partition_range>>;
-public:
-    explicit one_or_two_partition_ranges(dht::partition_range&& f)
-        : pair(std::move(f), stdx::nullopt) {
-    }
-    explicit one_or_two_partition_ranges(dht::partition_range&& f, dht::partition_range&& s)
-        : pair(std::move(f), std::move(s)) {
-    }
-    operator dht::partition_range_vector() const & {
-        auto ret = dht::partition_range_vector();
-        // not reserving, since ret.size() is likely to be 1
-        ret.push_back(first);
-        if (second) {
-            ret.push_back(*second);
-        }
-        return ret;
-    }
-    operator dht::partition_range_vector() && {
-        auto ret = dht::partition_range_vector();
-        // not reserving, since ret.size() is likely to be 1
-        ret.push_back(std::move(first));
-        if (second) {
-            ret.push_back(std::move(*second));
-        }
-        return ret;
-    }
-};
-
-inline
-one_or_two_partition_ranges
-unwrap(wrapping_partition_range pr, const schema& s) {
-    if (pr.is_wrap_around(dht::ring_position_comparator(s))) {
-        auto unw = std::move(pr).unwrap();
-        // Preserve ring order
-        return one_or_two_partition_ranges(
-                dht::partition_range(std::move(unw.second)),
-                dht::partition_range(std::move(unw.first)));
-    } else {
-        return one_or_two_partition_ranges(dht::partition_range(std::move(pr)));
-    }
-}
-
-// Unwraps `range` and calls `func` with its components, with an unwrapped
-// range type, as a parameter (once or twice)
-template <typename T, typename Comparator, typename Func>
-void
-unwrap_into(wrapping_range<T>&& range, const Comparator& cmp, Func&& func) {
-    if (range.is_wrap_around(cmp)) {
-        auto&& unw = range.unwrap();
-        // Preserve ring order
-        func(nonwrapping_range<T>(std::move(unw.second)));
-        func(nonwrapping_range<T>(std::move(unw.first)));
-    } else {
-        func(nonwrapping_range<T>(std::move(range)));
-    }
-}
-
-}
diff --git a/scylla/partition_slice_builder.cc b/scylla/partition_slice_builder.cc
deleted file mode 100644
index d40d955..0000000
--- a/scylla/partition_slice_builder.cc
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include
-#include
-#include
-
-#include "partition_slice_builder.hh"
-
-partition_slice_builder::partition_slice_builder(const schema& schema)
-    : _schema(schema)
-{
-    _options.set<query::partition_slice::option::send_partition_key>();
-    _options.set<query::partition_slice::option::send_clustering_key>();
-    _options.set<query::partition_slice::option::send_timestamp>();
-    _options.set<query::partition_slice::option::send_expiry>();
-}
-
-query::partition_slice
-partition_slice_builder::build() {
-    std::vector<query::clustering_range> ranges;
-    if (_row_ranges) {
-        ranges = std::move(*_row_ranges);
-    } else {
-        ranges.emplace_back(query::clustering_range::make_open_ended_both_sides());
-    }
-
-    std::vector<column_id> static_columns;
-    if (_static_columns) {
-        static_columns = std::move(*_static_columns);
-    } else {
-        boost::range::push_back(static_columns,
-            _schema.static_columns() | boost::adaptors::transformed(std::mem_fn(&column_definition::id)));
-    }
-
-    std::vector<column_id> regular_columns;
-    if (_regular_columns) {
-        regular_columns = std::move(*_regular_columns);
-    } else {
-        boost::range::push_back(regular_columns,
-            _schema.regular_columns() | boost::adaptors::transformed(std::mem_fn(&column_definition::id)));
-    }
-
-    return {
-        std::move(ranges),
-        std::move(static_columns),
-        std::move(regular_columns),
-        std::move(_options)
-    };
-}
-
-partition_slice_builder&
-partition_slice_builder::with_range(query::clustering_range range) {
-    if (!_row_ranges) {
-        _row_ranges = std::vector<query::clustering_range>();
-    }
-    _row_ranges->emplace_back(std::move(range));
-    return *this;
-}
-
-partition_slice_builder&
-partition_slice_builder::with_ranges(std::vector<query::clustering_range> ranges) {
-    if (!_row_ranges) {
-        _row_ranges = std::move(ranges);
-    } else {
-        for (auto&& r : ranges) {
-            with_range(std::move(r));
-        }
-    }
-    return *this;
-}
-
-partition_slice_builder&
-partition_slice_builder::with_no_regular_columns() {
-    _regular_columns = std::vector<column_id>();
-    return *this;
-}
-
-partition_slice_builder&
-partition_slice_builder::with_regular_column(bytes name) {
-    if (!_regular_columns) {
-        _regular_columns = std::vector<column_id>();
-    }
-
-    const column_definition* def = _schema.get_column_definition(name);
-    if (!def) {
-        throw std::runtime_error(sprint("No such column: %s", _schema.regular_column_name_type()->to_string(name)));
-    }
-    if (!def->is_regular()) {
-        throw std::runtime_error(sprint("Column is not regular: %s", _schema.column_name_type(*def)->to_string(name)));
-    }
-    _regular_columns->push_back(def->id);
-    return *this;
-}
-
-partition_slice_builder&
-partition_slice_builder::with_no_static_columns() {
-    _static_columns = std::vector<column_id>();
-    return *this;
-}
-
-partition_slice_builder&
-partition_slice_builder::with_static_column(bytes name) {
-    if (!_static_columns) {
-        _static_columns = std::vector<column_id>();
-    }
-
-    const column_definition* def = _schema.get_column_definition(name);
-    if (!def) {
-        throw std::runtime_error(sprint("No such column: %s", utf8_type->to_string(name)));
-    }
-    if (!def->is_static()) {
-        throw std::runtime_error(sprint("Column is not static: %s", utf8_type->to_string(name)));
-    }
-    _static_columns->push_back(def->id);
-    return *this;
-}
-
-partition_slice_builder&
-partition_slice_builder::reversed() {
-    _options.set<query::partition_slice::option::reversed>();
-    return *this;
-}
-
-partition_slice_builder&
-partition_slice_builder::without_partition_key_columns() {
-    _options.remove<query::partition_slice::option::send_partition_key>();
-    return *this;
-}
-
-partition_slice_builder&
-partition_slice_builder::without_clustering_key_columns() {
-    _options.remove<query::partition_slice::option::send_clustering_key>();
-    return *this;
-}
diff --git a/scylla/partition_slice_builder.hh b/scylla/partition_slice_builder.hh
deleted file mode 100644
index 39f6f9a..0000000
--- a/scylla/partition_slice_builder.hh
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include
-#include
-
-#include "query-request.hh"
-#include "schema.hh"
-
-//
-// Fluent builder for query::partition_slice.
-//
-// Selects everything by default, unless restricted. Each property can be
-// restricted separately. For example, by default all static columns are
-// selected, but if with_static_column() is called then only that column will
-// be included. Still, all regular columns and the whole clustering range will
-// be selected (unless restricted).
-//
-class partition_slice_builder {
-    std::experimental::optional<std::vector<column_id>> _regular_columns;
-    std::experimental::optional<std::vector<column_id>> _static_columns;
-    std::experimental::optional<std::vector<query::clustering_range>> _row_ranges;
-    const schema& _schema;
-    query::partition_slice::option_set _options;
-public:
-    partition_slice_builder(const schema& schema);
-
-    partition_slice_builder& with_static_column(bytes name);
-    partition_slice_builder& with_no_static_columns();
-    partition_slice_builder& with_regular_column(bytes name);
-    partition_slice_builder& with_no_regular_columns();
-    partition_slice_builder& with_range(query::clustering_range range);
-    partition_slice_builder& with_ranges(std::vector<query::clustering_range>);
-    partition_slice_builder& without_partition_key_columns();
-    partition_slice_builder& without_clustering_key_columns();
-    partition_slice_builder& reversed();
-
-    query::partition_slice build();
-};
diff --git a/scylla/partition_snapshot_reader.hh b/scylla/partition_snapshot_reader.hh
deleted file mode 100644
index a144538..0000000
--- a/scylla/partition_snapshot_reader.hh
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- * Copyright (C) 2017 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "partition_version.hh"
-
-struct partition_snapshot_reader_dummy_accounter {
-   void operator()(const clustering_row& cr) {}
-   void operator()(const static_row& sr) {}
-   void operator()(const range_tombstone& rt) {}
-};
-extern partition_snapshot_reader_dummy_accounter no_accounter;
-
-inline void maybe_merge_versions(lw_shared_ptr<partition_snapshot>& snp,
-                                 logalloc::region& lsa_region,
-                                 logalloc::allocating_section& read_section) {
-    if (!snp.owned()) {
-        return;
-    }
-    // If no one else is using this particular snapshot try to merge partition
-    // versions.
- with_allocator(lsa_region.allocator(), [&snp, &lsa_region, &read_section] { - return with_linearized_managed_bytes([&snp, &lsa_region, &read_section] { - try { - read_section(lsa_region, [&snp] { - snp->merge_partition_versions(); - }); - } catch (...) { } - snp = {}; - }); - }); -} - -template -class partition_snapshot_reader : public streamed_mutation::impl, public MemoryAccounter { - struct rows_position { - mutation_partition::rows_type::const_iterator _position; - mutation_partition::rows_type::const_iterator _end; - }; - - class heap_compare { - rows_entry::compare _cmp; - public: - explicit heap_compare(const schema& s) : _cmp(s) { } - bool operator()(const rows_position& a, const rows_position& b) { - return _cmp(*b._position, *a._position); - } - }; -private: - // Keeps shared pointer to the container we read mutation from to make sure - // that its lifetime is appropriately extended. - boost::any _container_guard; - - query::clustering_key_filter_ranges _ck_ranges; - query::clustering_row_ranges::const_iterator _current_ck_range; - query::clustering_row_ranges::const_iterator _ck_range_end; - bool _in_ck_range = false; - - rows_entry::compare _cmp; - position_in_partition::equal_compare _eq; - heap_compare _heap_cmp; - - lw_shared_ptr _snapshot; - stdx::optional _last_entry; - - std::vector _clustering_rows; - - range_tombstone_stream _range_tombstones; - - logalloc::region& _lsa_region; - logalloc::allocating_section& _read_section; - - MemoryAccounter& mem_accounter() { - return *this; - } - - uint64_t _reclaim_counter; - unsigned _version_count = 0; -private: - void refresh_iterators() { - _clustering_rows.clear(); - - if (!_in_ck_range) { - if (_current_ck_range == _ck_range_end) { - _end_of_stream = true; - return; - } - for (auto&& v : _snapshot->versions()) { - _range_tombstones.apply(v.partition().row_tombstones(), *_current_ck_range); - } - } - - for (auto&& v : _snapshot->versions()) { - auto cr_end = v.partition().upper_bound(*_schema, *_current_ck_range); - auto cr = [&] () -> mutation_partition::rows_type::const_iterator { - if (_in_ck_range) { - return v.partition().clustered_rows().upper_bound(*_last_entry, _cmp); - } else { - return v.partition().lower_bound(*_schema, *_current_ck_range); - } - }(); - - if (cr != cr_end) { - _clustering_rows.emplace_back(rows_position { cr, cr_end }); - } - } - - _in_ck_range = true; - boost::range::make_heap(_clustering_rows, _heap_cmp); - } - - // Valid if has_more_rows() - const rows_entry& pop_clustering_row() { - boost::range::pop_heap(_clustering_rows, _heap_cmp); - auto& current = _clustering_rows.back(); - const rows_entry& e = *current._position; - current._position = std::next(current._position); - if (current._position == current._end) { - _clustering_rows.pop_back(); - } else { - boost::range::push_heap(_clustering_rows, _heap_cmp); - } - return e; - } - - // Valid if has_more_rows() - const rows_entry& peek_row() const { - return *_clustering_rows.front()._position; - } - - bool has_more_rows() const { - return !_clustering_rows.empty(); - } - - mutation_fragment_opt read_static_row() { - _last_entry = position_in_partition(position_in_partition::static_row_tag_t()); - mutation_fragment_opt sr; - for (auto&& v : _snapshot->versions()) { - if (!v.partition().static_row().empty()) { - if (!sr) { - sr = mutation_fragment(static_row(v.partition().static_row())); - } else { - sr->as_mutable_static_row().apply(*_schema, v.partition().static_row()); - } - } - } - return sr; - } - - mutation_fragment_opt read_next() { - 
while (has_more_rows()) { - auto mf = _range_tombstones.get_next(peek_row()); - if (mf) { - return mf; - } - const rows_entry& e = pop_clustering_row(); - if (e.dummy()) { - continue; - } - clustering_row result = e; - while (has_more_rows() && _eq(peek_row().position(), result.position())) { - result.apply(*_schema, pop_clustering_row()); - } - _last_entry = position_in_partition(result.position()); - return mutation_fragment(std::move(result)); - } - return _range_tombstones.get_next(); - } - - void emplace_mutation_fragment(mutation_fragment&& mfopt) { - mfopt.visit(mem_accounter()); - push_mutation_fragment(std::move(mfopt)); - } - - void do_fill_buffer() { - if (!_last_entry) { - auto mfopt = read_static_row(); - if (mfopt) { - emplace_mutation_fragment(std::move(*mfopt)); - } - } - - if (!_in_ck_range || _lsa_region.reclaim_counter() != _reclaim_counter || _snapshot->version_count() != _version_count) { - refresh_iterators(); - _reclaim_counter = _lsa_region.reclaim_counter(); - _version_count = _snapshot->version_count(); - } - - while (!is_end_of_stream() && !is_buffer_full()) { - auto mfopt = read_next(); - if (mfopt) { - emplace_mutation_fragment(std::move(*mfopt)); - } else { - _in_ck_range = false; - _current_ck_range = std::next(_current_ck_range); - refresh_iterators(); - } - } - } - - static tombstone tomb(partition_snapshot& snp) { - tombstone t; - for (auto& v : snp.versions()) { - t.apply(v.partition().partition_tombstone()); - } - return t; - } -public: - template - partition_snapshot_reader(schema_ptr s, dht::decorated_key dk, lw_shared_ptr snp, - query::clustering_key_filter_ranges crr, - logalloc::region& region, logalloc::allocating_section& read_section, - boost::any pointer_to_container, Args&&... args) - : streamed_mutation::impl(s, std::move(dk), tomb(*snp)) - , MemoryAccounter(std::forward(args)...) - , _container_guard(std::move(pointer_to_container)) - , _ck_ranges(std::move(crr)) - , _current_ck_range(_ck_ranges.begin()) - , _ck_range_end(_ck_ranges.end()) - , _cmp(*s) - , _eq(*s) - , _heap_cmp(*s) - , _snapshot(snp) - , _range_tombstones(*s) - , _lsa_region(region) - , _read_section(read_section) { - do_fill_buffer(); - } - - ~partition_snapshot_reader() { - maybe_merge_versions(_snapshot, _lsa_region, _read_section); - } - - virtual future<> fill_buffer() override { - return _read_section(_lsa_region, [&] { - return with_linearized_managed_bytes([&] { - do_fill_buffer(); - return make_ready_future<>(); - }); - }); - } -}; - -template -inline streamed_mutation -make_partition_snapshot_reader(schema_ptr s, - dht::decorated_key dk, - query::clustering_key_filter_ranges crr, - lw_shared_ptr snp, - logalloc::region& region, - logalloc::allocating_section& read_section, - boost::any pointer_to_container, - streamed_mutation::forwarding fwd, - Args&&... 
args) -{ - auto sm = make_streamed_mutation>(s, std::move(dk), - snp, std::move(crr), region, read_section, std::move(pointer_to_container), std::forward(args)...); - if (fwd) { - return make_forwardable(std::move(sm)); // FIXME: optimize - } else { - return std::move(sm); - } -} - -inline streamed_mutation -make_partition_snapshot_reader(schema_ptr s, - dht::decorated_key dk, - query::clustering_key_filter_ranges crr, - lw_shared_ptr snp, - logalloc::region& region, - logalloc::allocating_section& read_section, - boost::any pointer_to_container, - streamed_mutation::forwarding fwd) -{ - return make_partition_snapshot_reader(std::move(s), - std::move(dk), std::move(crr), std::move(snp), region, read_section, std::move(pointer_to_container), fwd); -} diff --git a/scylla/partition_snapshot_row_cursor.hh b/scylla/partition_snapshot_row_cursor.hh deleted file mode 100644 index 671e6e8..0000000 --- a/scylla/partition_snapshot_row_cursor.hh +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "partition_version.hh" - -// Allows iterating over rows of mutation_partition represented by given partition_snapshot. -// -// The cursor initially has a position before all rows and is not pointing at any row. -// To position the cursor, use advance_to(). -// -// All methods should be called with the region of the snapshot locked. The cursor is invalidated -// when that lock section is left, or if the snapshot is modified. -// -// When the cursor is invalidated, it still maintains its previous position. It can be brought -// back to validity by calling maybe_refresh(), or advance_to(). 
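A minimal usage sketch of the cursor protocol described in the comment above (illustrative only, not part of the deleted file; `s`, `region` and `snp` are assumed to be the snapshot's schema, its LSA region and the snapshot itself, `process` is a hypothetical row consumer, and the whole loop runs inside the locked region section the comment requires):

    partition_snapshot_row_cursor cur(s, region, snp);
    // advance_to() requires that an entry at or after the bound exists.
    if (cur.advance_to(position_in_partition_view::before_all_clustered_rows())) {
        do {
            if (!cur.dummy()) {        // skip bookkeeping entries
                process(cur.row());    // row merged across all versions
            }
        } while (cur.next());
    }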
-// -class partition_snapshot_row_cursor final { - struct position_in_version { - mutation_partition::rows_type::iterator it; - mutation_partition::rows_type::iterator end; - int version_no; - - struct less_compare { - rows_entry::tri_compare _cmp; - public: - explicit less_compare(const schema& s) : _cmp(s) { } - bool operator()(const position_in_version& a, const position_in_version& b) { - auto res = _cmp(*a.it, *b.it); - return res > 0 || (res == 0 && a.version_no > b.version_no); - } - }; - }; - - const schema& _schema; - logalloc::region& _region; - partition_snapshot& _snp; - std::vector _heap; - std::vector _current_row; - position_in_partition _position; - uint64_t _last_reclaim_count = 0; - size_t _last_versions_count = 0; - - // Removes the next row from _heap and puts it into _current_row - void recreate_current_row() { - position_in_version::less_compare heap_less(_schema); - position_in_partition::equal_compare eq(_schema); - do { - boost::range::pop_heap(_heap, heap_less); - _current_row.push_back(_heap.back()); - _heap.pop_back(); - } while (!_heap.empty() && eq(_current_row[0].it->position(), _heap[0].it->position())); - _position = position_in_partition(_current_row[0].it->position()); - } -public: - partition_snapshot_row_cursor(const schema& s, logalloc::region& region, partition_snapshot& snp) - : _schema(s) - , _region(region) - , _snp(snp) - , _position(position_in_partition::static_row_tag_t{}) - { } - bool has_up_to_date_row_from_latest_version() const { - return up_to_date() && _current_row[0].version_no == 0; - } - mutation_partition::rows_type::iterator get_iterator_in_latest_version() const { - return _current_row[0].it; - } - bool up_to_date() const { - return _region.reclaim_counter() == _last_reclaim_count && _last_versions_count == _snp.version_count(); - } - - // Brings back the cursor to validity. - // Can be only called when cursor is pointing at a row. - // - // Semantically equivalent to: - // - // advance_to(position()); - // - // but avoids work if not necessary. - bool maybe_refresh() { - if (!up_to_date()) { - return advance_to(_position); - } - return true; - } - - // Moves the cursor to the first entry with position >= pos. - // - // The caller must ensure that such entry exists. - // - // Returns true iff there can't be any clustering row entries - // between lower_bound (inclusive) and the entry to which the cursor - // was advanced. - // - // May be called when cursor is not valid. - // The cursor is valid after the call. - // Must be called under reclaim lock. - bool advance_to(position_in_partition_view lower_bound) { - rows_entry::compare less(_schema); - position_in_version::less_compare heap_less(_schema); - _heap.clear(); - _current_row.clear(); - int version_no = 0; - for (auto&& v : _snp.versions()) { - auto& rows = v.partition().clustered_rows(); - auto pos = rows.lower_bound(lower_bound, less); - auto end = rows.end(); - if (pos != end) { - _heap.push_back({pos, end, version_no}); - } - ++version_no; - } - boost::range::make_heap(_heap, heap_less); - _last_reclaim_count = _region.reclaim_counter(); - _last_versions_count = _snp.version_count(); - bool found = no_clustering_row_between(_schema, lower_bound, _heap[0].it->position()); - recreate_current_row(); - return found; - } - - // Advances the cursor to the next row. - // If there is no next row, returns false and the cursor is no longer pointing at a row. - // Can be only called on a valid cursor pointing at a row. 
- bool next() { - position_in_version::less_compare heap_less(_schema); - assert(up_to_date()); - for (auto&& curr : _current_row) { - ++curr.it; - if (curr.it != curr.end) { - _heap.push_back(curr); - boost::range::push_heap(_heap, heap_less); - } - } - _current_row.clear(); - if (_heap.empty()) { - return false; - } - recreate_current_row(); - return true; - } - - // Can be called only when cursor is valid and pointing at a row. - bool continuous() const { return bool(_current_row[0].it->continuous()); } - - // Can be called only when cursor is valid and pointing at a row. - bool dummy() const { return bool(_current_row[0].it->dummy()); } - - // Can be called only when cursor is valid and pointing at a row, and !dummy(). - const clustering_key& key() const { return _current_row[0].it->key(); } - - // Can be called only when cursor is valid and pointing at a row. - mutation_fragment row() const { - auto it = _current_row.begin(); - auto mf = mutation_fragment(clustering_row(*it->it)); - auto& cr = mf.as_mutable_clustering_row(); - for (++it; it != _current_row.end(); ++it) { - cr.apply(_schema, *it->it); - } - return mf; - } - - // Can be called when cursor is pointing at a row, even when invalid. - const position_in_partition& position() const { - return _position; - } - - bool is_in_latest_version() const; - bool previous_row_in_latest_version_has_key(const clustering_key_prefix& key) const; - void set_continuous(bool val); -}; - -inline -bool partition_snapshot_row_cursor::is_in_latest_version() const { - return _current_row[0].version_no == 0; -} - -inline -bool partition_snapshot_row_cursor::previous_row_in_latest_version_has_key(const clustering_key_prefix& key) const { - if (_current_row[0].it == _snp.version()->partition().clustered_rows().begin()) { - return false; - } - auto prev_it = _current_row[0].it; - --prev_it; - clustering_key_prefix::equality eq(_schema); - return eq(prev_it->key(), key); -} - -inline -void partition_snapshot_row_cursor::set_continuous(bool val) { - _current_row[0].it->set_continuous(val); -} diff --git a/scylla/partition_version.cc b/scylla/partition_version.cc deleted file mode 100644 index fa52861..0000000 --- a/scylla/partition_version.cc +++ /dev/null @@ -1,574 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */
-
-#include
-#include
-
-#include "partition_version.hh"
-
-static void remove_or_mark_as_unique_owner(partition_version* current)
-{
-    while (current && !current->is_referenced()) {
-        auto next = current->next();
-        current_allocator().destroy(current);
-        current = next;
-    }
-    if (current) {
-        current->back_reference().mark_as_unique_owner();
-    }
-}
-
-partition_version::partition_version(partition_version&& pv) noexcept
-    : anchorless_list_base_hook(std::move(pv))
-    , _backref(pv._backref)
-    , _partition(std::move(pv._partition))
-{
-    if (_backref) {
-        _backref->_version = this;
-    }
-    pv._backref = nullptr;
-}
-
-partition_version& partition_version::operator=(partition_version&& pv) noexcept
-{
-    if (this != &pv) {
-        this->~partition_version();
-        new (this) partition_version(std::move(pv));
-    }
-    return *this;
-}
-
-partition_version::~partition_version()
-{
-    if (_backref) {
-        _backref->_version = nullptr;
-    }
-}
-
-namespace {
-
-GCC6_CONCEPT(
-
-// A functor which transforms objects from Domain into objects from CoDomain
-template<typename U, typename Domain, typename CoDomain>
-concept bool Mapper() {
-    return requires(U obj, const Domain& src) {
-        { obj(src) } -> const CoDomain&;
-    };
-}
-
-// A functor which merges two objects from Domain into one. The result is stored in the first argument.
-template<typename U, typename Domain>
-concept bool Reducer() {
-    return requires(U obj, Domain& dst, const Domain& src) {
-        { obj(dst, src) } -> void;
-    };
-}
-
-)
-
-// Calculates the value of particular part of mutation_partition represented by
-// the version chain starting from v.
-// |map| extracts the part from each version.
-// |reduce| combines parts from the two versions.
-template <typename Result, typename Map, typename Reduce>
-GCC6_CONCEPT(
-requires Mapper<Map, mutation_partition, Result>() && Reducer<Reduce, Result>()
-)
-inline Result squashed(const partition_version_ref& v, Map&& map, Reduce&& reduce) {
-    Result r = map(v->partition());
-    auto it = v->next();
-    while (it) {
-        reduce(r, map(it->partition()));
-        it = it->next();
-    }
-    return r;
-}
-
-}
-
-row partition_snapshot::static_row() const {
-    return ::squashed<row>(version(),
-                           [] (const mutation_partition& mp) -> const row& { return mp.static_row(); },
-                           [this] (row& a, const row& b) { a.apply(*_schema, column_kind::static_column, b); });
-}
-
-tombstone partition_snapshot::partition_tombstone() const {
-    return ::squashed<tombstone>(version(),
-                                 [] (const mutation_partition& mp) { return mp.partition_tombstone(); },
-                                 [] (tombstone& a, tombstone b) { a.apply(b); });
-}
-
-mutation_partition partition_snapshot::squashed() const {
-    return ::squashed<mutation_partition>(version(),
-                                          [] (const mutation_partition& mp) -> const mutation_partition& { return mp; },
-                                          [this] (mutation_partition& a, const mutation_partition& b) { a.apply(*_schema, b, *_schema); });
-}
-
-tombstone partition_entry::partition_tombstone() const {
-    return ::squashed<tombstone>(_version,
-                                 [] (const mutation_partition& mp) { return mp.partition_tombstone(); },
-                                 [] (tombstone& a, tombstone b) { a.apply(b); });
-}
-
-partition_snapshot::~partition_snapshot() {
-    if (_version && _version.is_unique_owner()) {
-        auto v = &*_version;
-        _version = {};
-        remove_or_mark_as_unique_owner(v);
-    } else if (_entry) {
-        _entry->_snapshot = nullptr;
-    }
-}
-
-void partition_snapshot::merge_partition_versions() {
-    if (_version && !_version.is_unique_owner()) {
-        auto v = &*_version;
-        _version = { };
-        auto first_used = v;
-        while (first_used->prev() && !first_used->is_referenced()) {
-            first_used = first_used->prev();
-        }
-
-        auto current = first_used->next();
-        while (current && !current->is_referenced()) {
-            auto next = current->next();
-            try {
first_used->partition().apply(*_schema, std::move(current->partition())); - current_allocator().destroy(current); - } catch (...) { - // Set _version so that the merge can be retried. - _version = partition_version_ref(*current); - throw; - } - current = next; - } - } -} - -unsigned partition_snapshot::version_count() -{ - unsigned count = 0; - for (auto&& v : versions()) { - (void)v; - count++; - } - return count; -} - -partition_entry::partition_entry(mutation_partition mp) -{ - auto new_version = current_allocator().construct(std::move(mp)); - _version = partition_version_ref(*new_version); -} - -partition_entry::~partition_entry() { - if (!_version) { - return; - } - if (_snapshot) { - _snapshot->_version = std::move(_version); - _snapshot->_version.mark_as_unique_owner(); - _snapshot->_entry = nullptr; - } else { - auto v = &*_version; - _version = { }; - remove_or_mark_as_unique_owner(v); - } -} - -void partition_entry::set_version(partition_version* new_version) -{ - if (_snapshot) { - _snapshot->_version = std::move(_version); - _snapshot->_entry = nullptr; - } - - _snapshot = nullptr; - _version = partition_version_ref(*new_version); -} - -void partition_entry::apply(const schema& s, const mutation_partition& mp, const schema& mp_schema) -{ - if (!_snapshot) { - _version->partition().apply(s, mp, mp_schema); - } else { - mutation_partition mp1 = mp; - if (s.version() != mp_schema.version()) { - mp1.upgrade(mp_schema, s); - } - auto new_version = current_allocator().construct(std::move(mp1)); - new_version->insert_before(*_version); - - set_version(new_version); - } -} - -void partition_entry::apply(const schema& s, mutation_partition_view mpv, const schema& mp_schema) -{ - if (!_snapshot) { - _version->partition().apply(s, mpv, mp_schema); - } else { - mutation_partition mp(s.shared_from_this()); - mp.apply(s, mpv, mp_schema); - auto new_version = current_allocator().construct(std::move(mp)); - new_version->insert_before(*_version); - - set_version(new_version); - } -} - -// Iterates over all rows in mutation represented by partition_entry. -// It abstracts away the fact that rows may be spread across multiple versions. -class partition_entry::rows_iterator final { - struct version { - mutation_partition::rows_type::iterator current_row; - mutation_partition::rows_type* rows; - bool can_move; - struct compare { - const rows_entry::tri_compare& _cmp; - public: - explicit compare(const rows_entry::tri_compare& cmp) : _cmp(cmp) { } - bool operator()(const version& a, const version& b) const { - return _cmp(*a.current_row, *b.current_row) > 0; - } - }; - }; - const schema& _schema; - rows_entry::tri_compare _rows_cmp; - rows_entry::compare _rows_less_cmp; - version::compare _version_cmp; - std::vector _heap; - std::vector _current_row; -public: - rows_iterator(partition_version* version, const schema& schema) - : _schema(schema) - , _rows_cmp(schema) - , _rows_less_cmp(schema) - , _version_cmp(_rows_cmp) - { - bool can_move = true; - while (version) { - can_move &= !version->is_referenced(); - auto& rows = version->partition().clustered_rows(); - if (!rows.empty()) { - _heap.push_back({rows.begin(), &rows, can_move}); - } - version = version->next(); - } - boost::range::make_heap(_heap, _version_cmp); - move_to_next_row(); - } - bool done() const { - return _current_row.empty(); - } - // Return clustering key of the current row in source. - // Valid only when !is_dummy(). 
- const clustering_key& key() const { - return _current_row[0].current_row->key(); - } - bool is_dummy() const { - return bool(_current_row[0].current_row->dummy()); - } - template - void consume_row(RowConsumer&& consumer) { - assert(!_current_row.empty()); - // versions in _current_row are not ordered but it is not a problem - // due to the fact that all rows are continuous. - for (version& v : _current_row) { - if (!v.can_move) { - consumer(deletable_row(v.current_row->row())); - } else { - consumer(std::move(v.current_row->row())); - } - } - } - void remove_current_row_when_possible() { - assert(!_current_row.empty()); - auto deleter = current_deleter(); - for (version& v : _current_row) { - if (v.can_move) { - v.rows->erase_and_dispose(v.current_row, deleter); - } - } - } - void move_to_next_row() { - _current_row.clear(); - while (!_heap.empty() && - (_current_row.empty() || _rows_cmp(*_current_row[0].current_row, *_heap[0].current_row) == 0)) { - boost::range::pop_heap(_heap, _version_cmp); - auto& curr = _heap.back(); - _current_row.push_back({curr.current_row, curr.rows, curr.can_move}); - ++curr.current_row; - if (curr.current_row == curr.rows->end()) { - _heap.pop_back(); - } else { - boost::range::push_heap(_heap, _version_cmp); - } - } - } -}; - -namespace { - -// When applying partition_entry to an incomplete partition_entry this class is used to represent -// the target incomplete partition_entry. It encapsulates the logic needed for handling multiple versions. -class apply_incomplete_target final { - struct version { - mutation_partition::rows_type::iterator current_row; - mutation_partition::rows_type* rows; - size_t version_no; - - struct compare { - const rows_entry::tri_compare& _cmp; - public: - explicit compare(const rows_entry::tri_compare& cmp) : _cmp(cmp) { } - bool operator()(const version& a, const version& b) const { - auto res = _cmp(*a.current_row, *b.current_row); - return res > 0 || (res == 0 && a.version_no > b.version_no); - } - }; - }; - const schema& _schema; - partition_entry& _pe; - rows_entry::tri_compare _rows_cmp; - rows_entry::compare _rows_less_cmp; - version::compare _version_cmp; - std::vector _heap; - mutation_partition::rows_type::iterator _next_in_latest_version; -public: - apply_incomplete_target(partition_entry& pe, const schema& schema) - : _schema(schema) - , _pe(pe) - , _rows_cmp(schema) - , _rows_less_cmp(schema) - , _version_cmp(_rows_cmp) - { - size_t version_no = 0; - _next_in_latest_version = pe.version()->partition().clustered_rows().begin(); - for (auto&& v : pe.version()->elements_from_this()) { - if (!v.partition().clustered_rows().empty()) { - _heap.push_back({v.partition().clustered_rows().begin(), &v.partition().clustered_rows(), version_no}); - } - ++version_no; - } - boost::range::make_heap(_heap, _version_cmp); - } - // Applies the row from source. - // Must be called for rows with monotonic keys. - // Weak exception guarantees. The target and source partitions are left - // in a state such that the two still commute to the same value on retry. 
- void apply(partition_entry::rows_iterator& src) { - auto&& key = src.key(); - while (!_heap.empty() && _rows_less_cmp(*_heap[0].current_row, key)) { - boost::range::pop_heap(_heap, _version_cmp); - auto& curr = _heap.back(); - curr.current_row = curr.rows->lower_bound(key, _rows_less_cmp); - if (curr.version_no == 0) { - _next_in_latest_version = curr.current_row; - } - if (curr.current_row == curr.rows->end()) { - _heap.pop_back(); - } else { - boost::range::push_heap(_heap, _version_cmp); - } - } - - if (!_heap.empty()) { - rows_entry& next_row = *_heap[0].current_row; - if (_rows_cmp(key, next_row) == 0) { - if (next_row.dummy()) { - return; - } - } else if (!next_row.continuous()) { - return; - } - } - - mutation_partition::rows_type& rows = _pe.version()->partition().clustered_rows(); - if (_next_in_latest_version != rows.end() && _rows_cmp(key, *_next_in_latest_version) == 0) { - src.consume_row([&] (deletable_row&& row) { - _next_in_latest_version->row().apply(_schema, std::move(row)); - }); - } else { - auto e = current_allocator().construct(key); - e->set_continuous(_heap.empty() ? is_continuous::yes : _heap[0].current_row->continuous()); - rows.insert_before(_next_in_latest_version, *e); - src.consume_row([&] (deletable_row&& row) { - e->row().apply(_schema, std::move(row)); - }); - } - } -}; - -} // namespace - -template -void partition_entry::with_detached_versions(Func&& func) { - partition_version* current = &*_version; - auto snapshot = _snapshot; - if (snapshot) { - snapshot->_version = std::move(_version); - snapshot->_entry = nullptr; - _snapshot = nullptr; - } - _version = { }; - - auto revert = defer([&] { - if (snapshot) { - _snapshot = snapshot; - snapshot->_entry = this; - _version = std::move(snapshot->_version); - } else { - _version = partition_version_ref(*current); - } - }); - - func(current); -} - -void partition_entry::apply_to_incomplete(const schema& s, partition_entry&& pe, const schema& pe_schema) -{ - if (s.version() != pe_schema.version()) { - partition_entry entry(pe.squashed(pe_schema.shared_from_this(), s.shared_from_this())); - entry.with_detached_versions([&] (partition_version* v) { - apply_to_incomplete(s, v); - }); - } else { - pe.with_detached_versions([&](partition_version* v) { - apply_to_incomplete(s, v); - }); - } -} - -void partition_entry::apply_to_incomplete(const schema& s, partition_version* version) { - partition_version& dst = open_version(s); - - bool can_move = true; - auto current = version; - bool static_row_continuous = dst.partition().static_row_continuous(); - while (current) { - can_move &= !current->is_referenced(); - dst.partition().apply(current->partition().partition_tombstone()); - if (static_row_continuous) { - row& static_row = dst.partition().static_row(); - if (can_move) { - static_row.apply(s, column_kind::static_column, std::move(current->partition().static_row())); - } else { - static_row.apply(s, column_kind::static_column, current->partition().static_row()); - } - } - range_tombstone_list& tombstones = dst.partition().row_tombstones(); - if (can_move) { - tombstones.apply_reversibly(s, current->partition().row_tombstones()).cancel(); - } else { - tombstones.apply(s, current->partition().row_tombstones()); - } - current = current->next(); - } - - partition_entry::rows_iterator source(version, s); - apply_incomplete_target target(*this, s); - - while (!source.done()) { - if (!source.is_dummy()) { - target.apply(source); - } - source.remove_current_row_when_possible(); - source.move_to_next_row(); - } -} - 
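The weak exception guarantee documented above is what makes this merge retryable; a minimal sketch of the retry loop it enables (illustrative only; `dst`, `src`, `s` and `reclaim_some_memory` are hypothetical names):

    // On failure both entries are left in states that still commute to the
    // same final value, so repeating the call cannot double-apply data.
    while (true) {
        try {
            dst.apply_to_incomplete(s, std::move(src), s);
            break;
        } catch (const std::bad_alloc&) {
            reclaim_some_memory(); // hypothetical hook: free LSA space, retry
        }
    }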
-mutation_partition partition_entry::squashed(schema_ptr from, schema_ptr to) -{ - mutation_partition mp(to); - mp.set_static_row_continuous(_version->partition().static_row_continuous()); - for (auto&& v : _version->all_elements()) { - mp.apply(*to, v.partition(), *from); - } - return mp; -} - -mutation_partition partition_entry::squashed(const schema& s) -{ - return squashed(s.shared_from_this(), s.shared_from_this()); -} - -void partition_entry::upgrade(schema_ptr from, schema_ptr to) -{ - auto new_version = current_allocator().construct(mutation_partition(to)); - new_version->partition().set_static_row_continuous(_version->partition().static_row_continuous()); - try { - for (auto&& v : _version->all_elements()) { - new_version->partition().apply(*to, v.partition(), *from); - } - } catch (...) { - current_allocator().destroy(new_version); - throw; - } - - auto old_version = &*_version; - set_version(new_version); - remove_or_mark_as_unique_owner(old_version); -} - -lw_shared_ptr partition_entry::read(schema_ptr entry_schema, partition_snapshot::phase_type phase) -{ - open_version(*entry_schema, phase); - if (_snapshot) { - return _snapshot->shared_from_this(); - } else { - auto snp = make_lw_shared(entry_schema, this, phase); - _snapshot = snp.get(); - return snp; - } -} - -std::vector -partition_snapshot::range_tombstones(const schema& s, position_in_partition_view start, position_in_partition_view end) -{ - range_tombstone_list list(s); - for (auto&& v : versions()) { - for (auto&& rt : v.partition().row_tombstones().slice(s, start, end)) { - list.apply(s, rt); - } - } - return boost::copy_range>(list); -} - -std::ostream& operator<<(std::ostream& out, partition_entry& e) { - out << "{"; - bool first = true; - if (e._version) { - for (const partition_version& v : e.versions()) { - if (!first) { - out << ", "; - } - if (v.is_referenced()) { - out << "(*) "; - } - out << v.partition(); - first = false; - } - } - out << "}"; - return out; -} diff --git a/scylla/partition_version.hh b/scylla/partition_version.hh deleted file mode 100644 index a1b1008..0000000 --- a/scylla/partition_version.hh +++ /dev/null @@ -1,363 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "mutation_partition.hh" -#include "streamed_mutation.hh" -#include "utils/anchorless_list.hh" -#include "utils/logalloc.hh" - -// This is MVCC implementation for mutation_partitions. -// -// It is assumed that mutation_partitions are stored in some sort of LSA-managed -// container (memtable or row cache). -// -// partition_entry - the main handle to the mutation_partition, allows writes -// and reads. -// partition_version - mutation_partition inside a list of partition versions. -// mutation_partition represents just a difference against -// the next one in the list. 
To get a single -// mutation_partition fully representing this version one -// needs to merge this one and all its successors in the -// list. -// partition_snapshot - a handle to some particular partition_version. It allows -// only reads and itself is immutable the partition version -// it represents won't be modified as long as the snapshot -// is alive. -// -// pe - partition_entry -// pv - partition_version -// ps - partition_snapshot -// ps(u) - partition_snapshot marked as unique owner - -// Scene I. Write-only loads -// pv -// ^ -// | -// pe -// In case of write-only loads all incoming mutations are directly applied -// to the partition_version that partition_entry is pointing to. The list -// of partition_versions contains only a single element. -// -// Scene II. Read-only loads -// pv -// ^ -// | -// pe <- ps -// In case of read-only scenarios there is only a single partition_snapshot -// object that points to the partition_entry. There is only a single -// partition_version. -// -// Scene III. Writes and reads -// pv -- pv -- pv -// ^ ^ ^ -// | | | -// pe ps ps -// If the partition_entry that needs to be modified is currently read from (i.e. -// there exist a partition_snapshot pointing to it) instead of applying new -// mutation directly a new partition version is created and added at the front -// of the list. partition_entry points to the new version (so that it has the -// most recent view of stored data) while the partition_snapshot points to the -// same partition_version it pointed to before (so that the data it sees doesn't -// change). -// As a result the list may contain multiple partition versions used by -// different partition snapshots. -// When the partition_snapshot is destroyed partition_versions are squashed -// together to minimize the amount of elements on the list. -// -// Scene IV. Schema upgrade -// pv pv --- pv -// ^ ^ ^ -// | | | -// pe ps(u) ps -// When there is a schema upgrade the list of partition versions pointed to -// by partition_entry is replaced by a new single partition_version that is a -// result of squashing and upgrading the old versions. -// Old versions not used by any partition snapshot are removed. The first -// partition snapshot on the list is marked as unique which means that upon -// its destruction it won't attempt to squash versions but instead remove -// the unused ones and pass the "unique owner" mark the next snapshot on the -// list (if there is any). -// -// Scene V. partition_entry eviction -// pv -// ^ -// | -// ps(u) -// When partition_entry is removed (e.g. because it was evicted from cache) -// the partition versions are removed in a similar manner than in the schema -// upgrade scenario. The unused ones are destroyed right away and the first -// snapshot on the list is marked as unique owner so that on its destruction -// it continues removal of the partition versions. 
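A minimal sketch of Scenes II and III in code (illustrative only; assumes it runs inside an LSA allocator scope as these classes require, with `s` a schema_ptr and `incoming` a mutation_partition to merge in):

    partition_entry e(mutation_partition(s));
    auto snap = e.read(s);        // Scene II: pe and ps share one version
    e.apply(*s, incoming, *s);    // Scene III: a new version is prepended for
                                  // the write; `snap` still reads the old one
    auto as_seen = snap->squashed(); // reader's view, unaffected by the write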
- -class partition_version_ref; - -class partition_version : public anchorless_list_base_hook { - partition_version_ref* _backref = nullptr; - mutation_partition _partition; - - friend class partition_version_ref; -public: - explicit partition_version(schema_ptr s) noexcept - : _partition(std::move(s)) { } - explicit partition_version(mutation_partition mp) noexcept - : _partition(std::move(mp)) { } - partition_version(partition_version&& pv) noexcept; - partition_version& operator=(partition_version&& pv) noexcept; - ~partition_version(); - - mutation_partition& partition() { return _partition; } - const mutation_partition& partition() const { return _partition; } - - bool is_referenced() const { return _backref; } - partition_version_ref& back_reference() { return *_backref; } -}; - -using partition_version_range = anchorless_list_base_hook::range; - -class partition_version_ref { - partition_version* _version = nullptr; - bool _unique_owner = false; - - friend class partition_version; -public: - partition_version_ref() = default; - explicit partition_version_ref(partition_version& pv) noexcept : _version(&pv) { - assert(!_version->_backref); - _version->_backref = this; - } - ~partition_version_ref() { - if (_version) { - _version->_backref = nullptr; - } - } - partition_version_ref(partition_version_ref&& other) noexcept : _version(other._version) { - if (_version) { - _version->_backref = this; - } - other._version = nullptr; - } - partition_version_ref& operator=(partition_version_ref&& other) noexcept { - if (this != &other) { - this->~partition_version_ref(); - new (this) partition_version_ref(std::move(other)); - } - return *this; - } - - explicit operator bool() const { return _version; } - - partition_version& operator*() { - assert(_version); - return *_version; - } - partition_version* operator->() { - assert(_version); - return _version; - } - const partition_version* operator->() const { - assert(_version); - return _version; - } - - bool is_unique_owner() const { return _unique_owner; } - void mark_as_unique_owner() { _unique_owner = true; } -}; - -class partition_entry; - -class partition_snapshot : public enable_lw_shared_from_this { -public: - // Only snapshots created with the same value of phase can point to the same version. - using phase_type = uint64_t; - static constexpr phase_type default_phase = 0; - static constexpr phase_type max_phase = std::numeric_limits::max(); -private: - schema_ptr _schema; - // Either _version or _entry is non-null. - partition_version_ref _version; - partition_entry* _entry; - phase_type _phase; - - friend class partition_entry; -public: - explicit partition_snapshot(schema_ptr s, - partition_entry* entry, - phase_type phase = default_phase) - : _schema(std::move(s)), _entry(entry), _phase(phase) { } - partition_snapshot(const partition_snapshot&) = delete; - partition_snapshot(partition_snapshot&&) = delete; - partition_snapshot& operator=(const partition_snapshot&) = delete; - partition_snapshot& operator=(partition_snapshot&&) = delete; - - // If possible merges the version pointed to by this snapshot with - // adjacent partition versions. Leaves the snapshot in an unspecified state. - // Can be retried if previous merge attempt has failed. 
- void merge_partition_versions(); - - ~partition_snapshot(); - - partition_version_ref& version(); - - const partition_version_ref& version() const; - - partition_version_range versions() { - return version()->elements_from_this(); - } - - unsigned version_count(); - - bool at_latest_version() const { - return _entry != nullptr; - } - - tombstone partition_tombstone() const; - row static_row() const; - mutation_partition squashed() const; - // Returns range tombstones overlapping with [start, end) - std::vector range_tombstones(const schema& s, position_in_partition_view start, position_in_partition_view end); -}; - -// Represents mutation_partition with snapshotting support a la MVCC. -// -// Internally the state is represented by an ordered list of mutation_partition -// objects called versions. The logical mutation_partition state represented -// by that chain is equal to reducing the chain using mutation_partition::apply() -// from left (latest version) to right. -class partition_entry { - partition_snapshot* _snapshot = nullptr; - partition_version_ref _version; - - friend class partition_snapshot; - friend class cache_entry; -private: - // Detaches all versions temporarily around execution of the function. - // The function receives partition_version* pointing to the latest version. - template - void with_detached_versions(Func&&); - - void set_version(partition_version*); - - void apply_to_incomplete(const schema& s, partition_version* other); -public: - class rows_iterator; - partition_entry() = default; - explicit partition_entry(mutation_partition mp); - ~partition_entry(); - - partition_entry(partition_entry&& pe) noexcept - : _snapshot(pe._snapshot), _version(std::move(pe._version)) - { - if (_snapshot) { - _snapshot->_entry = this; - } - pe._snapshot = nullptr; - } - partition_entry& operator=(partition_entry&& other) noexcept { - if (this != &other) { - this->~partition_entry(); - new (this) partition_entry(std::move(other)); - } - return *this; - } - - partition_version_ref& version() { - return _version; - } - - partition_version_range versions() { - return _version->elements_from_this(); - } - - // Strong exception guarantees. - // Assumes this instance and mp are fully continuous. - void apply(const schema& s, const mutation_partition& mp, const schema& mp_schema); - - // Strong exception guarantees. - // Assumes this instance and mpv are fully continuous. - void apply(const schema& s, mutation_partition_view mpv, const schema& mp_schema); - - // Adds mutation_partition represented by "other" to the one represented - // by this entry. - // - // The argument must be fully-continuous. - // - // The rules of addition differ from that used by regular - // mutation_partition addition with regards to continuity. The continuity - // of the result is the same as in this instance. Information from "other" - // which is incomplete in this instance is dropped. In other words, this - // performs set intersection on continuity information, drops information - // which falls outside of the continuity range, and applies regular merging - // rules for the rest. - // - // Weak exception guarantees. - // If an exception is thrown this and pe will be left in some valid states - // such that if the operation is retried (possibly many times) and eventually - // succeeds the result will be as if the first attempt didn't fail. 
- void apply_to_incomplete(const schema& s, partition_entry&& pe, const schema& pe_schema); - - // Ensures that the latest version can be populated with data from given phase - // by inserting a new version if necessary. - // Doesn't affect value or continuity of the partition. - // Returns a reference to the new latest version. - partition_version& open_version(const schema& s, partition_snapshot::phase_type phase = partition_snapshot::max_phase) { - if (_snapshot && _snapshot->_phase != phase) { - auto new_version = current_allocator().construct(mutation_partition(s.shared_from_this())); - new_version->partition().set_static_row_continuous(_version->partition().static_row_continuous()); - new_version->insert_before(*_version); - set_version(new_version); - return *new_version; - } - return *_version; - } - - mutation_partition squashed(schema_ptr from, schema_ptr to); - mutation_partition squashed(const schema&); - tombstone partition_tombstone() const; - - // needs to be called with reclaiming disabled - void upgrade(schema_ptr from, schema_ptr to); - - // Snapshots with different values of phase will point to different partition_version objects. - lw_shared_ptr read(schema_ptr entry_schema, - partition_snapshot::phase_type phase = partition_snapshot::default_phase); - - friend std::ostream& operator<<(std::ostream& out, partition_entry& e); -}; - -inline partition_version_ref& partition_snapshot::version() -{ - if (_version) { - return _version; - } else { - return _entry->_version; - } -} - -inline const partition_version_ref& partition_snapshot::version() const -{ - if (_version) { - return _version; - } else { - return _entry->_version; - } -} diff --git a/scylla/position_in_partition.hh b/scylla/position_in_partition.hh deleted file mode 100644 index 765a1a7..0000000 --- a/scylla/position_in_partition.hh +++ /dev/null @@ -1,462 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "types.hh" -#include "keys.hh" -#include "clustering_bounds_comparator.hh" -#include "query-request.hh" - -inline -lexicographical_relation relation_for_lower_bound(composite_view v) { - switch (v.last_eoc()) { - case composite::eoc::start: - case composite::eoc::none: - return lexicographical_relation::before_all_prefixed; - case composite::eoc::end: - return lexicographical_relation::after_all_prefixed; - default: - assert(0); - } -} - -inline -lexicographical_relation relation_for_upper_bound(composite_view v) { - switch (v.last_eoc()) { - case composite::eoc::start: - return lexicographical_relation::before_all_prefixed; - case composite::eoc::none: - return lexicographical_relation::before_all_strictly_prefixed; - case composite::eoc::end: - return lexicographical_relation::after_all_prefixed; - default: - assert(0); - } -} - -class position_in_partition_view { - friend class position_in_partition; - - int _bound_weight = 0; - const clustering_key_prefix* _ck; // nullptr for static row -private: - position_in_partition_view(int bound_weight, const clustering_key_prefix* ck) - : _bound_weight(bound_weight) - , _ck(ck) - { } - // Returns placement of this position_in_partition relative to *_ck, - // or lexicographical_relation::at_prefix if !_ck. - lexicographical_relation relation() const { - // FIXME: Currently position_range cannot represent a range end bound which - // includes just the prefix key or a range start which excludes just a prefix key. - // In both cases we should return lexicographical_relation::before_all_strictly_prefixed here. - // Refs #1446. - if (_bound_weight <= 0) { - return lexicographical_relation::before_all_prefixed; - } else { - return lexicographical_relation::after_all_prefixed; - } - } -public: - struct static_row_tag_t { }; - struct clustering_row_tag_t { }; - struct range_tag_t { }; - using range_tombstone_tag_t = range_tag_t; - - position_in_partition_view(static_row_tag_t) : _ck(nullptr) { } - position_in_partition_view(clustering_row_tag_t, const clustering_key_prefix& ck) - : _ck(&ck) { } - position_in_partition_view(const clustering_key_prefix& ck) - : _ck(&ck) { } - position_in_partition_view(range_tag_t, bound_view bv) - : _bound_weight(weight(bv.kind)), _ck(&bv.prefix) { } - - static position_in_partition_view for_range_start(const query::clustering_range& r) { - return {position_in_partition_view::range_tag_t(), bound_view::from_range_start(r)}; - } - - static position_in_partition_view for_range_end(const query::clustering_range& r) { - return {position_in_partition_view::range_tag_t(), bound_view::from_range_end(r)}; - } - - static position_in_partition_view before_all_clustered_rows() { - return {range_tag_t(), bound_view::bottom()}; - } - - static position_in_partition_view after_all_clustered_rows() { - return {position_in_partition_view::range_tag_t(), bound_view::top()}; - } - - static position_in_partition_view for_static_row() { - return {static_row_tag_t()}; - } - - static position_in_partition_view for_key(const clustering_key& ck) { - return {clustering_row_tag_t(), ck}; - } - - static position_in_partition_view after_key(const clustering_key& ck) { - return {1, &ck}; - } - - bool is_static_row() const { return !_ck; } - bool is_clustering_row() const { return _ck && !_bound_weight; } - - // Returns true if all fragments that can be seen for given schema have - // positions >= than this. 
- bool is_before_all_fragments(const schema& s) const { - return !_ck || (!s.has_static_columns() && _bound_weight < 0 && _ck->is_empty(s)); - } - - bool is_after_all_clustered_rows(const schema& s) const { - return _ck && _ck->is_empty(s) && _bound_weight > 0; - } - - // Valid when >= before_all_clustered_rows() - const clustering_key_prefix& key() const { - return *_ck; - } - - // Can be called only when !is_static_row && !is_clustering_row(). - bound_view as_start_bound_view() const { - assert(_bound_weight != 0); - return bound_view(*_ck, _bound_weight < 0 ? bound_kind::incl_start : bound_kind::excl_start); - } - - friend std::ostream& operator<<(std::ostream&, position_in_partition_view); - friend bool no_clustering_row_between(const schema&, position_in_partition_view, position_in_partition_view); -}; - -class position_in_partition { - int _bound_weight = 0; - stdx::optional _ck; -public: - struct static_row_tag_t { }; - struct after_static_row_tag_t { }; - struct clustering_row_tag_t { }; - struct after_clustering_row_tag_t { }; - struct range_tag_t { }; - using range_tombstone_tag_t = range_tag_t; - - explicit position_in_partition(static_row_tag_t) { } - position_in_partition(clustering_row_tag_t, clustering_key_prefix ck) - : _ck(std::move(ck)) { } - position_in_partition(after_clustering_row_tag_t, clustering_key_prefix ck) - // FIXME: Use lexicographical_relation::before_strictly_prefixed here. Refs #1446 - : _bound_weight(1), _ck(std::move(ck)) { } - position_in_partition(range_tag_t, bound_view bv) - : _bound_weight(weight(bv.kind)), _ck(bv.prefix) { } - position_in_partition(after_static_row_tag_t) : - position_in_partition(range_tag_t(), bound_view::bottom()) { } - explicit position_in_partition(position_in_partition_view view) - : _bound_weight(view._bound_weight) - { - if (view._ck) { - _ck = *view._ck; - } - } - - static position_in_partition before_all_clustered_rows() { - return {position_in_partition::range_tag_t(), bound_view::bottom()}; - } - - static position_in_partition after_all_clustered_rows() { - return {position_in_partition::range_tag_t(), bound_view::top()}; - } - - static position_in_partition after_key(clustering_key ck) { - return {after_clustering_row_tag_t(), std::move(ck)}; - } - - static position_in_partition for_key(clustering_key ck) { - return {clustering_row_tag_t(), std::move(ck)}; - } - - static position_in_partition for_range_start(const query::clustering_range&); - static position_in_partition for_range_end(const query::clustering_range&); - - bool is_static_row() const { return !_ck; } - bool is_clustering_row() const { return _ck && !_bound_weight; } - - bool is_after_all_clustered_rows(const schema& s) const { - return _ck && _ck->is_empty(s) && _bound_weight > 0; - } - - template - void feed_hash(Hasher& hasher, const schema& s) const { - ::feed_hash(hasher, _bound_weight); - if (_ck) { - ::feed_hash(hasher, true); - _ck->feed_hash(hasher, s); - } else { - ::feed_hash(hasher, false); - } - } - - clustering_key_prefix& key() { - return *_ck; - } - const clustering_key_prefix& key() const { - return *_ck; - } - operator position_in_partition_view() const { - return { _bound_weight, _ck ? &*_ck : nullptr }; - } - - // Defines total order on the union of position_and_partition and composite objects. - // - // The ordering is compatible with position_range (r). 
The following is satisfied for - // all cells with name c included by the range: - // - // r.start() <= c < r.end() - // - // The ordering on composites given by this is compatible with but weaker than the cell name order. - // - // The ordering on position_in_partition given by this is compatible but weaker than the ordering - // given by position_in_partition::tri_compare. - // - class composite_tri_compare { - const schema& _s; - public: - composite_tri_compare(const schema& s) : _s(s) {} - - int operator()(position_in_partition_view a, position_in_partition_view b) const { - if (a.is_static_row() || b.is_static_row()) { - return b.is_static_row() - a.is_static_row(); - } - auto&& types = _s.clustering_key_type()->types(); - auto cmp = [&] (const data_type& t, bytes_view c1, bytes_view c2) { return t->compare(c1, c2); }; - return lexicographical_tri_compare(types.begin(), types.end(), - a._ck->begin(_s), a._ck->end(_s), - b._ck->begin(_s), b._ck->end(_s), - cmp, a.relation(), b.relation()); - } - - int operator()(position_in_partition_view a, composite_view b) const { - if (b.empty()) { - return 1; // a cannot be empty. - } - if (a.is_static_row() || b.is_static()) { - return b.is_static() - a.is_static_row(); - } - auto&& types = _s.clustering_key_type()->types(); - auto b_values = b.values(); - auto cmp = [&] (const data_type& t, bytes_view c1, bytes_view c2) { return t->compare(c1, c2); }; - return lexicographical_tri_compare(types.begin(), types.end(), - a._ck->begin(_s), a._ck->end(_s), - b_values.begin(), b_values.end(), - cmp, a.relation(), relation_for_lower_bound(b)); - } - - int operator()(composite_view a, position_in_partition_view b) const { - return -(*this)(b, a); - } - - int operator()(composite_view a, composite_view b) const { - if (a.is_static() != b.is_static()) { - return a.is_static() ? -1 : 1; - } - auto&& types = _s.clustering_key_type()->types(); - auto a_values = a.values(); - auto b_values = b.values(); - auto cmp = [&] (const data_type& t, bytes_view c1, bytes_view c2) { return t->compare(c1, c2); }; - return lexicographical_tri_compare(types.begin(), types.end(), - a_values.begin(), a_values.end(), - b_values.begin(), b_values.end(), - cmp, - relation_for_lower_bound(a), - relation_for_lower_bound(b)); - } - }; - - // Less comparator giving the same order as composite_tri_compare. 
- class composite_less_compare { - composite_tri_compare _cmp; - public: - composite_less_compare(const schema& s) : _cmp(s) {} - - template - bool operator()(const T& a, const U& b) const { - return _cmp(a, b) < 0; - } - }; - - class tri_compare { - bound_view::tri_compare _cmp; - private: - template - int compare(const T& a, const U& b) const { - bool a_rt_weight = bool(a._ck); - bool b_rt_weight = bool(b._ck); - if (!a_rt_weight || !b_rt_weight) { - return a_rt_weight - b_rt_weight; - } - return _cmp(*a._ck, a._bound_weight, *b._ck, b._bound_weight); - } - public: - tri_compare(const schema& s) : _cmp(s) { } - int operator()(const position_in_partition& a, const position_in_partition& b) const { - return compare(a, b); - } - int operator()(const position_in_partition_view& a, const position_in_partition_view& b) const { - return compare(a, b); - } - int operator()(const position_in_partition& a, const position_in_partition_view& b) const { - return compare(a, b); - } - int operator()(const position_in_partition_view& a, const position_in_partition& b) const { - return compare(a, b); - } - }; - class less_compare { - tri_compare _cmp; - public: - less_compare(const schema& s) : _cmp(s) { } - bool operator()(const position_in_partition& a, const position_in_partition& b) const { - return _cmp(a, b) < 0; - } - bool operator()(const position_in_partition_view& a, const position_in_partition_view& b) const { - return _cmp(a, b) < 0; - } - bool operator()(const position_in_partition& a, const position_in_partition_view& b) const { - return _cmp(a, b) < 0; - } - bool operator()(const position_in_partition_view& a, const position_in_partition& b) const { - return _cmp(a, b) < 0; - } - }; - class equal_compare { - clustering_key_prefix::equality _equal; - template - bool compare(const T& a, const U& b) const { - bool a_rt_weight = bool(a._ck); - bool b_rt_weight = bool(b._ck); - return a_rt_weight == b_rt_weight - && (!a_rt_weight || (_equal(*a._ck, *b._ck) - && a._bound_weight == b._bound_weight)); - } - public: - equal_compare(const schema& s) : _equal(s) { } - bool operator()(const position_in_partition& a, const position_in_partition& b) const { - return compare(a, b); - } - bool operator()(const position_in_partition_view& a, const position_in_partition_view& b) const { - return compare(a, b); - } - bool operator()(const position_in_partition_view& a, const position_in_partition& b) const { - return compare(a, b); - } - bool operator()(const position_in_partition& a, const position_in_partition_view& b) const { - return compare(a, b); - } - }; - friend std::ostream& operator<<(std::ostream&, const position_in_partition&); -}; - -inline -position_in_partition position_in_partition::for_range_start(const query::clustering_range& r) { - return {position_in_partition::range_tag_t(), bound_view::from_range_start(r)}; -} - -inline -position_in_partition position_in_partition::for_range_end(const query::clustering_range& r) { - return {position_in_partition::range_tag_t(), bound_view::from_range_end(r)}; -} - -// Returns true if and only if there can't be any clustering_row with position > a and < b. -// It is assumed that a <= b. 
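
The comparators above all reduce to one rule: positions are ordered by clustering prefix first, with the bound weight (-1 just before a key, 0 at the key, +1 just after it) breaking ties, which is how range bounds interleave with clustering rows; the helper defined next relies on the same weights. A standalone sketch of the weight ordering over full keys (toy integer prefixes; the prefix-relation subtleties handled by lexicographical_relation are omitted):

    #include <cassert>
    #include <vector>

    // Standalone model of the (prefix, bound weight) ordering: equal prefixes
    // are ordered by weight, so a start bound sorts before the row it names
    // and an "after" position sorts just past it.
    using prefix = std::vector<int>;

    int tri_compare(const prefix& a, int wa, const prefix& b, int wb) {
        // lexicographic comparison of the prefixes first
        for (size_t i = 0; i < a.size() && i < b.size(); ++i) {
            if (a[i] != b[i]) return a[i] < b[i] ? -1 : 1;
        }
        if (a.size() != b.size()) return a.size() < b.size() ? -1 : 1;
        return wa - wb;  // equal prefixes: the bound weight breaks the tie
    }

    int main() {
        prefix key{1, 2};
        assert(tri_compare(key, -1, key, 0) < 0);    // start bound < the row
        assert(tri_compare(key, 0, key, 1) < 0);     // the row < "after" it
        assert(tri_compare({1, 2}, 1, {1, 3}, -1) < 0); // after {1,2} < before {1,3}
    }
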
-inline -bool no_clustering_row_between(const schema& s, position_in_partition_view a, position_in_partition_view b) { - clustering_key_prefix::equality eq(s); - if (a._ck && b._ck) { - return eq(*a._ck, *b._ck) && (a._bound_weight >= 0 || b._bound_weight <= 0); - } else { - return !a._ck && !b._ck; - } -} - -// Includes all position_in_partition objects "p" for which: start <= p < end -// And only those. -class position_range { -private: - position_in_partition _start; - position_in_partition _end; -public: - static position_range from_range(const query::clustering_range&); - - static position_range for_static_row() { - return { - position_in_partition(position_in_partition::static_row_tag_t()), - position_in_partition(position_in_partition::after_static_row_tag_t()) - }; - } - - static position_range full() { - return { - position_in_partition(position_in_partition::static_row_tag_t()), - position_in_partition::after_all_clustered_rows() - }; - } - - static position_range all_clustered_rows() { - return { - position_in_partition::before_all_clustered_rows(), - position_in_partition::after_all_clustered_rows() - }; - } - - position_range(position_range&&) = default; - position_range& operator=(position_range&&) = default; - position_range(const position_range&) = default; - position_range& operator=(const position_range&) = default; - - // Constructs position_range which covers the same rows as given clustering_range. - // position_range includes a fragment if it includes position of that fragment. - position_range(const query::clustering_range&); - position_range(query::clustering_range&&); - - position_range(position_in_partition start, position_in_partition end) - : _start(std::move(start)) - , _end(std::move(end)) - { } - - const position_in_partition& start() const& { return _start; } - position_in_partition&& start() && { return std::move(_start); } - const position_in_partition& end() const& { return _end; } - position_in_partition&& end() && { return std::move(_end); } - bool contains(const schema& s, position_in_partition_view pos) const; - bool overlaps(const schema& s, position_in_partition_view start, position_in_partition_view end) const; - - friend std::ostream& operator<<(std::ostream&, const position_range&); -}; - -inline -bool position_range::contains(const schema& s, position_in_partition_view pos) const { - position_in_partition::less_compare less(s); - return !less(pos, _start) && less(pos, _end); -} - -inline -bool position_range::overlaps(const schema& s, position_in_partition_view start, position_in_partition_view end) const { - position_in_partition::less_compare less(s); - return !less(end, _start) && less(start, _end); -} diff --git a/scylla/query-request.hh b/scylla/query-request.hh deleted file mode 100644 index cafcd1d..0000000 --- a/scylla/query-request.hh +++ /dev/null @@ -1,197 +0,0 @@ - -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include <experimental/optional>
-
-#include "keys.hh"
-#include "dht/i_partitioner.hh"
-#include "enum_set.hh"
-#include "range.hh"
-#include "tracing/tracing.hh"
-
-namespace query {
-
-template <typename T>
-using range = wrapping_range<T>;
-
-using ring_position = dht::ring_position;
-using clustering_range = nonwrapping_range<clustering_key_prefix>;
-
-extern const dht::partition_range full_partition_range;
-extern const clustering_range full_clustering_range;
-
-inline
-bool is_single_partition(const dht::partition_range& range) {
-    return range.is_singular() && range.start()->value().has_key();
-}
-
-typedef std::vector<clustering_range> clustering_row_ranges;
-
-class specific_ranges {
-public:
-    specific_ranges(partition_key pk, clustering_row_ranges ranges)
-        : _pk(std::move(pk)), _ranges(std::move(ranges)) {
-    }
-    specific_ranges(const specific_ranges&) = default;
-
-    void add(const schema& s, partition_key pk, clustering_row_ranges ranges) {
-        if (!_pk.equal(s, pk)) {
-            throw std::runtime_error("Only single specific range supported currently");
-        }
-        _pk = std::move(pk);
-        _ranges = std::move(ranges);
-    }
-    bool contains(const schema& s, const partition_key& pk) {
-        return _pk.equal(s, pk);
-    }
-    size_t size() const {
-        return 1;
-    }
-    const clustering_row_ranges* range_for(const schema& s, const partition_key& key) const {
-        if (_pk.equal(s, key)) {
-            return &_ranges;
-        }
-        return nullptr;
-    }
-    const partition_key& pk() const {
-        return _pk;
-    }
-    const clustering_row_ranges& ranges() const {
-        return _ranges;
-    }
-private:
-    friend std::ostream& operator<<(std::ostream& out, const specific_ranges& r);
-
-    partition_key _pk;
-    clustering_row_ranges _ranges;
-};
-
-constexpr auto max_rows = std::numeric_limits<uint32_t>::max();
-
-// Specifies the subset of rows, columns and cell attributes to be returned in a query.
-// Can be accessed across cores.
-// Schema-dependent.
-class partition_slice {
-public:
-    enum class option { send_clustering_key, send_partition_key, send_timestamp, send_expiry, reversed, distinct, collections_as_maps, send_ttl,
-        allow_short_read, };
-    using option_set = enum_set<super_enum<option,
-        option::send_clustering_key, option::send_partition_key, option::send_timestamp,
-        option::send_expiry, option::reversed, option::distinct, option::collections_as_maps,
-        option::send_ttl, option::allow_short_read>>;
-    clustering_row_ranges _row_ranges;
-public:
-    std::vector<column_id> static_columns; // TODO: consider using bitmap
-    std::vector<column_id> regular_columns; // TODO: consider using bitmap
-    option_set options;
-private:
-    std::unique_ptr<specific_ranges> _specific_ranges;
-    cql_serialization_format _cql_format;
-    uint32_t _partition_row_limit;
-public:
-    partition_slice(clustering_row_ranges row_ranges, std::vector<column_id> static_columns,
-        std::vector<column_id> regular_columns, option_set options,
-        std::unique_ptr<specific_ranges> specific_ranges = nullptr,
-        cql_serialization_format = cql_serialization_format::internal(),
-        uint32_t partition_row_limit = max_rows);
-    partition_slice(const partition_slice&);
-    partition_slice(partition_slice&&);
-    ~partition_slice();
-
-    partition_slice& operator=(partition_slice&& other) noexcept;
-
-    const clustering_row_ranges& row_ranges(const schema&, const partition_key&) const;
-    void set_range(const schema&, const partition_key&, clustering_row_ranges);
-    void clear_range(const schema&, const partition_key&);
-    // FIXME: possibly make this function return a const ref instead.
-    clustering_row_ranges get_all_ranges() const;
-
-    const clustering_row_ranges& default_row_ranges() const {
-        return _row_ranges;
-    }
-    const std::unique_ptr<specific_ranges>& get_specific_ranges() const {
-        return _specific_ranges;
-    }
-    const cql_serialization_format& cql_format() const {
-        return _cql_format;
-    }
-    const uint32_t partition_row_limit() const {
-        return _partition_row_limit;
-    }
-    void set_partition_row_limit(uint32_t limit) {
-        _partition_row_limit = limit;
-    }
-
-    friend std::ostream& operator<<(std::ostream& out, const partition_slice& ps);
-    friend std::ostream& operator<<(std::ostream& out, const specific_ranges& ps);
-};
-
-constexpr auto max_partitions = std::numeric_limits<uint32_t>::max();
-
-// This is a partition slice with a full clustering row range and the maximum
-// per-partition row limit. No options or columns are set.
-extern const query::partition_slice full_slice;
-
-// Full specification of a query to the database.
-// Intended for passing across replicas.
-// Can be accessed across cores.
-class read_command {
-public:
-    utils::UUID cf_id;
-    table_schema_version schema_version; // TODO: This should be enough, drop cf_id
-    partition_slice slice;
-    uint32_t row_limit;
-    gc_clock::time_point timestamp;
-    std::experimental::optional<tracing::trace_info> trace_info;
-    uint32_t partition_limit; // The maximum number of live partitions to return.
-    api::timestamp_type read_timestamp; // not serialized
-public:
-    read_command(utils::UUID cf_id,
-                 table_schema_version schema_version,
-                 partition_slice slice,
-                 uint32_t row_limit = max_rows,
-                 gc_clock::time_point now = gc_clock::now(),
-                 std::experimental::optional<tracing::trace_info> ti = std::experimental::nullopt,
-                 uint32_t partition_limit = max_partitions,
-                 api::timestamp_type rt = api::missing_timestamp)
-        : cf_id(std::move(cf_id))
-        , schema_version(std::move(schema_version))
-        , slice(std::move(slice))
-        , row_limit(row_limit)
-        , timestamp(now)
-        , trace_info(std::move(ti))
-        , partition_limit(partition_limit)
-        , read_timestamp(rt)
-    { }
-
-    friend std::ostream& operator<<(std::ostream& out, const read_command& r);
-};
-
-}
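
row_ranges() above implements a simple lookup contract: a per-partition override, when one has been set, takes precedence over the default clustering ranges. A standalone sketch of that contract with toy types (not the classes above):

    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    // Standalone model of the slice's range lookup: a query carries default
    // clustering ranges plus (currently at most one) per-partition override,
    // consulted before falling back to the default.
    using ranges = std::vector<std::pair<int, int>>;

    struct slice {
        ranges default_ranges;
        std::map<std::string, ranges> specific;  // keyed by partition key

        const ranges& row_ranges(const std::string& pk) const {
            auto it = specific.find(pk);
            return it != specific.end() ? it->second : default_ranges;
        }
    };

    int main() {
        slice s{{{0, 100}}, {}};
        s.specific["pk1"] = {{10, 20}};
        assert(s.row_ranges("pk1").front().second == 20);   // override wins
        assert(s.row_ranges("pk2").front().second == 100);  // default elsewhere
    }
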
diff --git a/scylla/query-result-reader.hh b/scylla/query-result-reader.hh
deleted file mode 100644
index cf9da7e..0000000
--- a/scylla/query-result-reader.hh
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */ - -#pragma once - -#include "query-request.hh" -#include "query-result.hh" -#include "utils/data_input.hh" -#include "digest_algorithm.hh" - -#include "idl/uuid.dist.hh" -#include "idl/keys.dist.hh" -#include "idl/query.dist.hh" -#include "serializer_impl.hh" -#include "serialization_visitors.hh" -#include "idl/query.dist.impl.hh" -#include "idl/keys.dist.impl.hh" -#include "idl/uuid.dist.impl.hh" - -namespace query { - -class result_atomic_cell_view { - api::timestamp_type _timestamp; - expiry_opt _expiry; - ttl_opt _ttl; - bytes_view _value; -public: - result_atomic_cell_view(api::timestamp_type timestamp, expiry_opt expiry, ttl_opt ttl, bytes_view value) - : _timestamp(timestamp), _expiry(expiry), _ttl(ttl), _value(value) { } - - api::timestamp_type timestamp() const { - return _timestamp; - } - - expiry_opt expiry() const { - return _expiry; - } - - ttl_opt ttl() const { - return _ttl; - } - - bytes_view value() const { - return _value; - } -}; - -// Contains cells in the same order as requested by partition_slice. -// Contains only live cells. -class result_row_view { - ser::qr_row_view _v; -public: - result_row_view(ser::qr_row_view v) : _v(v) {} - - class iterator_type { - using cells_vec = std::vector>; - cells_vec _cells; - cells_vec::iterator _i; - bytes _tmp_value; - public: - iterator_type(ser::qr_row_view v) - : _cells(v.cells()) - , _i(_cells.begin()) - { } - std::experimental::optional next_atomic_cell() { - auto cell_opt = *_i++; - if (!cell_opt) { - return {}; - } - ser::qr_cell_view v = *cell_opt; - api::timestamp_type timestamp = v.timestamp().value_or(api::missing_timestamp); - expiry_opt expiry = v.expiry(); - ttl_opt ttl = v.ttl(); - _tmp_value = v.value(); - return {result_atomic_cell_view(timestamp, expiry, ttl, _tmp_value)}; - } - std::experimental::optional next_collection_cell() { - auto cell_opt = *_i++; - if (!cell_opt) { - return {}; - } - ser::qr_cell_view v = *cell_opt; - _tmp_value = v.value(); - return {bytes_view(_tmp_value)}; - }; - void skip(const column_definition& def) { - ++_i; - } - }; - - iterator_type iterator() const { - return iterator_type(_v); - } -}; - -// Describes expectations about the ResultVisitor concept. -// -// Interaction flow: -// -> accept_new_partition() -// -> accept_new_row() -// -> accept_new_row() -// -> accept_partition_end() -// -> accept_new_partition() -// -> accept_new_row() -// -> accept_new_row() -// -> accept_new_row() -// -> accept_partition_end() -// ... 
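
A standalone sketch of that interaction flow, using a toy in-memory result instead of the serialized one (the result_visitor struct below documents the full set of expected signatures):

    #include <cassert>
    #include <string>
    #include <vector>

    // Standalone model of the visitor flow documented above: the consumer
    // walks partitions and rows in order, calling accept_new_partition(),
    // accept_new_row() per row, then accept_partition_end().
    struct toy_partition { std::string key; std::vector<std::string> rows; };

    template <typename Visitor>
    void consume(const std::vector<toy_partition>& parts, Visitor&& v) {
        for (auto&& p : parts) {
            v.accept_new_partition(p.key, p.rows.size());
            for (auto&& r : p.rows) {
                v.accept_new_row(r);
            }
            v.accept_partition_end();
        }
    }

    struct row_counter {
        unsigned rows = 0, partitions = 0;
        void accept_new_partition(const std::string&, size_t) { ++partitions; }
        void accept_new_row(const std::string&) { ++rows; }
        void accept_partition_end() {}
    };

    int main() {
        row_counter c;
        consume({{"pk1", {"r1", "r2"}}, {"pk2", {"r3"}}}, c);
        assert(c.partitions == 2 && c.rows == 3);
    }
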
-// -struct result_visitor { - void accept_new_partition( - const partition_key& key, // FIXME: use view for the key - uint32_t row_count) {} - - void accept_new_partition(uint32_t row_count) {} - - void accept_new_row( - const clustering_key& key, // FIXME: use view for the key - const result_row_view& static_row, - const result_row_view& row) {} - - void accept_new_row(const result_row_view& static_row, const result_row_view& row) {} - - void accept_partition_end(const result_row_view& static_row) {} -}; - -class result_view { - ser::query_result_view _v; - friend class result_merger; -public: - result_view(const bytes_ostream& v) : _v(ser::query_result_view{ser::as_input_stream(v)}) {} - result_view(ser::query_result_view v) : _v(v) {} - - template - static auto do_with(const query::result& res, Func&& func) { - result_view view(res.buf()); - return func(view); - } - - template - static void consume(const query::result& res, const partition_slice& slice, ResultVisitor&& visitor) { - do_with(res, [&] (result_view v) { - v.consume(slice, visitor); - }); - } - - template - void consume(const partition_slice& slice, ResultVisitor&& visitor) { - for (auto&& p : _v.partitions()) { - auto rows = p.rows(); - auto row_count = rows.size(); - if (slice.options.contains()) { - auto key = *p.key(); - visitor.accept_new_partition(key, row_count); - } else { - visitor.accept_new_partition(row_count); - } - - result_row_view static_row(p.static_row()); - - for (auto&& row : rows) { - result_row_view view(row.cells()); - if (slice.options.contains()) { - visitor.accept_new_row(*row.key(), static_row, view); - } else { - visitor.accept_new_row(static_row, view); - } - } - - visitor.accept_partition_end(static_row); - } - } -}; - -} diff --git a/scylla/query-result-set.cc b/scylla/query-result-set.cc deleted file mode 100644 index f849230..0000000 --- a/scylla/query-result-set.cc +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "query-result-set.hh" -#include "query-result-reader.hh" -#include "partition_slice_builder.hh" -#include "mutation.hh" - -namespace query { - -// Result set builder is passed as a visitor to query_result::consume() -// function. You can call the build() method to obtain a result set that -// contains cells from the visited results. -class result_set_builder { - schema_ptr _schema; - const partition_slice& _slice; - std::vector _rows; - std::unordered_map _pkey_cells; - uint32_t _row_count; -public: - // Keep slice live as long as the builder is used. 
- result_set_builder(schema_ptr schema, const partition_slice& slice); - result_set build() const; - void accept_new_partition(const partition_key& key, uint32_t row_count); - void accept_new_partition(uint32_t row_count); - void accept_new_row(const clustering_key& key, const result_row_view& static_row, const result_row_view& row); - void accept_new_row(const result_row_view &static_row, const result_row_view &row); - void accept_partition_end(const result_row_view& static_row); -private: - std::unordered_map deserialize(const partition_key& key); - std::unordered_map deserialize(const clustering_key& key); - std::unordered_map deserialize(const result_row_view& row, bool is_static); -}; - -std::ostream& operator<<(std::ostream& out, const result_set_row& row) { - for (auto&& cell : row._cells) { - auto&& type = cell.second.type(); - auto&& value = cell.second; - out << cell.first << "=\"" << type->to_string(type->decompose(value)) << "\" "; - } - return out; -} - -std::ostream& operator<<(std::ostream& out, const result_set& rs) { - for (auto&& row : rs._rows) { - out << row << std::endl; - } - return out; -} - -result_set_builder::result_set_builder(schema_ptr schema, const partition_slice& slice) - : _schema{schema}, _slice(slice) -{ } - -result_set result_set_builder::build() const { - return { _schema, _rows }; -} - -void result_set_builder::accept_new_partition(const partition_key& key, uint32_t row_count) -{ - _pkey_cells = deserialize(key); - accept_new_partition(row_count); -} - -void result_set_builder::accept_new_partition(uint32_t row_count) -{ - _row_count = row_count; -} - -void result_set_builder::accept_new_row(const clustering_key& key, const result_row_view& static_row, const result_row_view& row) -{ - auto ckey_cells = deserialize(key); - auto static_cells = deserialize(static_row, true); - auto regular_cells = deserialize(row, false); - - std::unordered_map cells; - cells.insert(_pkey_cells.begin(), _pkey_cells.end()); - cells.insert(ckey_cells.begin(), ckey_cells.end()); - cells.insert(static_cells.begin(), static_cells.end()); - cells.insert(regular_cells.begin(), regular_cells.end()); - _rows.emplace_back(_schema, std::move(cells)); -} - -void result_set_builder::accept_new_row(const query::result_row_view &static_row, const query::result_row_view &row) -{ - auto static_cells = deserialize(static_row, true); - auto regular_cells = deserialize(row, false); - - std::unordered_map cells; - cells.insert(_pkey_cells.begin(), _pkey_cells.end()); - cells.insert(static_cells.begin(), static_cells.end()); - cells.insert(regular_cells.begin(), regular_cells.end()); - _rows.emplace_back(_schema, std::move(cells)); -} - -void result_set_builder::accept_partition_end(const result_row_view& static_row) -{ - if (_row_count == 0) { - auto static_cells = deserialize(static_row, true); - std::unordered_map cells; - cells.insert(_pkey_cells.begin(), _pkey_cells.end()); - cells.insert(static_cells.begin(), static_cells.end()); - _rows.emplace_back(_schema, std::move(cells)); - } - _pkey_cells.clear(); -} - -std::unordered_map -result_set_builder::deserialize(const partition_key& key) -{ - std::unordered_map cells; - auto i = key.begin(*_schema); - for (auto&& col : _schema->partition_key_columns()) { - cells.emplace(col.name_as_text(), col.type->deserialize_value(*i)); - ++i; - } - return cells; -} - -std::unordered_map -result_set_builder::deserialize(const clustering_key& key) -{ - std::unordered_map cells; - auto i = key.begin(*_schema); - for (auto&& col : 
_schema->clustering_key_columns()) { - if (i == key.end(*_schema)) { - break; - } - cells.emplace(col.name_as_text(), col.type->deserialize_value(*i)); - ++i; - } - return cells; -} - -std::unordered_map -result_set_builder::deserialize(const result_row_view& row, bool is_static) -{ - std::unordered_map cells; - auto i = row.iterator(); - auto column_ids = is_static ? _slice.static_columns : _slice.regular_columns; - auto columns = column_ids | boost::adaptors::transformed([this, is_static] (column_id id) -> const column_definition& { - if (is_static) { - return _schema->static_column_at(id); - } else { - return _schema->regular_column_at(id); - } - }); - for (auto &&col : columns) { - if (col.is_atomic()) { - auto cell = i.next_atomic_cell(); - if (cell) { - auto view = cell.value(); - cells.emplace(col.name_as_text(), col.type->deserialize_value(view.value())); - } - } else { - auto cell = i.next_collection_cell(); - if (cell) { - auto ctype = static_pointer_cast(col.type); - if (_slice.options.contains()) { - ctype = map_type_impl::get_instance(ctype->name_comparator(), ctype->value_comparator(), true); - } - cells.emplace(col.name_as_text(), ctype->deserialize_value(*cell, _slice.cql_format())); - } - } - } - return cells; -} - -result_set -result_set::from_raw_result(schema_ptr s, const partition_slice& slice, const result& r) { - result_set_builder builder{std::move(s), slice}; - result_view::consume(r, slice, builder); - return builder.build(); -} - -result_set::result_set(const mutation& m) : result_set([&m] { - auto slice = partition_slice_builder(*m.schema()).build(); - auto qr = mutation(m).query(slice, result_request::only_result); - return result_set::from_raw_result(m.schema(), slice, qr); -}()) -{ } - -} diff --git a/scylla/query-result-set.hh b/scylla/query-result-set.hh deleted file mode 100644 index d2ba4bb..0000000 --- a/scylla/query-result-set.hh +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - - -#include "core/shared_ptr.hh" -#include "query-request.hh" -#include "query-result.hh" -#include "schema.hh" -#include "mutation.hh" - -#include -#include - -namespace query { - -class no_such_column : public std::runtime_error { -public: - using runtime_error::runtime_error; -}; - -class null_column_value : public std::runtime_error { -public: - using runtime_error::runtime_error; -}; - -// Result set row is a set of cells that are associated with a row -// including regular column cells, partition keys, as well as static values. 
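
The deserialize() overloads above feed result_set_builder::accept_new_row(), which simply merges the four cell maps; result_set_row, declared next, is the merged product. A standalone sketch of the merge, with plain string maps standing in for data_value cells:

    #include <cassert>
    #include <string>
    #include <unordered_map>

    // Standalone model of row assembly: cells deserialized from the partition
    // key, clustering key, static row and regular row are merged into one
    // name -> value map. unordered_map::insert() keeps existing entries, so
    // earlier sources are never overwritten by later ones.
    using cell_map = std::unordered_map<std::string, std::string>;

    cell_map make_row(const cell_map& pk, const cell_map& ck,
                      const cell_map& statics, const cell_map& regulars) {
        cell_map row;
        for (auto* src : {&pk, &ck, &statics, &regulars}) {
            row.insert(src->begin(), src->end());
        }
        return row;
    }

    int main() {
        auto row = make_row({{"pk", "p1"}}, {{"ck", "c1"}},
                            {{"s", "static"}}, {{"v", "42"}});
        assert(row.size() == 4 && row.at("v") == "42");
    }
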
-class result_set_row { - schema_ptr _schema; - std::unordered_map _cells; -public: - result_set_row(schema_ptr schema, std::unordered_map&& cells) - : _schema{schema} - , _cells{std::move(cells)} - { } - bool has(const sstring& column_name) const { - return _cells.count(column_name) > 0; - } - // Look up a deserialized row cell value by column name; throws no_such_column on error - const data_value& - get_data_value(const sstring& column_name) const { - auto it = _cells.find(column_name); - if (it == _cells.end()) { - throw no_such_column(column_name); - } - return it->second; - } - // Look up a deserialized row cell value by column name; throws no_such_column on error. - template - std::experimental::optional - get(const sstring& column_name) const { - auto&& value = get_data_value(column_name); - if (value.is_null()) { - return std::experimental::nullopt; - } - return std::experimental::optional{value_cast(value)}; - } - // throws no_such_column or null_column_value on error - template - T get_nonnull(const sstring& column_name) const { - auto v = get(column_name); - if (v) { - return *v; - } - throw null_column_value(column_name); - } - const std::unordered_map& cells() const { return _cells; } - friend inline bool operator==(const result_set_row& x, const result_set_row& y); - friend inline bool operator!=(const result_set_row& x, const result_set_row& y); - friend std::ostream& operator<<(std::ostream& out, const result_set_row& row); -}; - -inline bool operator==(const result_set_row& x, const result_set_row& y) { - return x._schema == y._schema && x._cells == y._cells; -} - -inline bool operator!=(const result_set_row& x, const result_set_row& y) { - return !(x == y); -} - -// Result set is an in-memory representation of query results in -// deserialized format. To obtain a result set, use the result_set_builder -// class as a visitor to query_result::consume() function. -class result_set { - schema_ptr _schema; - std::vector _rows; -public: - static result_set from_raw_result(schema_ptr, const partition_slice&, const result&); - result_set(schema_ptr s, const std::vector& rows) - : _schema(std::move(s)), _rows{std::move(rows)} - { } - explicit result_set(const mutation&); - bool empty() const { - return _rows.empty(); - } - // throws std::out_of_range on error - const result_set_row& row(size_t idx) const { - if (idx >= _rows.size()) { - throw std::out_of_range("no such row in result set: " + std::to_string(idx)); - } - return _rows[idx]; - } - const std::vector& rows() const { - return _rows; - } - const schema_ptr& schema() const { - return _schema; - } - friend inline bool operator==(const result_set& x, const result_set& y); - friend std::ostream& operator<<(std::ostream& out, const result_set& rs); -}; - -inline bool operator==(const result_set& x, const result_set& y) { - return x._rows == y._rows; -} - -} diff --git a/scylla/query-result-writer.hh b/scylla/query-result-writer.hh deleted file mode 100644 index 5fab3da..0000000 --- a/scylla/query-result-writer.hh +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "types.hh" -#include "atomic_cell.hh" -#include "query-request.hh" -#include "query-result.hh" -#include "digest_algorithm.hh" - -#include "idl/uuid.dist.hh" -#include "idl/keys.dist.hh" -#include "idl/query.dist.hh" -#include "serializer_impl.hh" -#include "serialization_visitors.hh" -#include "idl/query.dist.impl.hh" -#include "idl/keys.dist.impl.hh" -#include "idl/uuid.dist.impl.hh" - -namespace query { - -class result::partition_writer { - result_request _request; - ser::after_qr_partition__key _w; - const partition_slice& _slice; - // We are tasked with keeping track of the range - // as well, since we are the primary "context" - // when iterating "inside" a partition - const clustering_row_ranges& _ranges; - ser::query_result__partitions& _pw; - ser::vector_position _pos; - bool _static_row_added = false; - md5_hasher& _digest; - md5_hasher _digest_pos; - uint32_t& _row_count; - uint32_t& _partition_count; - api::timestamp_type& _last_modified; -public: - partition_writer( - result_request request, - const partition_slice& slice, - const clustering_row_ranges& ranges, - ser::query_result__partitions& pw, - ser::vector_position pos, - ser::after_qr_partition__key w, - md5_hasher& digest, - uint32_t& row_count, - uint32_t& partition_count, - api::timestamp_type& last_modified) - : _request(request) - , _w(std::move(w)) - , _slice(slice) - , _ranges(ranges) - , _pw(pw) - , _pos(std::move(pos)) - , _digest(digest) - , _digest_pos(digest) - , _row_count(row_count) - , _partition_count(partition_count) - , _last_modified(last_modified) - { } - - bool requested_digest() const { - return _request != result_request::only_result; - } - - bool requested_result() const { - return _request != result_request::only_digest; - } - - ser::after_qr_partition__key start() { - return std::move(_w); - } - - // Cancels the whole partition element. - // Can be called at any stage of writing before this element is finalized. - // Do not use this writer after that. 
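
retract(), defined next, is implemented as checkpoint restoration: the writer records the serializer position and the digest state when the partition is opened, and rolling back simply restores both, erasing a partially written partition. A standalone sketch of the idea, with a plain string buffer standing in for the serializer and digest:

    #include <cassert>
    #include <string>

    // Standalone model of retract(): checkpoint the output position when a
    // partition starts; rolling back restores the checkpoint, so a partial
    // partition leaves no trace in the output.
    struct checkpointing_writer {
        std::string out;
        size_t checkpoint = 0;

        void start_partition() { checkpoint = out.size(); }
        void append(const std::string& s) { out += s; }
        void retract() { out.resize(checkpoint); }  // drop everything since start
    };

    int main() {
        checkpointing_writer w;
        w.append("partition1;");
        w.start_partition();
        w.append("partial partition2");
        w.retract();  // partition2 never happened
        assert(w.out == "partition1;");
    }
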
- void retract() { - _digest = _digest_pos; - _pw.rollback(_pos); - } - - const clustering_row_ranges& ranges() const { - return _ranges; - } - const partition_slice& slice() const { - return _slice; - } - md5_hasher& digest() { - return _digest; - } - uint32_t& row_count() { - return _row_count; - } - uint32_t& partition_count() { - return _partition_count; - } - api::timestamp_type& last_modified() { - return _last_modified; - } - -}; - -class result::builder { - bytes_ostream _out; - md5_hasher _digest; - const partition_slice& _slice; - ser::query_result__partitions _w; - result_request _request; - uint32_t _row_count = 0; - uint32_t _partition_count = 0; - api::timestamp_type _last_modified = api::missing_timestamp; - short_read _short_read; - result_memory_accounter _memory_accounter; -public: - builder(const partition_slice& slice, result_request request, result_memory_accounter memory_accounter) - : _slice(slice) - , _w(ser::writer_of_query_result(_out).start_partitions()) - , _request(request) - , _memory_accounter(std::move(memory_accounter)) - { } - builder(builder&&) = delete; // _out is captured by reference - - void mark_as_short_read() { _short_read = short_read::yes; } - short_read is_short_read() const { return _short_read; } - - result_memory_accounter& memory_accounter() { return _memory_accounter; } - - const partition_slice& slice() const { return _slice; } - - uint32_t row_count() const { - return _row_count; - } - - uint32_t partition_count() const { - return _partition_count; - } - - // Starts new partition and returns a builder for its contents. - // Invalidates all previously obtained builders - partition_writer add_partition(const schema& s, const partition_key& key) { - auto pos = _w.pos(); - // fetch the row range for this partition already. - auto& ranges = _slice.row_ranges(s, key); - auto after_key = [this, pw = _w.add(), &key] () mutable { - if (_slice.options.contains()) { - return std::move(pw).write_key(key); - } else { - return std::move(pw).skip_key(); - } - }(); - if (_request != result_request::only_result) { - key.feed_hash(_digest, s); - } - return partition_writer(_request, _slice, ranges, _w, std::move(pos), std::move(after_key), _digest, _row_count, - _partition_count, _last_modified); - } - - result build() { - std::move(_w).end_partitions().end_query_result(); - switch (_request) { - case result_request::only_result: - return result(std::move(_out), _short_read, _row_count, _partition_count, std::move(_memory_accounter).done()); - case result_request::only_digest: { - bytes_ostream buf; - ser::writer_of_query_result(buf).start_partitions().end_partitions().end_query_result(); - return result(std::move(buf), result_digest(_digest.finalize_array()), _last_modified, _short_read); - } - case result_request::result_and_digest: - return result(std::move(_out), result_digest(_digest.finalize_array()), - _last_modified, _short_read, _row_count, _partition_count, std::move(_memory_accounter).done()); - } - abort(); - } -}; - -} diff --git a/scylla/query-result.hh b/scylla/query-result.hh deleted file mode 100644 index 254ea02..0000000 --- a/scylla/query-result.hh +++ /dev/null @@ -1,406 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#define CRYPTOPP_ENABLE_NAMESPACE_WEAK 1 -#include -#include "bytes_ostream.hh" -#include "query-request.hh" -#include "md5_hasher.hh" -#include -#include -#include "seastarx.hh" - -namespace query { - -// result_memory_limiter, result_memory_accounter and result_memory_tracker -// form an infrastructure for limiting size of query results. -// -// result_memory_limiter is a shard-local object which ensures that all results -// combined do not use more than 10% of the shard memory. -// -// result_memory_accounter is used by result producers, updates the shard-local -// limits as well as keeps track of the individual maximum result size limit -// which is 1 MB. -// -// result_memory_tracker is just an object that makes sure the -// result_memory_limiter is notified when memory is released (but not sooner). - -class result_memory_accounter; - -class result_memory_limiter { - const size_t _maximum_total_result_memory; - semaphore _memory_limiter; -public: - static constexpr size_t minimum_result_size = 4 * 1024; - static constexpr size_t maximum_result_size = 1 * 1024 * 1024; -public: - result_memory_limiter() - : _maximum_total_result_memory(memory::stats().total_memory() / 10) - , _memory_limiter(_maximum_total_result_memory) - { } - - result_memory_limiter(const result_memory_limiter&) = delete; - result_memory_limiter(result_memory_limiter&&) = delete; - - ssize_t total_used_memory() const { - return _maximum_total_result_memory - _memory_limiter.available_units(); - } - - // Reserves minimum_result_size and creates new memory accounter for - // mutation query. Uses the specified maximum result size and may be - // stopped before reaching it due to memory pressure on shard. - future new_mutation_read(size_t max_result_size); - - // Reserves minimum_result_size and creates new memory accounter for - // data query. Uses the specified maximum result size, result will *not* - // be stopped due to on shard memory pressure in order to avoid digest - // mismatches. - future new_data_read(size_t max_result_size); - - // Creates a memory accounter for digest reads. Such accounter doesn't - // contribute to the shard memory usage, but still stops producing the - // result after individual limit has been reached. - future new_digest_read(size_t max_result_size); - - // Checks whether the result can grow any more, takes into account only - // the per shard limit. - stop_iteration check() const { - return stop_iteration(_memory_limiter.current() <= 0); - } - - // Consumes n bytes from memory limiter and checks whether the result - // can grow any more (considering just the per-shard limit). 
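
A standalone sketch of that per-shard budget, where a signed counter stands in for Seastar's semaphore (whose consume(), used by update_and_check() below, may likewise drive the count negative):

    #include <cassert>
    #include <cstddef>

    // Standalone model of the shard-wide limiter: consuming past zero means
    // "stop growing results"; releasing returns budget when a result dies.
    struct limiter {
        long available;  // may go negative, like semaphore::consume()

        bool check() const { return available <= 0; }  // true = stop
        bool update_and_check(size_t n) {
            available -= static_cast<long>(n);
            return check();
        }
        void release(size_t n) { available += static_cast<long>(n); }
    };

    int main() {
        limiter l{1024};
        assert(!l.update_and_check(512));  // within budget, keep going
        assert(l.update_and_check(600));   // over budget, stop
        l.release(1112);
        assert(!l.check());                // budget restored
    }
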
-    stop_iteration update_and_check(size_t n) {
-        _memory_limiter.consume(n);
-        return check();
-    }
-
-    void release(size_t n) noexcept {
-        _memory_limiter.signal(n);
-    }
-
-    semaphore& sem() noexcept { return _memory_limiter; }
-};
-
-
-class result_memory_tracker {
-    semaphore_units<> _units;
-    size_t _used_memory;
-private:
-    static thread_local semaphore _dummy;
-public:
-    result_memory_tracker() noexcept : _units(_dummy, 0), _used_memory(0) { }
-    result_memory_tracker(semaphore& sem, size_t blocked, size_t used) noexcept
-        : _units(sem, blocked), _used_memory(used) { }
-    size_t used_memory() const { return _used_memory; }
-};
-
-class result_memory_accounter {
-    result_memory_limiter* _limiter = nullptr;
-    size_t _blocked_bytes = 0;
-    size_t _used_memory = 0;
-    size_t _total_used_memory = 0;
-    size_t _maximum_result_size = 0;
-    stop_iteration _stop_on_global_limit;
-private:
-    // Mutation query accounter. Uses provided individual result size limit and
-    // will stop when shard memory pressure grows too high.
-    struct mutation_query_tag { };
-    explicit result_memory_accounter(mutation_query_tag, result_memory_limiter& limiter, size_t max_size) noexcept
-        : _limiter(&limiter)
-        , _blocked_bytes(result_memory_limiter::minimum_result_size)
-        , _maximum_result_size(max_size)
-        , _stop_on_global_limit(true)
-    { }
-
-    // Data query accounter. Uses provided individual result size limit and
-    // will *not* stop even when shard memory pressure grows too high.
-    struct data_query_tag { };
-    explicit result_memory_accounter(data_query_tag, result_memory_limiter& limiter, size_t max_size) noexcept
-        : _limiter(&limiter)
-        , _blocked_bytes(result_memory_limiter::minimum_result_size)
-        , _maximum_result_size(max_size)
-    { }
-
-    // Digest query accounter. Uses provided individual result size limit and
-    // will *not* stop even when shard memory pressure grows too high. This
-    // accounter does not contribute to the shard memory limits.
-    struct digest_query_tag { };
-    explicit result_memory_accounter(digest_query_tag, result_memory_limiter&, size_t max_size) noexcept
-        : _blocked_bytes(0)
-        , _maximum_result_size(max_size)
-    { }
-
-    friend class result_memory_limiter;
-public:
-    // State of an accounter on another shard. Used to pass information about
-    // the size of the result so far in range queries.
-    class foreign_state {
-        size_t _used_memory;
-        size_t _max_result_size;
-    public:
-        foreign_state(size_t used_mem, size_t max_result_size)
-            : _used_memory(used_mem), _max_result_size(max_result_size) { }
-        size_t used_memory() const { return _used_memory; }
-        size_t max_result_size() const { return _max_result_size; }
-    };
-public:
-    result_memory_accounter() = default;
-
-    // This constructor is used in cases when a result is produced on multiple
-    // shards (range queries). foreign_accounter is an accounter that possibly
-    // exists on another shard and is used for merging the result. This
-    // accounter will learn how big the total result already is and limit the
-    // part produced on this shard so that after merging the final result
-    // does not exceed the individual limit.
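
A standalone sketch of that handoff, with plain structs and no semaphore: shard B starts counting from shard A's total, so the individual limit caps the merged result rather than each shard's share; the constructor below receives exactly this state:

    #include <cassert>
    #include <cstddef>

    // Standalone model of the cross-shard handoff: shard B's accounter starts
    // from the bytes shard A already produced, so the *merged* result is what
    // the individual limit caps.
    struct accounter {
        size_t total_used;  // includes bytes produced on other shards
        size_t max_size;

        bool update_and_check(size_t n) {  // true = stop producing
            total_used += n;
            return total_used > max_size;
        }
    };

    int main() {
        const size_t limit = 1000;
        accounter shard_a{0, limit};
        shard_a.update_and_check(700);           // shard A produced 700 bytes
        // hand shard A's state to shard B, as foreign_state does above
        accounter shard_b{shard_a.total_used, limit};
        assert(!shard_b.update_and_check(200));  // 900 total: still fine
        assert(shard_b.update_and_check(200));   // 1100 total: stop
    }
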
- result_memory_accounter(result_memory_limiter& limiter, foreign_state fstate) noexcept - : _limiter(&limiter) - , _total_used_memory(fstate.used_memory()) - , _maximum_result_size(fstate.max_result_size()) - { } - - result_memory_accounter(result_memory_accounter&& other) noexcept - : _limiter(std::exchange(other._limiter, nullptr)) - , _blocked_bytes(other._blocked_bytes) - , _used_memory(other._used_memory) - , _total_used_memory(other._total_used_memory) - , _maximum_result_size(other._maximum_result_size) - , _stop_on_global_limit(other._stop_on_global_limit) - { } - - result_memory_accounter& operator=(result_memory_accounter&& other) noexcept { - if (this != &other) { - this->~result_memory_accounter(); - new (this) result_memory_accounter(std::move(other)); - } - return *this; - } - - ~result_memory_accounter() { - if (_limiter) { - _limiter->release(_blocked_bytes); - } - } - - size_t used_memory() const { return _used_memory; } - - foreign_state state_for_another_shard() { - return foreign_state(_used_memory, _maximum_result_size); - } - - // Consume n more bytes for the result. Returns stop_iteration::yes if - // the result cannot grow any more (taking into account both individual - // and per-shard limits). - stop_iteration update_and_check(size_t n) { - _used_memory += n; - _total_used_memory += n; - auto stop = stop_iteration(_total_used_memory > _maximum_result_size); - if (_limiter && _used_memory > _blocked_bytes) { - auto to_block = std::min(_used_memory - _blocked_bytes, n); - _blocked_bytes += to_block; - stop = (_limiter->update_and_check(to_block) && _stop_on_global_limit) || stop; - } - return stop; - } - - // Checks whether the result can grow any more. - stop_iteration check() const { - stop_iteration stop { _total_used_memory > result_memory_limiter::maximum_result_size }; - if (!stop && _used_memory >= _blocked_bytes && _limiter) { - return _limiter->check() && _stop_on_global_limit; - } - return stop; - } - - // Consume n more bytes for the result. 
- void update(size_t n) { - update_and_check(n); - } - - result_memory_tracker done() && { - if (!_limiter) { - return { }; - } - auto& sem = std::exchange(_limiter, nullptr)->sem(); - return result_memory_tracker(sem, _blocked_bytes, _used_memory); - } -}; - -inline future result_memory_limiter::new_mutation_read(size_t max_size) { - return _memory_limiter.wait(minimum_result_size).then([this, max_size] { - return result_memory_accounter(result_memory_accounter::mutation_query_tag(), *this, max_size); - }); -} - -inline future result_memory_limiter::new_data_read(size_t max_size) { - return _memory_limiter.wait(minimum_result_size).then([this, max_size] { - return result_memory_accounter(result_memory_accounter::data_query_tag(), *this, max_size); - }); -} - -inline future result_memory_limiter::new_digest_read(size_t max_size) { - return make_ready_future(result_memory_accounter(result_memory_accounter::digest_query_tag(), *this, max_size)); -} - -enum class result_request { - only_result, - only_digest, - result_and_digest, -}; - -class result_digest { -public: - static_assert(16 == CryptoPP::Weak::MD5::DIGESTSIZE, "MD5 digest size is all wrong"); - using type = std::array; -private: - type _digest; -public: - result_digest() = default; - result_digest(type&& digest) : _digest(std::move(digest)) {} - const type& get() const { return _digest; } - bool operator==(const result_digest& rh) const { - return _digest == rh._digest; - } - bool operator!=(const result_digest& rh) const { - return _digest != rh._digest; - } -}; - -// -// The query results are stored in a serialized form. This is in order to -// address the following problems, which a structured format has: -// -// - high level of indirection (vector of vectors of vectors of blobs), which -// is not CPU cache friendly -// -// - high allocation rate due to fine-grained object structure -// -// On replica side, the query results are probably going to be serialized in -// the transport layer anyway, so serializing the results up-front doesn't add -// net work. There is no processing of the query results on replica other than -// concatenation in case of range queries and checksum calculation. If query -// results are collected in serialized form from different cores, we can -// concatenate them without copying by simply appending the fragments into the -// packet. -// -// On coordinator side, the query results would have to be parsed from the -// transport layer buffers anyway, so the fact that iterators parse it also -// doesn't add net work, but again saves allocations and copying. The CQL -// server doesn't need complex data structures to process the results, it just -// goes over it linearly consuming it. -// -// The coordinator side could be optimized even further for CQL queries which -// do not need processing (eg. select * from cf where ...). We could make the -// replica send the query results in the format which is expected by the CQL -// binary protocol client. So in the typical case the coordinator would just -// pass the data using zero-copy to the client, prepending a header. -// -// Users which need more complex structure of query results can convert this -// to query::result_set. 
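
A standalone sketch of the zero-parse concatenation this design buys, with strings standing in for bytes_ostream fragments:

    #include <cassert>
    #include <string>
    #include <vector>

    // Standalone model of the rationale above: results kept as opaque
    // serialized fragments can be combined by appending buffers, with no
    // parsing and no per-row object graph on the coordinator.
    struct serialized_result {
        std::vector<std::string> fragments;  // stand-in for bytes_ostream chunks

        void append(const serialized_result& other) {
            // concatenation is fragment bookkeeping, not per-row work
            fragments.insert(fragments.end(), other.fragments.begin(),
                             other.fragments.end());
        }
        size_t size_bytes() const {
            size_t n = 0;
            for (auto&& f : fragments) n += f.size();
            return n;
        }
    };

    int main() {
        serialized_result from_core0{{"partition-a"}}, from_core1{{"partition-b"}};
        from_core0.append(from_core1);
        assert(from_core0.fragments.size() == 2);
        assert(from_core0.size_bytes() == 22);
    }
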
-// -// Related headers: -// - query-result-reader.hh -// - query-result-writer.hh - -struct short_read_tag { }; -using short_read = bool_class; - -class result { - bytes_ostream _w; - stdx::optional _digest; - stdx::optional _row_count; - api::timestamp_type _last_modified = api::missing_timestamp; - short_read _short_read; - query::result_memory_tracker _memory_tracker; - stdx::optional _partition_count; -public: - class builder; - class partition_writer; - friend class result_merger; - - result(); - result(bytes_ostream&& w, short_read sr, stdx::optional c = { }, stdx::optional pc = { }, - result_memory_tracker memory_tracker = { }) - : _w(std::move(w)) - , _row_count(c) - , _short_read(sr) - , _memory_tracker(std::move(memory_tracker)) - , _partition_count(pc) - { - w.reduce_chunk_count(); - } - result(bytes_ostream&& w, stdx::optional d, api::timestamp_type last_modified, - short_read sr, stdx::optional c = { }, stdx::optional pc = { }, result_memory_tracker memory_tracker = { }) - : _w(std::move(w)) - , _digest(d) - , _row_count(c) - , _last_modified(last_modified) - , _short_read(sr) - , _memory_tracker(std::move(memory_tracker)) - , _partition_count(pc) - { - w.reduce_chunk_count(); - } - result(result&&) = default; - result(const result&) = default; - result& operator=(result&&) = default; - result& operator=(const result&) = default; - - const bytes_ostream& buf() const { - return _w; - } - - const stdx::optional& digest() const { - return _digest; - } - - const stdx::optional& row_count() const { - return _row_count; - } - - const api::timestamp_type last_modified() const { - return _last_modified; - } - - short_read is_short_read() const { - return _short_read; - } - - const stdx::optional& partition_count() const { - return _partition_count; - } - - void calculate_counts(const query::partition_slice&); - - struct printer { - schema_ptr s; - const query::partition_slice& slice; - const query::result& res; - }; - - sstring pretty_print(schema_ptr, const query::partition_slice&) const; - printer pretty_printer(schema_ptr, const query::partition_slice&) const; -}; - -std::ostream& operator<<(std::ostream& os, const query::result::printer&); -} diff --git a/scylla/query.cc b/scylla/query.cc deleted file mode 100644 index fae0d38..0000000 --- a/scylla/query.cc +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include -#include "query-request.hh" -#include "query-result.hh" -#include "query-result-writer.hh" -#include "query-result-set.hh" -#include "to_string.hh" -#include "bytes.hh" -#include "mutation_partition_serializer.hh" -#include "query-result-reader.hh" -#include "query_result_merger.hh" - -namespace query { - -constexpr size_t result_memory_limiter::minimum_result_size; -constexpr size_t result_memory_limiter::maximum_result_size; - -thread_local semaphore result_memory_tracker::_dummy { 0 }; - -const dht::partition_range full_partition_range = dht::partition_range::make_open_ended_both_sides(); -const clustering_range full_clustering_range = clustering_range::make_open_ended_both_sides(); - -const query::partition_slice full_slice = query::partition_slice({ full_clustering_range }, { }, { }, { }); - -std::ostream& operator<<(std::ostream& out, const specific_ranges& s); - -std::ostream& operator<<(std::ostream& out, const partition_slice& ps) { - out << "{" - << "regular_cols=[" << join(", ", ps.regular_columns) << "]" - << ", static_cols=[" << join(", ", ps.static_columns) << "]" - << ", rows=[" << join(", ", ps._row_ranges) << "]" - ; - if (ps._specific_ranges) { - out << ", specific=[" << *ps._specific_ranges << "]"; - } - out << ", options=" << sprint("%x", ps.options.mask()); // FIXME: pretty print options - out << ", cql_format=" << ps.cql_format(); - out << ", partition_row_limit=" << ps._partition_row_limit; - return out << "}"; -} - -std::ostream& operator<<(std::ostream& out, const read_command& r) { - return out << "read_command{" - << "cf_id=" << r.cf_id - << ", version=" << r.schema_version - << ", slice=" << r.slice << "" - << ", limit=" << r.row_limit - << ", timestamp=" << r.timestamp.time_since_epoch().count() << "}" - << ", partition_limit=" << r.partition_limit << "}"; -} - -std::ostream& operator<<(std::ostream& out, const specific_ranges& s) { - return out << "{" << s._pk << " : " << join(", ", s._ranges) << "}"; -} - -partition_slice::partition_slice(clustering_row_ranges row_ranges, - std::vector static_columns, - std::vector regular_columns, - option_set options, - std::unique_ptr specific_ranges, - cql_serialization_format cql_format, - uint32_t partition_row_limit) - : _row_ranges(std::move(row_ranges)) - , static_columns(std::move(static_columns)) - , regular_columns(std::move(regular_columns)) - , options(options) - , _specific_ranges(std::move(specific_ranges)) - , _cql_format(std::move(cql_format)) - , _partition_row_limit(partition_row_limit) -{} - -partition_slice::partition_slice(partition_slice&&) = default; - -partition_slice& partition_slice::operator=(partition_slice&& other) noexcept = default; - -// Only needed because selection_statement::execute does copies of its read_command -// in the map-reduce op. -partition_slice::partition_slice(const partition_slice& s) - : _row_ranges(s._row_ranges) - , static_columns(s.static_columns) - , regular_columns(s.regular_columns) - , options(s.options) - , _specific_ranges(s._specific_ranges ? std::make_unique(*s._specific_ranges) : nullptr) - , _cql_format(s._cql_format) - , _partition_row_limit(s._partition_row_limit) -{} - -partition_slice::~partition_slice() -{} - -const clustering_row_ranges& partition_slice::row_ranges(const schema& s, const partition_key& k) const { - auto* r = _specific_ranges ? _specific_ranges->range_for(s, k) : nullptr; - return r ? 
-}
-
-void partition_slice::set_range(const schema& s, const partition_key& k, clustering_row_ranges range) {
-    if (!_specific_ranges) {
-        _specific_ranges = std::make_unique<specific_ranges>(k, std::move(range));
-    } else {
-        _specific_ranges->add(s, k, std::move(range));
-    }
-}
-
-void partition_slice::clear_range(const schema& s, const partition_key& k) {
-    if (_specific_ranges && _specific_ranges->contains(s, k)) {
-        // just in case someone changes the impl above,
-        // we should do actual remove if specific_ranges suddenly
-        // becomes an actual map
-        assert(_specific_ranges->size() == 1);
-        _specific_ranges = nullptr;
-    }
-}
-
-clustering_row_ranges partition_slice::get_all_ranges() const {
-    auto all_ranges = default_row_ranges();
-    const auto& specific_ranges = get_specific_ranges();
-    if (specific_ranges) {
-        all_ranges.insert(all_ranges.end(), specific_ranges->ranges().begin(), specific_ranges->ranges().end());
-    }
-    return all_ranges;
-}
-
-sstring
-result::pretty_print(schema_ptr s, const query::partition_slice& slice) const {
-    std::ostringstream out;
-    out << "{ result: " << result_set::from_raw_result(s, slice, *this);
-    out << " digest: ";
-    if (_digest) {
-        out << std::hex << std::setw(2);
-        for (auto&& c : _digest->get()) {
-            out << unsigned(c) << " ";
-        }
-    } else {
-        out << "{}";
-    }
-    out << ", short_read=" << is_short_read() << " }";
-    return out.str();
-}
-
-query::result::printer
-result::pretty_printer(schema_ptr s, const query::partition_slice& slice) const {
-    return query::result::printer{s, slice, *this};
-}
-
-std::ostream& operator<<(std::ostream& os, const query::result::printer& p) {
-    os << p.res.pretty_print(p.s, p.slice);
-    return os;
-}
-
-void result::calculate_counts(const query::partition_slice& slice) {
-    struct {
-        uint32_t total_count = 0;
-        uint32_t current_partition_count = 0;
-        uint32_t live_partitions = 0;
-        void accept_new_partition(const partition_key& key, uint32_t row_count) {
-            accept_new_partition(row_count);
-        }
-        void accept_new_partition(uint32_t row_count) {
-            total_count += row_count;
-            current_partition_count = row_count;
-            live_partitions += 1;
-        }
-        void accept_new_row(const clustering_key& key, const result_row_view& static_row, const result_row_view& row) {}
-        void accept_new_row(const result_row_view& static_row, const result_row_view& row) {}
-        void accept_partition_end(const query::result_row_view& static_row) {
-            if (current_partition_count == 0) {
-                total_count++;
-            }
-        }
-    } counter;
-
-    result_view::consume(*this, slice, counter);
-    _row_count = counter.total_count;
-    _partition_count = counter.live_partitions;
-}
-
-result::result()
-    : result([] {
-        bytes_ostream out;
-        ser::writer_of_query_result(out).skip_partitions().end_query_result();
-        return out;
-    }(), short_read::no, 0, 0)
{ }
-
-static void write_partial_partition(ser::writer_of_qr_partition&& pw, const ser::qr_partition_view& pv, uint32_t rows_to_include) {
-    auto key = pv.key();
-    auto static_cells_wr = (key ? std::move(pw).write_key(*key) : std::move(pw).skip_key())
-        .start_static_row()
-        .start_cells();
-    for (auto&& cell : pv.static_row().cells()) {
-        static_cells_wr.add(cell);
-    }
-    auto rows_wr = std::move(static_cells_wr)
-        .end_cells()
-        .end_static_row()
-        .start_rows();
-    auto rows = pv.rows();
-    // rows.size() can be 0 if there's a single static row
-    auto it = rows.begin();
-    for (uint32_t i = 0; i < std::min(rows.size(), uint64_t{rows_to_include}); ++i) {
-        rows_wr.add(*it++);
-    }
-    std::move(rows_wr).end_rows().end_qr_partition();
-}
-
-foreign_ptr<lw_shared_ptr<query::result>> result_merger::get() {
-    if (_partial.size() == 1) {
-        return std::move(_partial[0]);
-    }
-
-    bytes_ostream w;
-    auto partitions = ser::writer_of_query_result(w).start_partitions();
-    uint32_t row_count = 0;
-    short_read is_short_read;
-    uint32_t partition_count = 0;
-
-    for (auto&& r : _partial) {
-        result_view::do_with(*r, [&] (result_view rv) {
-            for (auto&& pv : rv._v.partitions()) {
-                auto rows = pv.rows();
-                // If rows.empty(), then there's a static row, or there wouldn't be a partition
-                const uint32_t rows_in_partition = rows.size() ? : 1;
-                const uint32_t rows_to_include = std::min(_max_rows - row_count, rows_in_partition);
-                row_count += rows_to_include;
-                if (rows_to_include >= rows_in_partition) {
-                    partitions.add(pv);
-                    if (++partition_count >= _max_partitions) {
-                        return;
-                    }
-                } else if (rows_to_include > 0) {
-                    write_partial_partition(partitions.add(), pv, rows_to_include);
-                    return;
-                } else {
-                    return;
-                }
-            }
-        });
-        if (r->is_short_read()) {
-            is_short_read = short_read::yes;
-            break;
-        }
-        if (row_count >= _max_rows || partition_count >= _max_partitions) {
-            break;
-        }
-    }
-
-    std::move(partitions).end_partitions().end_query_result();
-
-    return make_foreign(make_lw_shared<query::result>(std::move(w), is_short_read, row_count));
-}
-
-}
diff --git a/scylla/query_result_merger.hh b/scylla/query_result_merger.hh
deleted file mode 100644
index 3745299..0000000
--- a/scylla/query_result_merger.hh
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see .
- */ - -#pragma once - -#include "core/distributed.hh" -#include "query-result.hh" - -namespace query { - -// Merges non-overlapping results into one -// Implements @Reducer concept from distributed.hh -class result_merger { - std::vector>> _partial; - const uint32_t _max_rows; - const uint32_t _max_partitions; -public: - explicit result_merger(uint32_t max_rows, uint32_t max_partitions) - : _max_rows(max_rows) - , _max_partitions(max_partitions) - { } - - void reserve(size_t size) { - _partial.reserve(size); - } - - void operator()(foreign_ptr> r) { - if (!_partial.empty() && _partial.back()->is_short_read()) { - return; - } - _partial.emplace_back(std::move(r)); - } - - // FIXME: Eventually we should return a composite_query_result here - // which holds the vector of query results and which can be quickly turned - // into packet fragments by the transport layer without copying the data. - foreign_ptr> get(); -}; - -} diff --git a/scylla/range.hh b/scylla/range.hh deleted file mode 100644 index 2bec277..0000000 --- a/scylla/range.hh +++ /dev/null @@ -1,723 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "stdx.hh" -#include -#include -#include -#include -#include -#include -#include - -template -class range_bound { - T _value; - bool _inclusive; -public: - range_bound(T value, bool inclusive = true) - : _value(std::move(value)) - , _inclusive(inclusive) - { } - const T& value() const & { return _value; } - T&& value() && { return std::move(_value); } - bool is_inclusive() const { return _inclusive; } - bool operator==(const range_bound& other) const { - return (_value == other._value) && (_inclusive == other._inclusive); - } - template - bool equal(const range_bound& other, Comparator&& cmp) const { - return _inclusive == other._inclusive && cmp(_value, other._value) == 0; - } -}; - -template -class nonwrapping_range; - -// A range which can have inclusive, exclusive or open-ended bounds on each end. -// The end bound can be smaller than the start bound. -template -class wrapping_range { - template - using optional = std::experimental::optional; -public: - using bound = range_bound; -private: - optional _start; - optional _end; - bool _singular; -public: - wrapping_range(optional start, optional end, bool singular = false) - : _start(std::move(start)) - , _end(std::move(end)) - , _singular(singular) - { } - wrapping_range(T value) - : _start(bound(std::move(value), true)) - , _end() - , _singular(true) - { } - wrapping_range() : wrapping_range({}, {}) { } -private: - // Bound wrappers for compile-time dispatch and safety. 
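// Usage sketch for result_merger as a @Reducer: each shard produces a partial
// query::result and seastar's distributed<>::map_reduce folds them through the
// merger. The database service and its per-shard query() entry point below are
// assumed names for illustration only, not part of this header.

future<foreign_ptr<lw_shared_ptr<query::result>>>
query_on_all_shards(distributed<database>& db, lw_shared_ptr<query::read_command> cmd) {
    return db.map_reduce(query::result_merger(cmd->row_limit, cmd->partition_limit),
            [cmd] (database& local) {
                // assumed per-shard entry point producing a partial result
                return local.query(cmd);
            });
}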
- struct start_bound_ref { const optional& b; }; - struct end_bound_ref { const optional& b; }; - - start_bound_ref start_bound() const { return { start() }; } - end_bound_ref end_bound() const { return { end() }; } - - template - static bool greater_than_or_equal(end_bound_ref end, start_bound_ref start, Comparator&& cmp) { - return !end.b || !start.b || cmp(end.b->value(), start.b->value()) - >= (!end.b->is_inclusive() || !start.b->is_inclusive()); - } - - template - static bool less_than(end_bound_ref end, start_bound_ref start, Comparator&& cmp) { - return !greater_than_or_equal(end, start, cmp); - } - - template - static bool less_than_or_equal(start_bound_ref first, start_bound_ref second, Comparator&& cmp) { - return !first.b || (second.b && cmp(first.b->value(), second.b->value()) - <= -(!first.b->is_inclusive() && second.b->is_inclusive())); - } - - template - static bool less_than(start_bound_ref first, start_bound_ref second, Comparator&& cmp) { - return second.b && (!first.b || cmp(first.b->value(), second.b->value()) - < (first.b->is_inclusive() && !second.b->is_inclusive())); - } - - template - static bool greater_than_or_equal(end_bound_ref first, end_bound_ref second, Comparator&& cmp) { - return !first.b || (second.b && cmp(first.b->value(), second.b->value()) - >= (!first.b->is_inclusive() && second.b->is_inclusive())); - } -public: - // the point is before the range (works only for non wrapped ranges) - // Comparator must define a total ordering on T. - template - bool before(const T& point, Comparator&& cmp) const { - assert(!is_wrap_around(cmp)); - if (!start()) { - return false; //open start, no points before - } - auto r = cmp(point, start()->value()); - if (r < 0) { - return true; - } - if (!start()->is_inclusive() && r == 0) { - return true; - } - return false; - } - // the point is after the range (works only for non wrapped ranges) - // Comparator must define a total ordering on T. - template - bool after(const T& point, Comparator&& cmp) const { - assert(!is_wrap_around(cmp)); - if (!end()) { - return false; //open end, no points after - } - auto r = cmp(end()->value(), point); - if (r < 0) { - return true; - } - if (!end()->is_inclusive() && r == 0) { - return true; - } - return false; - } - // check if two ranges overlap. - // Comparator must define a total ordering on T. - template - bool overlaps(const wrapping_range& other, Comparator&& cmp) const { - bool this_wraps = is_wrap_around(cmp); - bool other_wraps = other.is_wrap_around(cmp); - - if (this_wraps && other_wraps) { - return true; - } else if (this_wraps) { - auto unwrapped = unwrap(); - return other.overlaps(unwrapped.first, cmp) || other.overlaps(unwrapped.second, cmp); - } else if (other_wraps) { - auto unwrapped = other.unwrap(); - return overlaps(unwrapped.first, cmp) || overlaps(unwrapped.second, cmp); - } - - // No range should reach this point as wrap around. - assert(!this_wraps); - assert(!other_wraps); - - // if both this and other have an open start, the two ranges will overlap. 
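// Illustrative demo of the wrap-around semantics defined above, using int keys
// and a three-way comparator (negative/zero/positive result, as these methods
// expect). Not part of the original header.

#include <cassert>

inline void wrap_around_demo() {
    auto cmp = [] (int a, int b) { return a - b; };
    // (8, 2]: the end bound is smaller than the start bound, so the range wraps.
    auto r = wrapping_range<int>::make({8, false}, {2, true});
    assert(r.is_wrap_around(cmp));
    assert(r.contains(9, cmp));   // in the tail piece (8, +inf)
    assert(r.contains(1, cmp));   // in the head piece (-inf, 2]
    assert(!r.contains(5, cmp));  // in the gap between the two pieces
}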
- if (!start() && !other.start()) { - return true; - } - - return greater_than_or_equal(end_bound(), other.start_bound(), cmp) - && greater_than_or_equal(other.end_bound(), start_bound(), cmp); - } - static wrapping_range make(bound start, bound end) { - return wrapping_range({std::move(start)}, {std::move(end)}); - } - static wrapping_range make_open_ended_both_sides() { - return {{}, {}}; - } - static wrapping_range make_singular(T value) { - return {std::move(value)}; - } - static wrapping_range make_starting_with(bound b) { - return {{std::move(b)}, {}}; - } - static wrapping_range make_ending_with(bound b) { - return {{}, {std::move(b)}}; - } - bool is_singular() const { - return _singular; - } - bool is_full() const { - return !_start && !_end; - } - void reverse() { - if (!_singular) { - std::swap(_start, _end); - } - } - const optional& start() const { - return _start; - } - const optional& end() const { - return _singular ? _start : _end; - } - // Range is a wrap around if end value is smaller than the start value - // or they're equal and at least one bound is not inclusive. - // Comparator must define a total ordering on T. - template - bool is_wrap_around(Comparator&& cmp) const { - if (_end && _start) { - auto r = cmp(end()->value(), start()->value()); - return r < 0 - || (r == 0 && (!start()->is_inclusive() || !end()->is_inclusive())); - } else { - return false; // open ended range or singular range don't wrap around - } - } - // Converts a wrap-around range to two non-wrap-around ranges. - // The returned ranges are not overlapping and ordered. - // Call only when is_wrap_around(). - std::pair unwrap() const { - return { - { {}, end() }, - { start(), {} } - }; - } - // the point is inside the range - // Comparator must define a total ordering on T. - template - bool contains(const T& point, Comparator&& cmp) const { - if (is_wrap_around(cmp)) { - auto unwrapped = unwrap(); - return unwrapped.first.contains(point, cmp) - || unwrapped.second.contains(point, cmp); - } else { - return !before(point, cmp) && !after(point, cmp); - } - } - // Returns true iff all values contained by other are also contained by this. - // Comparator must define a total ordering on T. - template - bool contains(const wrapping_range& other, Comparator&& cmp) const { - bool this_wraps = is_wrap_around(cmp); - bool other_wraps = other.is_wrap_around(cmp); - - if (this_wraps && other_wraps) { - return cmp(start()->value(), other.start()->value()) - <= -(!start()->is_inclusive() && other.start()->is_inclusive()) - && cmp(end()->value(), other.end()->value()) - >= (!end()->is_inclusive() && other.end()->is_inclusive()); - } - - if (!this_wraps && !other_wraps) { - return less_than_or_equal(start_bound(), other.start_bound(), cmp) - && greater_than_or_equal(end_bound(), other.end_bound(), cmp); - } - - if (other_wraps) { // && !this_wraps - return !start() && !end(); - } - - // !other_wraps && this_wraps - return (other.start() && cmp(start()->value(), other.start()->value()) - <= -(!start()->is_inclusive() && other.start()->is_inclusive())) - || (other.end() && cmp(end()->value(), other.end()->value()) - >= (!end()->is_inclusive() && other.end()->is_inclusive())); - } - // Returns ranges which cover all values covered by this range but not covered by the other range. - // Ranges are not overlapping and ordered. - // Comparator must define a total ordering on T. 
- template - std::vector subtract(const wrapping_range& other, Comparator&& cmp) const { - std::vector result; - std::list left; - std::list right; - - if (is_wrap_around(cmp)) { - auto u = unwrap(); - left.emplace_back(std::move(u.first)); - left.emplace_back(std::move(u.second)); - } else { - left.push_back(*this); - } - - if (other.is_wrap_around(cmp)) { - auto u = other.unwrap(); - right.emplace_back(std::move(u.first)); - right.emplace_back(std::move(u.second)); - } else { - right.push_back(other); - } - - // left and right contain now non-overlapping, ordered ranges - - while (!left.empty() && !right.empty()) { - auto& r1 = left.front(); - auto& r2 = right.front(); - if (less_than(r2.end_bound(), r1.start_bound(), cmp)) { - right.pop_front(); - } else if (less_than(r1.end_bound(), r2.start_bound(), cmp)) { - result.emplace_back(std::move(r1)); - left.pop_front(); - } else { // Overlap - auto tmp = std::move(r1); - left.pop_front(); - if (!greater_than_or_equal(r2.end_bound(), tmp.end_bound(), cmp)) { - left.push_front({bound(r2.end()->value(), !r2.end()->is_inclusive()), tmp.end()}); - } - if (!less_than_or_equal(r2.start_bound(), tmp.start_bound(), cmp)) { - left.push_front({tmp.start(), bound(r2.start()->value(), !r2.start()->is_inclusive())}); - } - } - } - - boost::copy(left, std::back_inserter(result)); - - // TODO: Merge adjacent ranges (optimization) - return result; - } - // split range in two around a split_point. split_point has to be inside the range - // split_point will belong to first range - // Comparator must define a total ordering on T. - template - std::pair, wrapping_range> split(const T& split_point, Comparator&& cmp) const { - assert(contains(split_point, std::forward(cmp))); - wrapping_range left(start(), bound(split_point)); - wrapping_range right(bound(split_point, false), end()); - return std::make_pair(std::move(left), std::move(right)); - } - // Create a sub-range including values greater than the split_point. Returns stdx::nullopt if - // split_point is after the end (but not included in the range, in case of wraparound ranges) - // Comparator must define a total ordering on T. - template - stdx::optional> split_after(const T& split_point, Comparator&& cmp) const { - if (contains(split_point, std::forward(cmp)) - && (!end() || cmp(split_point, end()->value()) != 0)) { - return wrapping_range(bound(split_point, false), end()); - } else if (end() && cmp(split_point, end()->value()) >= 0) { - // whether to return stdx::nullopt or the full range is not - // well-defined for wraparound ranges; we return nullopt - // if split_point is after the end. - return stdx::nullopt; - } else { - return *this; - } - } - template::type> - static stdx::optional::bound> transform_bound(Bound&& b, Transformer&& transformer) { - if (b) { - return { { transformer(std::forward(b).value().value()), b->is_inclusive() } }; - }; - return {}; - } - // Transforms this range into a new range of a different value type - // Supplied transformer should transform value of type T (the old type) into value of type U (the new type). 
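// Illustrative demo of subtract(): the remainders come back ordered and
// non-overlapping, with bound kinds flipped at the cut points. Uses the same
// three-way int comparator as the demo above.

inline void subtract_demo() {
    auto cmp = [] (int a, int b) { return a - b; };
    auto whole = wrapping_range<int>::make({0}, {10});  // [0, 10]
    auto hole = wrapping_range<int>::make({3}, {5});    // [3, 5]
    auto rest = whole.subtract(hole, cmp);
    // rest == { [0, 3), (5, 10] }
}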
- template::type> - wrapping_range transform(Transformer&& transformer) && { - return wrapping_range(transform_bound(std::move(_start), transformer), transform_bound(std::move(_end), transformer), _singular); - } - template::type> - wrapping_range transform(Transformer&& transformer) const & { - return wrapping_range(transform_bound(_start, transformer), transform_bound(_end, transformer), _singular); - } - template - bool equal(const wrapping_range& other, Comparator&& cmp) const { - return bool(_start) == bool(other._start) - && bool(_end) == bool(other._end) - && (!_start || _start->equal(*other._start, cmp)) - && (!_end || _end->equal(*other._end, cmp)) - && _singular == other._singular; - } - bool operator==(const wrapping_range& other) const { - return (_start == other._start) && (_end == other._end) && (_singular == other._singular); - } - - template - friend std::ostream& operator<<(std::ostream& out, const wrapping_range& r); -private: - friend class nonwrapping_range; -}; - -template -std::ostream& operator<<(std::ostream& out, const wrapping_range& r) { - if (r.is_singular()) { - return out << "{" << r.start()->value() << "}"; - } - - if (!r.start()) { - out << "(-inf, "; - } else { - if (r.start()->is_inclusive()) { - out << "["; - } else { - out << "("; - } - out << r.start()->value() << ", "; - } - - if (!r.end()) { - out << "+inf)"; - } else { - out << r.end()->value(); - if (r.end()->is_inclusive()) { - out << "]"; - } else { - out << ")"; - } - } - - return out; -} - -// A range which can have inclusive, exclusive or open-ended bounds on each end. -// The end bound can never be smaller than the start bound. -template -class nonwrapping_range { - template - using optional = std::experimental::optional; -public: - using bound = range_bound; -private: - wrapping_range _range; -public: - nonwrapping_range(T value) - : _range(std::move(value)) - { } - nonwrapping_range() : nonwrapping_range({}, {}) { } - // Can only be called if start <= end. IDL ctor. - nonwrapping_range(optional start, optional end, bool singular = false) - : _range(std::move(start), std::move(end), singular) - { } - // Can only be called if !r.is_wrap_around(). - explicit nonwrapping_range(wrapping_range&& r) - : _range(std::move(r)) - { } - // Can only be called if !r.is_wrap_around(). - explicit nonwrapping_range(const wrapping_range& r) - : _range(r) - { } - operator wrapping_range() const & { - return _range; - } - operator wrapping_range() && { - return std::move(_range); - } - - // the point is before the range. - // Comparator must define a total ordering on T. - template - bool before(const T& point, Comparator&& cmp) const { - return _range.before(point, std::forward(cmp)); - } - // the point is after the range. - // Comparator must define a total ordering on T. - template - bool after(const T& point, Comparator&& cmp) const { - return _range.after(point, std::forward(cmp)); - } - // check if two ranges overlap. - // Comparator must define a total ordering on T. - template - bool overlaps(const nonwrapping_range& other, Comparator&& cmp) const { - // if both this and other have an open start, the two ranges will overlap. 
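// Illustrative demo of the stream output format defined above: inclusive
// bounds print as [ ], exclusive bounds as ( ), and missing bounds as
// -inf/+inf. Assumes <iostream>.

inline void print_demo() {
    std::cout << wrapping_range<int>::make({1}, {5, false}) << "\n";   // [1, 5)
    std::cout << wrapping_range<int>::make_starting_with({7}) << "\n"; // [7, +inf)
}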
- if (!start() && !other.start()) { - return true; - } - - return wrapping_range::greater_than_or_equal(_range.end_bound(), other._range.start_bound(), cmp) - && wrapping_range::greater_than_or_equal(other._range.end_bound(), _range.start_bound(), cmp); - } - static nonwrapping_range make(bound start, bound end) { - return nonwrapping_range({std::move(start)}, {std::move(end)}); - } - static nonwrapping_range make_open_ended_both_sides() { - return {{}, {}}; - } - static nonwrapping_range make_singular(T value) { - return {std::move(value)}; - } - static nonwrapping_range make_starting_with(bound b) { - return {{std::move(b)}, {}}; - } - static nonwrapping_range make_ending_with(bound b) { - return {{}, {std::move(b)}}; - } - bool is_singular() const { - return _range.is_singular(); - } - bool is_full() const { - return _range.is_full(); - } - const optional& start() const { - return _range.start(); - } - const optional& end() const { - return _range.end(); - } - // the point is inside the range - // Comparator must define a total ordering on T. - template - bool contains(const T& point, Comparator&& cmp) const { - return !before(point, cmp) && !after(point, cmp); - } - // Returns true iff all values contained by other are also contained by this. - // Comparator must define a total ordering on T. - template - bool contains(const nonwrapping_range& other, Comparator&& cmp) const { - return wrapping_range::less_than_or_equal(_range.start_bound(), other._range.start_bound(), cmp) - && wrapping_range::greater_than_or_equal(_range.end_bound(), other._range.end_bound(), cmp); - } - // Returns ranges which cover all values covered by this range but not covered by the other range. - // Ranges are not overlapping and ordered. - // Comparator must define a total ordering on T. - template - std::vector subtract(const nonwrapping_range& other, Comparator&& cmp) const { - auto subtracted = _range.subtract(other._range, std::forward(cmp)); - return boost::copy_range>(subtracted | boost::adaptors::transformed([](auto&& r) { - return nonwrapping_range(std::move(r)); - })); - } - // split range in two around a split_point. split_point has to be inside the range - // split_point will belong to first range - // Comparator must define a total ordering on T. - template - std::pair, nonwrapping_range> split(const T& split_point, Comparator&& cmp) const { - assert(contains(split_point, std::forward(cmp))); - nonwrapping_range left(start(), bound(split_point)); - nonwrapping_range right(bound(split_point, false), end()); - return std::make_pair(std::move(left), std::move(right)); - } - // Create a sub-range including values greater than the split_point. If split_point is after - // the end, returns stdx::nullopt. - template - stdx::optional split_after(const T& split_point, Comparator&& cmp) const { - if (end() && cmp(split_point, end()->value()) >= 0) { - return stdx::nullopt; - } else if (start() && cmp(split_point, start()->value()) < 0) { - return *this; - } else { - return nonwrapping_range(range_bound(split_point, false), end()); - } - } - // Creates a new sub-range which is the intersection of this range and a range starting with "start". - // If there is no overlap, returns stdx::nullopt. 
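// Illustrative demo of split() and split_after() on a non-wrapping range: the
// split point stays in the left half, and split_after() yields nullopt once
// the point reaches the end bound.

inline void split_demo() {
    auto cmp = [] (int a, int b) { return a - b; };
    auto r = nonwrapping_range<int>::make({0}, {10});
    auto halves = r.split(4, cmp);      // [0, 4] and (4, 10]
    auto tail = r.split_after(4, cmp);  // (4, 10]
    auto none = r.split_after(10, cmp); // stdx::nullopt: nothing after the end
}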
- template - stdx::optional trim_front(stdx::optional&& start, Comparator&& cmp) const { - return intersection(nonwrapping_range(std::move(start), {}), cmp); - } - // Transforms this range into a new range of a different value type - // Supplied transformer should transform value of type T (the old type) into value of type U (the new type). - template::type> - nonwrapping_range transform(Transformer&& transformer) && { - return nonwrapping_range(std::move(_range).transform(std::forward(transformer))); - } - template::type> - nonwrapping_range transform(Transformer&& transformer) const & { - return nonwrapping_range(_range.transform(std::forward(transformer))); - } - template - bool equal(const nonwrapping_range& other, Comparator&& cmp) const { - return _range.equal(other._range, std::forward(cmp)); - } - bool operator==(const nonwrapping_range& other) const { - return _range == other._range; - } - // Takes a vector of possibly overlapping ranges and returns a vector containing - // a set of non-overlapping ranges covering the same values. - template - static std::vector deoverlap(std::vector ranges, Comparator&& cmp) { - auto size = ranges.size(); - if (size <= 1) { - return ranges; - } - - std::sort(ranges.begin(), ranges.end(), [&](auto&& r1, auto&& r2) { - return wrapping_range::less_than(r1._range.start_bound(), r2._range.start_bound(), cmp); - }); - - std::vector deoverlapped_ranges; - deoverlapped_ranges.reserve(size); - - auto&& current = ranges[0]; - for (auto&& r : ranges | boost::adaptors::sliced(1, ranges.size())) { - bool includes_end = wrapping_range::greater_than_or_equal(r._range.end_bound(), current._range.start_bound(), cmp) - && wrapping_range::greater_than_or_equal(current._range.end_bound(), r._range.end_bound(), cmp); - if (includes_end) { - continue; // last.start <= r.start <= r.end <= last.end - } - bool includes_start = wrapping_range::greater_than_or_equal(current._range.end_bound(), r._range.start_bound(), cmp); - if (includes_start) { - current = nonwrapping_range(std::move(current.start()), std::move(r.end())); - } else { - deoverlapped_ranges.emplace_back(std::move(current)); - current = std::move(r); - } - } - - deoverlapped_ranges.emplace_back(std::move(current)); - return deoverlapped_ranges; - } - -private: - // These private functions optimize the case where a sequence supports the - // lower and upper bound operations more efficiently, as is the case with - // some boost containers. - struct std_ {}; - struct built_in_ : std_ {}; - - template().lower_bound(std::declval(), std::declval()))> - typename std::remove_reference::type::const_iterator do_lower_bound(const T& value, Range&& r, LessComparator&& cmp, built_in_) const { - return r.lower_bound(value, std::forward(cmp)); - } - - template().upper_bound(std::declval(), std::declval()))> - typename std::remove_reference::type::const_iterator do_upper_bound(const T& value, Range&& r, LessComparator&& cmp, built_in_) const { - return r.upper_bound(value, std::forward(cmp)); - } - - template - typename std::remove_reference::type::const_iterator do_lower_bound(const T& value, Range&& r, LessComparator&& cmp, std_) const { - return std::lower_bound(r.begin(), r.end(), value, std::forward(cmp)); - } - - template - typename std::remove_reference::type::const_iterator do_upper_bound(const T& value, Range&& r, LessComparator&& cmp, std_) const { - return std::upper_bound(r.begin(), r.end(), value, std::forward(cmp)); - } -public: - // Return the lower bound of the specified sequence according to these bounds. 
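// Illustrative demo of deoverlap(): overlapping input ranges are sorted by
// start bound and merged into a minimal ordered, non-overlapping cover.

inline void deoverlap_demo() {
    auto cmp = [] (int a, int b) { return a - b; };
    std::vector<nonwrapping_range<int>> v;
    v.push_back(nonwrapping_range<int>::make({5}, {9}));
    v.push_back(nonwrapping_range<int>::make({0}, {6}));
    v.push_back(nonwrapping_range<int>::make({12}, {13}));
    auto flat = nonwrapping_range<int>::deoverlap(std::move(v), cmp);
    // flat == { [0, 9], [12, 13] }
}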
- template - typename std::remove_reference::type::const_iterator lower_bound(Range&& r, LessComparator&& cmp) const { - return start() - ? (start()->is_inclusive() - ? do_lower_bound(start()->value(), std::forward(r), std::forward(cmp), built_in_()) - : do_upper_bound(start()->value(), std::forward(r), std::forward(cmp), built_in_())) - : std::cbegin(r); - } - // Return the upper bound of the specified sequence according to these bounds. - template - typename std::remove_reference::type::const_iterator upper_bound(Range&& r, LessComparator&& cmp) const { - return end() - ? (end()->is_inclusive() - ? do_upper_bound(end()->value(), std::forward(r), std::forward(cmp), built_in_()) - : do_lower_bound(end()->value(), std::forward(r), std::forward(cmp), built_in_())) - : (is_singular() - ? do_upper_bound(start()->value(), std::forward(r), std::forward(cmp), built_in_()) - : std::cend(r)); - } - // Returns a subset of the range that is within these bounds. - template - boost::iterator_range::type::const_iterator> - slice(Range&& range, LessComparator&& cmp) const { - return boost::make_iterator_range(lower_bound(range, cmp), upper_bound(range, cmp)); - } - - // Returns the intersection between this range and other. - template - stdx::optional intersection(const nonwrapping_range& other, Comparator&& cmp) const { - auto p = std::minmax(_range, other._range, [&cmp] (auto&& a, auto&& b) { - return wrapping_range::less_than(a.start_bound(), b.start_bound(), cmp); - }); - if (wrapping_range::greater_than_or_equal(p.first.end_bound(), p.second.start_bound(), cmp)) { - auto end = std::min(p.first.end_bound(), p.second.end_bound(), [&cmp] (auto&& a, auto&& b) { - return !wrapping_range::greater_than_or_equal(a, b, cmp); - }); - return nonwrapping_range(p.second.start(), end.b); - } - return {}; - } - - template - friend std::ostream& operator<<(std::ostream& out, const nonwrapping_range& r); -}; - -template -std::ostream& operator<<(std::ostream& out, const nonwrapping_range& r) { - return out << r._range; -} - -template -using range = wrapping_range; - -GCC6_CONCEPT( -template typename T, typename U> -concept bool Range = std::is_same, wrapping_range>::value || std::is_same, nonwrapping_range>::value; -) - -// Allow using range in a hash table. The hash function 31 * left + -// right is the same one used by Cassandra's AbstractBounds.hashCode(). -namespace std { - -template -struct hash> { - using argument_type = wrapping_range; - using result_type = decltype(std::hash()(std::declval())); - result_type operator()(argument_type const& s) const { - auto hash = std::hash(); - auto left = s.start() ? hash(s.start()->value()) : 0; - auto right = s.end() ? hash(s.end()->value()) : 0; - return 31 * left + right; - } -}; - -template -struct hash> { - using argument_type = nonwrapping_range; - using result_type = decltype(std::hash()(std::declval())); - result_type operator()(argument_type const& s) const { - return hash>()(s); - } -}; - -} diff --git a/scylla/range_tombstone.cc b/scylla/range_tombstone.cc deleted file mode 100644 index c031cf6..0000000 --- a/scylla/range_tombstone.cc +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "range_tombstone.hh" -#include "streamed_mutation.hh" - -std::ostream& operator<<(std::ostream& out, const range_tombstone& rt) { - if (rt) { - return out << "{range_tombstone: start=" << rt.start_bound() << ", end=" << rt.end_bound() << ", " << rt.tomb << "}"; - } else { - return out << "{range_tombstone: none}"; - } -} - -stdx::optional range_tombstone::apply(const schema& s, range_tombstone&& src) -{ - bound_view::compare cmp(s); - if (tomb == src.tomb) { - if (cmp(end_bound(), src.end_bound())) { - end = std::move(src.end); - end_kind = src.end_kind; - } - return { }; - } - if (tomb < src.tomb) { - std::swap(*this, src); - } - if (cmp(end_bound(), src.end_bound())) { - return range_tombstone(end, invert_kind(end_kind), std::move(src.end), src.end_kind, src.tomb); - } - return { }; -} - -position_in_partition_view range_tombstone::position() const { - return position_in_partition_view(position_in_partition_view::range_tombstone_tag_t(), start_bound()); -} - -position_in_partition_view range_tombstone::end_position() const { - return position_in_partition_view(position_in_partition_view::range_tombstone_tag_t(), end_bound()); -} - -void range_tombstone_accumulator::update_current_tombstone() { - _current_tombstone = boost::accumulate(_range_tombstones, _partition_tombstone, [] (tombstone t, const range_tombstone& rt) { - t.apply(rt.tomb); - return t; - }); -} - -void range_tombstone_accumulator::drop_unneeded_tombstones(const clustering_key_prefix& ck, int w) { - auto cmp = [&] (bound_view bv, const clustering_key_prefix& ck, int w) { - return _reversed ? _cmp(ck, w, bv.prefix, weight(bv.kind)) : _cmp(bv.prefix, weight(bv.kind), ck, w); - }; - while (!_range_tombstones.empty() && cmp(_range_tombstones.begin()->end_bound(), ck, w)) { - _range_tombstones.pop_front(); - } - update_current_tombstone(); -} - -void range_tombstone_accumulator::apply(range_tombstone rt) { - drop_unneeded_tombstones(rt.start, weight(rt.start_kind)); - _current_tombstone.apply(rt.tomb); - - auto cmp = [&] (const range_tombstone& rt1, const range_tombstone& rt2) { - return _reversed ? _cmp(rt2.end_bound(), rt1.end_bound()) : _cmp(rt1.end_bound(), rt2.end_bound()); - }; - _range_tombstones.insert(boost::upper_bound(_range_tombstones, rt, cmp), std::move(rt)); -} - -void range_tombstone_accumulator::clear() { - _range_tombstones.clear(); - _partition_tombstone = { }; - _current_tombstone = { }; -} diff --git a/scylla/range_tombstone.hh b/scylla/range_tombstone.hh deleted file mode 100644 index 9ab3338..0000000 --- a/scylla/range_tombstone.hh +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include -#include -#include "hashing.hh" -#include "keys.hh" -#include "tombstone.hh" -#include "clustering_bounds_comparator.hh" -#include "stdx.hh" -#include "position_in_partition.hh" - -namespace bi = boost::intrusive; - -/** - * Represents a ranged deletion operation. Can be empty. - */ -class range_tombstone final { - bi::set_member_hook> _link; -public: - clustering_key_prefix start; - bound_kind start_kind; - clustering_key_prefix end; - bound_kind end_kind; - tombstone tomb; - range_tombstone(clustering_key_prefix start, bound_kind start_kind, clustering_key_prefix end, bound_kind end_kind, tombstone tomb) - : start(std::move(start)) - , start_kind(start_kind) - , end(std::move(end)) - , end_kind(end_kind) - , tomb(std::move(tomb)) - { } - range_tombstone(bound_view start, bound_view end, tombstone tomb) - : range_tombstone(start.prefix, start.kind, end.prefix, end.kind, std::move(tomb)) - { } - range_tombstone(clustering_key_prefix&& start, clustering_key_prefix&& end, tombstone tomb) - : range_tombstone(std::move(start), bound_kind::incl_start, std::move(end), bound_kind::incl_end, std::move(tomb)) - { } - // IDL constructor - range_tombstone(clustering_key_prefix&& start, tombstone tomb, bound_kind start_kind, clustering_key_prefix&& end, bound_kind end_kind) - : range_tombstone(std::move(start), start_kind, std::move(end), end_kind, std::move(tomb)) - { } - range_tombstone(range_tombstone&& rt) noexcept - : range_tombstone(std::move(rt.start), rt.start_kind, std::move(rt.end), rt.end_kind, std::move(rt.tomb)) { - update_node(rt._link); - } - struct without_link { }; - range_tombstone(range_tombstone&& rt, without_link) noexcept - : range_tombstone(std::move(rt.start), rt.start_kind, std::move(rt.end), rt.end_kind, std::move(rt.tomb)) { - } - range_tombstone(const range_tombstone& rt) - : range_tombstone(rt.start, rt.start_kind, rt.end, rt.end_kind, rt.tomb) - { } - range_tombstone& operator=(range_tombstone&& rt) noexcept { - update_node(rt._link); - move_assign(std::move(rt)); - return *this; - } - range_tombstone& operator=(const range_tombstone& rt) { - start = rt.start; - start_kind = rt.start_kind; - end = rt.end; - end_kind = rt.end_kind; - tomb = rt.tomb; - return *this; - } - const bound_view start_bound() const { - return bound_view(start, start_kind); - } - const bound_view end_bound() const { - return bound_view(end, end_kind); - } - // Range tombstone covers all rows with positions p such that: position() <= p < end_position() - position_in_partition_view position() const; - position_in_partition_view end_position() const; - bool empty() const { - return !bool(tomb); - } - explicit operator bool() const { - return bool(tomb); - } - bool equal(const schema& s, const range_tombstone& other) const { - return tomb == other.tomb && start_bound().equal(s, other.start_bound()) && end_bound().equal(s, other.end_bound()); - } - struct compare { - bound_view::compare _c; - compare(const schema& s) : _c(s) {} - bool operator()(const range_tombstone& rt1, const range_tombstone& rt2) const { - return _c(rt1.start_bound(), rt2.start_bound()); - } - }; - template - void feed_hash(Hasher& h, const schema& s) const { - start.feed_hash(h, s); - // For backward compatibility, don't consider new fields if - // this could be an old-style, overlapping, range tombstone. 
- if (!start.equal(s, end) || start_kind != bound_kind::incl_start || end_kind != bound_kind::incl_end) { - ::feed_hash(h, start_kind); - end.feed_hash(h, s); - ::feed_hash(h, end_kind); - } - ::feed_hash(h, tomb); - } - friend void swap(range_tombstone& rt1, range_tombstone& rt2) { - range_tombstone tmp(std::move(rt2), without_link()); - rt2.move_assign(std::move(rt1)); - rt1.move_assign(std::move(tmp)); - } - friend std::ostream& operator<<(std::ostream& out, const range_tombstone& rt); - using container_type = bi::set>, &range_tombstone::_link>, - bi::compare, - bi::constant_time_size>; - - static bool is_single_clustering_row_tombstone(const schema& s, const clustering_key_prefix& start, - bound_kind start_kind, const clustering_key_prefix& end, bound_kind end_kind) - { - return start.is_full(s) && start_kind == bound_kind::incl_start - && end_kind == bound_kind::incl_end && start.equal(s, end); - } - - // Applies src to this. The tombstones may be overlapping. - // If the tombstone with larger timestamp has the smaller range the remainder - // is returned, it guaranteed not to overlap with this. - // The start bounds of this and src are required to be equal. The start bound - // of this is not changed. The start bound of the remainder (if there is any) - // is larger than the end bound of this. - stdx::optional apply(const schema& s, range_tombstone&& src); - - // Intersects the range of this tombstone with [pos, +inf) and replaces - // the range of the tombstone if there is an overlap. - // Returns true if there is an overlap. When returns false, the tombstone - // is not modified. - // - // pos must satisfy: - // 1) before_all_clustered_rows() <= pos - // 2) !pos.is_clustering_row() - because range_tombstone bounds can't represent such positions - bool trim_front(const schema& s, position_in_partition_view pos) { - position_in_partition::less_compare less(s); - if (!less(pos, end_position())) { - return false; - } - if (less(position(), pos)) { - bound_view new_start = pos.as_start_bound_view(); - start = new_start.prefix; - start_kind = new_start.kind; - } - return true; - } - - size_t external_memory_usage() const { - return start.external_memory_usage() + end.external_memory_usage(); - } - - size_t memory_usage() const { - return sizeof(range_tombstone) + external_memory_usage(); - } - - // Flips start and end bound so that range tombstone can be used in reversed - // streams. - void flip() { - std::swap(start, end); - std::swap(start_kind, end_kind); - start_kind = flip_bound_kind(start_kind); - end_kind = flip_bound_kind(end_kind); - } -private: - void move_assign(range_tombstone&& rt) { - start = std::move(rt.start); - start_kind = rt.start_kind; - end = std::move(rt.end); - end_kind = rt.end_kind; - tomb = std::move(rt.tomb); - } - void update_node(bi::set_member_hook>& other_link) { - if (other_link.is_linked()) { - // Move the link in case we're being relocated by LSA. - container_type::node_algorithms::replace_node(other_link.this_ptr(), _link.this_ptr()); - container_type::node_algorithms::init(other_link.this_ptr()); - } - } -}; - -// This is a helper intended for accumulating tombstones from a streamed -// mutation and determining what is the tombstone for a given clustering row. 
-//
-// After apply(rt) or tombstone_for_row(ck) are called, the following
-// restrictions apply to subsequent calls:
-//  - apply(rt1) can be invoked only if rt.start_bound() < rt1.start_bound()
-//    and ck < rt1.start_bound()
-//  - tombstone_for_row(ck1) can be invoked only if rt.start_bound() < ck1
-//    and ck < ck1
-//
-// In other words, the position in partition of the mutation fragments passed
-// to the accumulator must be increasing. (See the usage sketch below.)
-class range_tombstone_accumulator {
-    bound_view::compare _cmp;
-    tombstone _partition_tombstone;
-    std::deque<range_tombstone> _range_tombstones;
-    tombstone _current_tombstone;
-    bool _reversed;
-private:
-    void update_current_tombstone();
-    void drop_unneeded_tombstones(const clustering_key_prefix& ck, int w = 0);
-public:
-    range_tombstone_accumulator(const schema& s, bool reversed)
-        : _cmp(s), _reversed(reversed) { }
-
-    void set_partition_tombstone(tombstone t) {
-        _partition_tombstone = t;
-        update_current_tombstone();
-    }
-
-    tombstone get_partition_tombstone() const {
-        return _partition_tombstone;
-    }
-
-    tombstone current_tombstone() const {
-        return _current_tombstone;
-    }
-
-    tombstone tombstone_for_row(const clustering_key_prefix& ck) {
-        drop_unneeded_tombstones(ck);
-        return _current_tombstone;
-    }
-
-    const std::deque<range_tombstone>& range_tombstones_for_row(const clustering_key_prefix& ck) {
-        drop_unneeded_tombstones(ck);
-        return _range_tombstones;
-    }
-
-    void apply(range_tombstone rt);
-
-    void clear();
-};
diff --git a/scylla/range_tombstone_list.cc b/scylla/range_tombstone_list.cc
deleted file mode 100644
index 78eb115..0000000
--- a/scylla/range_tombstone_list.cc
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * Copyright (C) 2016 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see .
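// Illustrative usage of range_tombstone_accumulator (declared above in
// range_tombstone.hh): fragments must be fed in increasing position order.
// The schema s, tombstones rt1/rt2 and clustering keys ck1 < ck2 are assumed
// to exist for the sake of the sketch.

void accumulate_example(const schema& s, tombstone partition_tomb,
                        range_tombstone rt1, range_tombstone rt2,
                        const clustering_key_prefix& ck1,
                        const clustering_key_prefix& ck2) {
    range_tombstone_accumulator acc(s, false /* forward order */);
    acc.set_partition_tombstone(partition_tomb);
    acc.apply(std::move(rt1));                  // rt1 starts at or before ck1
    tombstone t1 = acc.tombstone_for_row(ck1);  // tombstone covering ck1
    acc.apply(std::move(rt2));                  // rt2 must start after ck1
    tombstone t2 = acc.tombstone_for_row(ck2);  // ck2 must come after ck1
}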
- */ - -#include -#include "range_tombstone_list.hh" -#include "utils/allocation_strategy.hh" -#include "utils/to_boost_visitor.hh" - -range_tombstone_list::range_tombstone_list(const range_tombstone_list& x) - : _tombstones(x._tombstones.value_comp()) { - auto cloner = [] (const range_tombstone& x) { - return current_allocator().construct(x); - }; - _tombstones.clone_from(x._tombstones, cloner, current_deleter()); -} - -range_tombstone_list::~range_tombstone_list() { - _tombstones.clear_and_dispose(current_deleter()); -} - -void range_tombstone_list::apply_reversibly(const schema& s, - clustering_key_prefix start, bound_kind start_kind, - clustering_key_prefix end, - bound_kind end_kind, - tombstone tomb, - reverter& rev) -{ - if (!_tombstones.empty()) { - bound_view::compare less(s); - bound_view start_bound(start, start_kind); - auto last = --_tombstones.end(); - range_tombstones_type::iterator it; - if (less(start_bound, last->end_bound())) { - it = _tombstones.upper_bound(start_bound, [less](auto&& sb, auto&& rt) { - return less(sb, rt.end_bound()); - }); - } else { - it = _tombstones.end(); - } - insert_from(s, std::move(it), std::move(start), start_kind, std::move(end), end_kind, std::move(tomb), rev); - return; - } - auto rt = current_allocator().construct( - std::move(start), start_kind, std::move(end), end_kind, std::move(tomb)); - rev.insert(_tombstones.end(), *rt); -} - -/* - * Inserts a new element starting at the position pointed to by the iterator, it. - * This method assumes that: - * (it - 1)->end <= start < it->end - * - * A range tombstone list is a list of ranges [s_0, e_0]...[s_n, e_n] such that: - * - s_i is a start bound and e_i is a end bound - * - s_i < e_i - * - e_i <= s_i+1 - * Basically, ranges are ordered and non-overlapping. - */ -void range_tombstone_list::insert_from(const schema& s, - range_tombstones_type::iterator it, - clustering_key_prefix start, - bound_kind start_kind, - clustering_key_prefix end, - bound_kind end_kind, - tombstone tomb, - reverter& rev) -{ - bound_view::compare less(s); - bound_view end_bound(end, end_kind); - if (it != _tombstones.begin()) { - auto prev = std::prev(it); - if (prev->tomb == tomb && prev->end_bound().adjacent(s, bound_view(start, start_kind))) { - start = prev->start; - start_kind = prev->start_kind; - rev.erase(prev); - } - } - while (it != _tombstones.end()) { - bound_view start_bound(start, start_kind); - if (less(end_bound, start_bound)) { - return; - } - - if (less(end_bound, it->start_bound())) { - // not overlapping - if (it->tomb == tomb && end_bound.adjacent(s, it->start_bound())) { - rev.update(it, {std::move(start), start_kind, it->end, it->end_kind, tomb}); - } else { - auto rt = current_allocator().construct(std::move(start), start_kind, std::move(end), - end_kind, tomb); - rev.insert(it, *rt); - } - return; - } - - auto c = tomb.compare(it->tomb); - if (c == 0) { - // same timestamp, overlapping or adjacent, so merge. - if (less(it->start_bound(), start_bound)) { - start = it->start; - start_kind = it->start_kind; - } - if (less(end_bound, it->end_bound())) { - end = it->end; - end_kind = it->end_kind; - } - it = rev.erase(it); - } else if (c > 0) { - // We overwrite the current tombstone. 
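// Illustrative walk-through of the splitting logic in insert_from() above.
// Assume ck(n) builds a clustering prefix and tomb(n) a tombstone whose
// timestamp is n; both helpers are made up for the example.

void overwrite_example(const schema& s) {
    range_tombstone_list l(s);
    l.apply(s, range_tombstone(ck(0), ck(10), tomb(5)));  // [0, 10] @ t5
    l.apply(s, range_tombstone(ck(3), ck(5), tomb(9)));   // [3, 5] @ t9
    // The newer tombstone wins where they overlap; the list now holds
    // [0, 3) @ t5, [3, 5] @ t9, (5, 10] @ t5: ordered and non-overlapping.
}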
- - if (less(it->start_bound(), start_bound)) { - auto new_end = bound_view(start, invert_kind(start_kind)); - if (!less(new_end, it->start_bound())) { - // Here it->start < start - auto rt = alloc_strategy_unique_ptr( - current_allocator().construct(it->start_bound(), new_end, it->tomb)); - rev.update(it, {start_bound, it->end_bound(), it->tomb}); - rev.insert(it, *rt.release()); - } - } - - if (less(end_bound, it->end_bound())) { - // Here start <= it->start and end < it->end. - auto rt = alloc_strategy_unique_ptr( - current_allocator().construct(std::move(start), start_kind, end, end_kind, std::move(tomb))); - rev.update(it, {std::move(end), invert_kind(end_kind), it->end, it->end_kind, it->tomb}); - rev.insert(it, *rt.release()); - return; - } - - // Here start <= it->start and end >= it->end. - it = rev.erase(it); - } else { - // We don't overwrite the current tombstone. - - if (less(start_bound, it->start_bound())) { - // The new tombstone starts before the current one. - if (less(it->start_bound(), end_bound)) { - // Here start < it->start and it->start < end. - auto new_end_kind = invert_kind(it->start_kind); - if (!less(bound_view(it->start, new_end_kind), start_bound)) { - auto rt = current_allocator().construct( - std::move(start), start_kind, it->start, new_end_kind, tomb); - it = rev.insert(it, *rt); - ++it; - } - } else { - // Here start < it->start and end <= it->start, so just insert the new tombstone. - auto rt = current_allocator().construct( - std::move(start), start_kind, std::move(end), end_kind, std::move(tomb)); - rev.insert(it, *rt); - return; - } - } - - if (less(it->end_bound(), end_bound)) { - // Here the current tombstone overwrites a range of the new one. - start = it->end; - start_kind = invert_kind(it->end_kind); - ++it; - } else { - // Here the current tombstone completely overwrites the new one. - return; - } - } - } - - // If we got here, then just insert the remainder at the end. - auto rt = current_allocator().construct( - std::move(start), start_kind, std::move(end), end_kind, std::move(tomb)); - rev.insert(it, *rt); -} - -range_tombstone_list::range_tombstones_type::iterator range_tombstone_list::find(const schema& s, const range_tombstone& rt) { - bound_view::compare less(s); - auto it = _tombstones.find(rt, [less](auto&& rt1, auto&& rt2) { - return less(rt1.end_bound(), rt2.end_bound()); - }); - - if (it != _tombstones.end() && it->equal(s, rt)) { - return it; - } - return _tombstones.end(); -} - -/* - * Returns the tombstone covering the specified key, or an empty tombstone otherwise. 
- */ -tombstone range_tombstone_list::search_tombstone_covering(const schema& s, const clustering_key_prefix& key) const { - bound_view::compare less(s); - auto it = _tombstones.upper_bound(key, [less](auto&& k, auto&& rt) { - return less(k, rt.end_bound()); - }); - - if (it == _tombstones.end() || less(key, it->start_bound())) { - return {}; - } - - return it->tomb; -} - -range_tombstone_list range_tombstone_list::difference(const schema& s, const range_tombstone_list& other) const { - range_tombstone_list diff(s); - bound_view::compare cmp_rt(s); - auto other_rt = other.begin(); - auto this_rt = begin(); - if (this_rt == end()) { - return diff; - } - bound_view cur_start = this_rt->start_bound(); - bound_view cur_end = this_rt->end_bound(); - auto advance_this_rt = [&] () { - if (++this_rt != end()) { - cur_start = this_rt->start_bound(); - cur_end = this_rt->end_bound(); - } - }; - while (this_rt != end() && other_rt != other.end()) { - if (cmp_rt(cur_end, other_rt->start_bound())) { - diff.apply(s, cur_start, cur_end, this_rt->tomb); - advance_this_rt(); - continue; - } - if (cmp_rt(other_rt->end_bound(), cur_start)) { - ++other_rt; - continue; - } - auto new_end = bound_view(other_rt->start_bound().prefix, invert_kind(other_rt->start_bound().kind)); - if (cmp_rt(cur_start, new_end)) { - diff.apply(s, cur_start, new_end, this_rt->tomb); - cur_start = other_rt->start_bound(); - } - if (cmp_rt(cur_end, other_rt->end_bound())) { - if (this_rt->tomb > other_rt->tomb) { - diff.apply(s, cur_start, cur_end, this_rt->tomb); - } - advance_this_rt(); - } else { - auto end = other_rt->end_bound(); - if (this_rt->tomb > other_rt->tomb) { - diff.apply(s, cur_start, end, this_rt->tomb); - } - cur_start = bound_view(end.prefix, invert_kind(end.kind)); - ++other_rt; - if (cmp_rt(cur_end, cur_start)) { - advance_this_rt(); - } - } - } - while (this_rt != end()) { - diff.apply(s, cur_start, cur_end, this_rt->tomb); - advance_this_rt(); - } - return diff; -} - -void range_tombstone_list::apply(const schema& s, const range_tombstone_list& rt_list) { - for (auto&& rt : rt_list) { - apply(s, rt); - } -} - -// See reversibly_mergeable.hh -range_tombstone_list::reverter range_tombstone_list::apply_reversibly(const schema& s, range_tombstone_list& rt_list) { - reverter rev(s, *this); - for (auto&& rt : rt_list) { - apply_reversibly(s, rt.start, rt.start_kind, rt.end, rt.end_kind, rt.tomb, rev); - } - return rev; -} - -boost::iterator_range -range_tombstone_list::slice(const schema& s, const query::clustering_range& r) const { - auto bv_range = bound_view::from_range(r); - struct order_by_end { - bound_view::compare less; - order_by_end(const schema& s) : less(s) {} - bool operator()(bound_view v, const range_tombstone& rt) const { return less(v, rt.end_bound()); } - bool operator()(const range_tombstone& rt, bound_view v) const { return less(rt.end_bound(), v); } - }; - struct order_by_start { - bound_view::compare less; - order_by_start(const schema& s) : less(s) {} - bool operator()(bound_view v, const range_tombstone& rt) const { return less(v, rt.start_bound()); } - bool operator()(const range_tombstone& rt, bound_view v) const { return less(rt.start_bound(), v); } - }; - return boost::make_iterator_range( - _tombstones.lower_bound(bv_range.first, order_by_end{s}), - _tombstones.upper_bound(bv_range.second, order_by_start{s})); -} - -boost::iterator_range -range_tombstone_list::slice(const schema& s, position_in_partition_view start, position_in_partition_view end) const { - struct order_by_end { - 
position_in_partition::less_compare less; - order_by_end(const schema& s) : less(s) {} - bool operator()(position_in_partition_view v, const range_tombstone& rt) const { return less(v, rt.end_position()); } - bool operator()(const range_tombstone& rt, position_in_partition_view v) const { return less(rt.end_position(), v); } - }; - struct order_by_start { - position_in_partition::less_compare less; - order_by_start(const schema& s) : less(s) {} - bool operator()(position_in_partition_view v, const range_tombstone& rt) const { return less(v, rt.position()); } - bool operator()(const range_tombstone& rt, position_in_partition_view v) const { return less(rt.position(), v); } - }; - return boost::make_iterator_range( - _tombstones.upper_bound(start, order_by_end{s}), // end_position() is exclusive, hence upper_bound() - _tombstones.lower_bound(end, order_by_start{s})); -} - -range_tombstone_list::iterator -range_tombstone_list::erase(const_iterator a, const_iterator b) { - return _tombstones.erase_and_dispose(a, b, current_deleter()); -} - -void range_tombstone_list::trim(const schema& s, const query::clustering_row_ranges& ranges) { - range_tombstone_list list(s); - bound_view::compare less(s); - for (auto&& range : ranges) { - auto start = bound_view::from_range_start(range); - auto end = bound_view::from_range_end(range); - for (const range_tombstone& rt : slice(s, range)) { - list.apply(s, range_tombstone( - std::max(rt.start_bound(), start, less), - std::min(rt.end_bound(), end, less), - rt.tomb)); - } - } - *this = std::move(list); -} - -range_tombstone_list::range_tombstones_type::iterator -range_tombstone_list::reverter::insert(range_tombstones_type::iterator it, range_tombstone& new_rt) { - _ops.emplace_back(insert_undo_op(new_rt)); - return _dst._tombstones.insert_before(it, new_rt); -} - -range_tombstone_list::range_tombstones_type::iterator -range_tombstone_list::reverter::erase(range_tombstones_type::iterator it) { - _ops.emplace_back(erase_undo_op(*it)); - return _dst._tombstones.erase(it); -} - -void range_tombstone_list::reverter::update(range_tombstones_type::iterator it, range_tombstone&& new_rt) { - _ops.reserve(_ops.size() + 1); - swap(*it, new_rt); - _ops.emplace_back(update_undo_op(std::move(new_rt), *it)); -} - -void range_tombstone_list::reverter::revert() noexcept { - for (auto&& rt : _ops | boost::adaptors::reversed) { - boost::apply_visitor(to_boost_visitor([this] (auto& op) { - op.undo(_s, _dst); - }), rt); - } - cancel(); -} - -range_tombstone_list::range_tombstones_type::iterator -range_tombstone_list::nop_reverter::insert(range_tombstones_type::iterator it, range_tombstone& new_rt) { - return _dst._tombstones.insert_before(it, new_rt); -} - -range_tombstone_list::range_tombstones_type::iterator -range_tombstone_list::nop_reverter::erase(range_tombstones_type::iterator it) { - return _dst._tombstones.erase_and_dispose(it, alloc_strategy_deleter()); -} - -void range_tombstone_list::nop_reverter::update(range_tombstones_type::iterator it, range_tombstone&& new_rt) { - *it = std::move(new_rt); -} - -void range_tombstone_list::insert_undo_op::undo(const schema& s, range_tombstone_list& rt_list) noexcept { - auto it = rt_list.find(s, _new_rt); - assert (it != rt_list.end()); - rt_list._tombstones.erase_and_dispose(it, current_deleter()); -} - -void range_tombstone_list::erase_undo_op::undo(const schema& s, range_tombstone_list& rt_list) noexcept { - rt_list._tombstones.insert(*_rt.release()); -} - -void range_tombstone_list::update_undo_op::undo(const schema& s, 
range_tombstone_list& rt_list) noexcept { - auto it = rt_list.find(s, _new_rt); - assert (it != rt_list.end()); - *it = std::move(_old_rt); -} - -std::ostream& operator<<(std::ostream& out, const range_tombstone_list& list) { - return out << "{" << ::join(", ", list) << "}"; -} - -bool range_tombstone_list::equal(const schema& s, const range_tombstone_list& other) const { - return boost::equal(_tombstones, other._tombstones, [&s] (auto&& rt1, auto&& rt2) { - return rt1.equal(s, rt2); - }); -} diff --git a/scylla/range_tombstone_list.hh b/scylla/range_tombstone_list.hh deleted file mode 100644 index 646e30e..0000000 --- a/scylla/range_tombstone_list.hh +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "range_tombstone.hh" -#include "query-request.hh" -#include "position_in_partition.hh" -#include - -class range_tombstone_list final { - using range_tombstones_type = range_tombstone::container_type; - class insert_undo_op { - const range_tombstone& _new_rt; - public: - insert_undo_op(const range_tombstone& new_rt) - : _new_rt(new_rt) { } - void undo(const schema& s, range_tombstone_list& rt_list) noexcept; - }; - class erase_undo_op { - alloc_strategy_unique_ptr _rt; - public: - erase_undo_op(range_tombstone& rt) - : _rt(&rt) { } - void undo(const schema& s, range_tombstone_list& rt_list) noexcept; - }; - class update_undo_op { - range_tombstone _old_rt; - const range_tombstone& _new_rt; - public: - update_undo_op(range_tombstone&& old_rt, const range_tombstone& new_rt) - : _old_rt(std::move(old_rt)), _new_rt(new_rt) { } - void undo(const schema& s, range_tombstone_list& rt_list) noexcept; - }; - class reverter { - private: - using op = boost::variant; - std::vector _ops; - const schema& _s; - protected: - range_tombstone_list& _dst; - public: - reverter(const schema& s, range_tombstone_list& dst) - : _s(s) - , _dst(dst) { } - virtual ~reverter() { - revert(); - } - reverter(reverter&&) = default; - reverter& operator=(reverter&&) = default; - reverter(const reverter&) = delete; - reverter& operator=(reverter&) = delete; - virtual range_tombstones_type::iterator insert(range_tombstones_type::iterator it, range_tombstone& new_rt); - virtual range_tombstones_type::iterator erase(range_tombstones_type::iterator it); - virtual void update(range_tombstones_type::iterator it, range_tombstone&& new_rt); - void revert() noexcept; - void cancel() noexcept { - _ops.clear(); - } - }; - class nop_reverter : public reverter { - public: - nop_reverter(const schema& s, range_tombstone_list& rt_list) - : reverter(s, rt_list) { } - virtual range_tombstones_type::iterator insert(range_tombstones_type::iterator it, range_tombstone& new_rt) override; - virtual range_tombstones_type::iterator erase(range_tombstones_type::iterator it) override; - virtual void update(range_tombstones_type::iterator it, 
range_tombstone&& new_rt) override; - }; -private: - range_tombstones_type _tombstones; -public: - // ForwardIterator - using iterator = range_tombstones_type::iterator; - using const_iterator = range_tombstones_type::const_iterator; - - struct copy_comparator_only { }; - range_tombstone_list(const schema& s) - : _tombstones(range_tombstone::compare(s)) - { } - range_tombstone_list(const range_tombstone_list& x, copy_comparator_only) - : _tombstones(x._tombstones.key_comp()) - { } - range_tombstone_list(const range_tombstone_list&); - range_tombstone_list& operator=(range_tombstone_list&) = delete; - range_tombstone_list(range_tombstone_list&&) = default; - range_tombstone_list& operator=(range_tombstone_list&&) = default; - ~range_tombstone_list(); - size_t size() const { - return _tombstones.size(); - } - bool empty() const { - return _tombstones.empty(); - } - range_tombstones_type& tombstones() { - return _tombstones; - } - auto begin() { - return _tombstones.begin(); - } - auto begin() const { - return _tombstones.begin(); - } - auto end() { - return _tombstones.end(); - } - auto end() const { - return _tombstones.end(); - } - void apply(const schema& s, const bound_view& start_bound, const bound_view& end_bound, tombstone tomb) { - apply(s, start_bound.prefix, start_bound.kind, end_bound.prefix, end_bound.kind, std::move(tomb)); - } - void apply(const schema& s, const range_tombstone& rt) { - apply(s, rt.start, rt.start_kind, rt.end, rt.end_kind, rt.tomb); - } - void apply(const schema& s, range_tombstone&& rt) { - apply(s, std::move(rt.start), rt.start_kind, std::move(rt.end), rt.end_kind, std::move(rt.tomb)); - } - void apply(const schema& s, clustering_key_prefix start, bound_kind start_kind, - clustering_key_prefix end, bound_kind end_kind, tombstone tomb) { - nop_reverter rev(s, *this); - apply_reversibly(s, std::move(start), start_kind, std::move(end), end_kind, std::move(tomb), rev); - } - tombstone search_tombstone_covering(const schema& s, const clustering_key_prefix& key) const; - // Returns the range of tombstones which overlap with the given range - boost::iterator_range<const_iterator> slice(const schema& s, const query::clustering_range&) const; - // Returns the range tombstones which overlap with [start, end) - boost::iterator_range<const_iterator> slice(const schema& s, position_in_partition_view start, position_in_partition_view end) const; - iterator erase(const_iterator, const_iterator); - // Ensures that every range tombstone is strictly contained within the given clustering ranges. - // Preserves all information which may be relevant for rows from those ranges. - void trim(const schema& s, const query::clustering_row_ranges&); - range_tombstone_list difference(const schema& s, const range_tombstone_list& rt_list) const;
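    // For illustration only (not part of the original file; assumes a schema
    // `s` whose table has a clustering key, two clustering prefixes with
    // ck1 <= ck2, and a write timestamp `ts`):
    //
    //   range_tombstone_list list(s);
    //   list.apply(s, ck1, bound_kind::incl_start,
    //              ck2, bound_kind::incl_end, tombstone(ts, gc_clock::now()));
    //   tombstone t = list.search_tombstone_covering(s, ck1); // t.timestamp == ts
    //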
- // Erases the range tombstones for which filter returns true. - template <typename Pred> - void erase_where(Pred filter) { - static_assert(std::is_same<bool, std::result_of_t<Pred(const range_tombstone&)>>::value, - "bad Pred signature"); - auto it = begin(); - while (it != end()) { - if (filter(*it)) { - it = _tombstones.erase_and_dispose(it, current_deleter()); - } else { - ++it; - } - } - } - void clear() { - _tombstones.clear_and_dispose(current_deleter()); - } - void apply(const schema& s, const range_tombstone_list& rt_list); - // See reversibly_mergeable.hh - reverter apply_reversibly(const schema& s, range_tombstone_list& rt_list); - - friend std::ostream& operator<<(std::ostream& out, const range_tombstone_list&); - bool equal(const schema&, const range_tombstone_list&) const; -private: - void apply_reversibly(const schema& s, clustering_key_prefix start, bound_kind start_kind, - clustering_key_prefix end, bound_kind end_kind, tombstone tomb, reverter& rev); - void insert_from(const schema& s, range_tombstones_type::iterator it, clustering_key_prefix start, - bound_kind start_kind, clustering_key_prefix end, bound_kind end_kind, tombstone tomb, reverter& rev); - range_tombstones_type::iterator find(const schema& s, const range_tombstone& rt); -}; diff --git a/scylla/read_context.hh b/scylla/read_context.hh deleted file mode 100644 index e22027a..0000000 --- a/scylla/read_context.hh +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include "schema.hh" -#include "query-request.hh" -#include "streamed_mutation.hh" -#include "partition_version.hh" -#include "tracing/tracing.hh" -#include "row_cache.hh" - -namespace cache { - -/* - * Represents a reader to the underlying source. - * This reader automatically makes sure that it's up to date with all cache updates. - */ -class autoupdating_underlying_reader final { - row_cache& _cache; - read_context& _read_context; - stdx::optional<mutation_reader> _reader; - utils::phased_barrier::phase_type _reader_creation_phase; - dht::partition_range _range = { }; - stdx::optional<dht::decorated_key> _last_key; - stdx::optional<dht::decorated_key> _new_last_key; -public: - autoupdating_underlying_reader(row_cache& cache, read_context& context) - : _cache(cache) - , _read_context(context) - { } - // Reads the next partition without changing the mutation source snapshot.
- future read_next_same_phase() { - _last_key = std::move(_new_last_key); - return (*_reader)().then([this] (auto&& smopt) { - if (smopt) { - _new_last_key = smopt->decorated_key(); - } - return std::move(smopt); - }); - } - future operator()() { - _last_key = std::move(_new_last_key); - auto start = population_range_start(); - auto phase = _cache.phase_of(start); - if (!_reader || _reader_creation_phase != phase) { - if (_last_key) { - auto cmp = dht::ring_position_comparator(*_cache._schema); - auto&& new_range = _range.split_after(*_last_key, cmp); - if (!new_range) { - return make_ready_future(streamed_mutation_opt()); - } - _range = std::move(*new_range); - _last_key = {}; - } - if (_reader) { - ++_cache._tracker._stats.underlying_recreations; - } - auto& snap = _cache.snapshot_for_phase(phase); - _reader = {}; // See issue #2644 - _reader = _cache.create_underlying_reader(_read_context, snap, _range); - _reader_creation_phase = phase; - } - return (*_reader)().then([this] (auto&& smopt) { - if (smopt) { - _new_last_key = smopt->decorated_key(); - } - return std::move(smopt); - }); - } - future<> fast_forward_to(dht::partition_range&& range) { - auto snapshot_and_phase = _cache.snapshot_of(dht::ring_position_view::for_range_start(_range)); - return fast_forward_to(std::move(range), snapshot_and_phase.snapshot, snapshot_and_phase.phase); - } - future<> fast_forward_to(dht::partition_range&& range, mutation_source& snapshot, row_cache::phase_type phase) { - _range = std::move(range); - _last_key = { }; - _new_last_key = { }; - if (_reader) { - if (_reader_creation_phase == phase) { - ++_cache._tracker._stats.underlying_partition_skips; - return _reader->fast_forward_to(_range); - } else { - ++_cache._tracker._stats.underlying_recreations; - _reader = {}; // See issue #2644 - } - } - _reader = _cache.create_underlying_reader(_read_context, snapshot, _range); - _reader_creation_phase = phase; - return make_ready_future<>(); - } - utils::phased_barrier::phase_type creation_phase() const { - assert(_reader); - return _reader_creation_phase; - } - const dht::partition_range& range() const { - return _range; - } - dht::ring_position_view population_range_start() const { - return _last_key ? dht::ring_position_view::for_after_key(*_last_key) - : dht::ring_position_view::for_range_start(_range); - } -}; - -class read_context final : public enable_lw_shared_from_this { - row_cache& _cache; - schema_ptr _schema; - const dht::partition_range& _range; - const query::partition_slice& _slice; - const io_priority_class& _pc; - tracing::trace_state_ptr _trace_state; - streamed_mutation::forwarding _fwd; - mutation_reader::forwarding _fwd_mr; - bool _range_query; - autoupdating_underlying_reader _underlying; - uint64_t _underlying_created = 0; - - // When reader enters a partition, it must be set up for reading that - // partition from the underlying mutation source (_sm) in one of two ways: - // - // 1) either _underlying is already in that partition, then _sm is set to the - // stream obtained from it. - // - // 2) _underlying is before the partition, then _underlying_snapshot and _key - // are set so that _sm can be created on demand. 
- // - streamed_mutation_opt _sm; - mutation_source_opt _underlying_snapshot; - dht::partition_range _sm_range; - stdx::optional _key; - row_cache::phase_type _phase; -public: - read_context(row_cache& cache, - schema_ptr schema, - const dht::partition_range& range, - const query::partition_slice& slice, - const io_priority_class& pc, - tracing::trace_state_ptr trace_state, - streamed_mutation::forwarding fwd, - mutation_reader::forwarding fwd_mr) - : _cache(cache) - , _schema(std::move(schema)) - , _range(range) - , _slice(slice) - , _pc(pc) - , _trace_state(std::move(trace_state)) - , _fwd(fwd) - , _fwd_mr(fwd_mr) - , _range_query(!range.is_singular() || !range.start()->value().has_key()) - , _underlying(_cache, *this) - { - ++_cache._tracker._stats.reads; - } - ~read_context() { - ++_cache._tracker._stats.reads_done; - if (_underlying_created) { - _cache._stats.reads_with_misses.mark(); - ++_cache._tracker._stats.reads_with_misses; - } else { - _cache._stats.reads_with_no_misses.mark(); - } - } - read_context(const read_context&) = delete; - row_cache& cache() { return _cache; } - const schema_ptr& schema() const { return _schema; } - const dht::partition_range& range() const { return _range; } - const query::partition_slice& slice() const { return _slice; } - const io_priority_class& pc() const { return _pc; } - tracing::trace_state_ptr trace_state() const { return _trace_state; } - streamed_mutation::forwarding fwd() const { return _fwd; } - mutation_reader::forwarding fwd_mr() const { return _fwd_mr; } - bool is_range_query() const { return _range_query; } - autoupdating_underlying_reader& underlying() { return _underlying; } - row_cache::phase_type phase() const { return _phase; } - const dht::decorated_key& key() const { return _sm->decorated_key(); } - void on_underlying_created() { ++_underlying_created; } -private: - future<> create_sm(); - future<> ensure_sm_created() { - if (_sm) { - return make_ready_future<>(); - } - return create_sm(); - } -public: - // Prepares the underlying streamed_mutation to represent dk in given snapshot. - // Partitions must be entered with strictly monotonic keys. - // The key must be after the current range of the underlying() reader. - // The phase argument must match the snapshot's phase. - void enter_partition(const dht::decorated_key& dk, mutation_source& snapshot, row_cache::phase_type phase) { - _phase = phase; - _sm = {}; - _underlying_snapshot = snapshot; - _key = dk; - } - // Prepares the underlying streamed_mutation to be sm. - // The phase argument must match the phase of the snapshot used to obtain sm. - void enter_partition(streamed_mutation&& sm, row_cache::phase_type phase) { - _phase = phase; - _sm = std::move(sm); - _underlying_snapshot = {}; - } - // Fast forwards the underlying streamed_mutation to given range. - future<> fast_forward_to(position_range range) { - return ensure_sm_created().then([this, range = std::move(range)] () mutable { - ++_cache._tracker._stats.underlying_row_skips; - return _sm->fast_forward_to(std::move(range)); - }); - } - // Returns the underlying streamed_mutation. - // The caller has to ensure that the streamed mutation was already created - // (e.g. the most recent call to enter_partition(const dht::decorated_key&, ...) - // was followed by a call to fast_forward_to()). 
- streamed_mutation& get_streamed_mutation() noexcept { - return *_sm; - } - // Gets the next fragment from the underlying streamed_mutation - future get_next_fragment() { - return ensure_sm_created().then([this] { - return (*_sm)(); - }); - } -}; - -} diff --git a/scylla/release.cc b/scylla/release.cc deleted file mode 100644 index ecd159a..0000000 --- a/scylla/release.cc +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "version.hh" - -#include - -static const char scylla_version_str[] = SCYLLA_VERSION; -static const char scylla_release_str[] = SCYLLA_RELEASE; - -std::string scylla_version() -{ - return sprint("%s-%s", scylla_version_str, scylla_release_str); -} - -// get the version number into writeable memory, so we can grep for it if we get a core dump -std::string version_stamp_for_core - = "VERSION VERSION VERSION $Id: " + scylla_version() + " $ VERSION VERSION VERSION"; diff --git a/scylla/release.hh b/scylla/release.hh deleted file mode 100644 index a6482eb..0000000 --- a/scylla/release.hh +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include - -std::string scylla_version(); diff --git a/scylla/repair/range_split.hh b/scylla/repair/range_split.hh deleted file mode 100644 index f660e80..0000000 --- a/scylla/repair/range_split.hh +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include <stack> - -#include "dht/i_partitioner.hh" - -// range_splitter(r, N, K) is a helper for splitting a given token_range r of -// estimated size N into many small ranges of size K, and later iterating -// over those small ranges once with the has_next() and next() methods. -// This implementation assumes only the availability of a range::midpoint() -// operation, and as a result creates ranges with size between K/2 and K. -// Moreover, it has a memory requirement of log(N). With more general arithmetic -// support over tokens, we could get exactly K and O(1) memory. -class range_splitter { - std::stack<std::pair<::dht::token_range, uint64_t>> _stack; - uint64_t _desired; -public: - range_splitter(::dht::token_range r, uint64_t N, uint64_t K) { - _stack.push({r, N}); - _desired = K; - } - bool has_next() const { - return !_stack.empty(); - } - ::dht::token_range next() { - // If the head range's estimated size is small enough, return it. - // Otherwise split it to two halves, push the second half on the - // stack, and repeat with the first half. May need to do this more - // than once (up to log(N/K) times) until we have one range small - // enough to return. - assert(!_stack.empty()); - auto range = _stack.top().first; - auto size = _stack.top().second; - _stack.pop(); - while (size > _desired) { - // The use of minimum_token() here twice is not a typo - because wrap- - // around token ranges are supported by midpoint(), the beyond-maximum - // token can also be represented by minimum_token(). - auto midpoint = dht::global_partitioner().midpoint( - range.start() ? range.start()->value() : dht::minimum_token(), - range.end() ? range.end()->value() : dht::minimum_token()); - // This shouldn't happen, but if the range included just one token, we - // can't split further (split() may actually fail with an assertion failure) - if ((range.start() && midpoint == range.start()->value()) || - (range.end() && midpoint == range.end()->value())) { - return range; - } - auto halves = range.split(midpoint, dht::token_comparator()); - _stack.push({halves.second, size / 2.0}); - range = halves.first; - size /= 2.0; - } - return range; - } -};
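As a usage sketch (not from the patch itself; the range argument and both size estimates below are made-up placeholders), a caller drains the splitter like this:

    // Hypothetical caller of range_splitter; N and K below are placeholder values.
    void repair_in_pieces(::dht::token_range local_range) {
        range_splitter pieces(local_range, 1000000 /* estimated size N */, 100 /* desired size K */);
        while (pieces.has_next()) {
            ::dht::token_range sub = pieces.next(); // estimated K/2..K partitions
            // checksum and, if needed, sync `sub` here
        }
    }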
diff --git a/scylla/repair/repair.cc b/scylla/repair/repair.cc deleted file mode 100644 index 5f38ba9..0000000 --- a/scylla/repair/repair.cc +++ /dev/null @@ -1,1177 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#include "repair.hh" -#include "range_split.hh" - -#include "streaming/stream_plan.hh" -#include "streaming/stream_state.hh" -#include "gms/inet_address.hh" -#include "db/config.hh" -#include "service/storage_service.hh" -#include "service/priority_manager.hh" -#include "message/messaging_service.hh" - -#include -#include -#include -#include -#include - -#include -#include - -static logging::logger rlogger("repair"); - -class repair_info { -public: - seastar::sharded<database>& db; - sstring keyspace; - dht::token_range_vector ranges; - std::vector<sstring> cfs; - int id; - shard_id shard; - std::vector<sstring> data_centers; - std::vector<sstring> hosts; - size_t nr_failed_ranges = 0; - // Map of peer -> <cf, ranges> - std::unordered_map<gms::inet_address, std::unordered_map<sstring, dht::token_range_vector>> ranges_need_repair_in; - std::unordered_map<gms::inet_address, std::unordered_map<sstring, dht::token_range_vector>> ranges_need_repair_out; - // FIXME: this "100" needs to be a parameter. - uint64_t target_partitions = 100; - // This affects how many ranges we put in a stream plan. The more ranges - // we put in, the more memory we use to store them. However, it can reduce - // the total number of stream_plans we use for the repair. - size_t sub_ranges_to_stream = 10 * 1024; - size_t sp_index = 0; - size_t current_sub_ranges_nr_in = 0; - size_t current_sub_ranges_nr_out = 0; - int ranges_index = 0; - // Only allow one stream_plan in flight - semaphore sp_parallelism_semaphore{1}; -public: - repair_info(seastar::sharded<database>& db_, - const sstring& keyspace_, - const dht::token_range_vector& ranges_, - const std::vector<sstring>& cfs_, - int id_, - const std::vector<sstring>& data_centers_, - const std::vector<sstring>& hosts_) - : db(db_) - , keyspace(keyspace_) - , ranges(ranges_) - , cfs(cfs_) - , id(id_) - , shard(engine().cpu_id()) - , data_centers(data_centers_) - , hosts(hosts_) { - } - future<> do_streaming() { - size_t ranges_in = 0; - size_t ranges_out = 0; - auto sp_in = make_lw_shared<streaming::stream_plan>(sprint("repair-in-id-%d-shard-%d-index-%d", id, shard, sp_index)); - auto sp_out = make_lw_shared<streaming::stream_plan>(sprint("repair-out-id-%d-shard-%d-index-%d", id, shard, sp_index)); - - for (auto& x : ranges_need_repair_in) { - auto& peer = x.first; - for (auto& y : x.second) { - auto& cf = y.first; - auto& stream_ranges = y.second; - ranges_in += stream_ranges.size(); - sp_in->request_ranges(peer, keyspace, std::move(stream_ranges), {cf}); - } - } - ranges_need_repair_in.clear(); - current_sub_ranges_nr_in = 0; - - for (auto& x : ranges_need_repair_out) { - auto& peer = x.first; - for (auto& y : x.second) { - auto& cf = y.first; - auto& stream_ranges = y.second; - ranges_out += stream_ranges.size(); - sp_out->transfer_ranges(peer, keyspace, std::move(stream_ranges), {cf}); - } - } - ranges_need_repair_out.clear(); - current_sub_ranges_nr_out = 0; - - if (ranges_in || ranges_out) { - rlogger.info("Start streaming for repair id={}, shard={}, index={}, ranges_in={}, ranges_out={}", id, shard, sp_index, ranges_in, ranges_out); - } - sp_index++; - - return sp_in->execute().discard_result().then([sp_in, sp_out] { - return sp_out->execute().discard_result(); - }).handle_exception([] (auto ep) { - rlogger.warn("repair's stream failed: {}", ep); - return make_exception_future(ep); - }); - } - void check_failed_ranges() { - if (nr_failed_ranges) { - rlogger.info("repair {} on shard {} failed - {} ranges failed", id, shard, nr_failed_ranges); - throw std::runtime_error(sprint("repair %d on shard %d failed to do checksum for %d sub ranges", id, shard, nr_failed_ranges)); - } else { - rlogger.info("repair {} on shard {} completed successfully", id, shard); - } - } - future<> request_transfer_ranges(const sstring& cf, - const
::dht::token_range& range, - const std::vector& neighbors_in, - const std::vector& neighbors_out) { - rlogger.debug("Add cf {}, range {}, current_sub_ranges_nr_in {}, current_sub_ranges_nr_out {}", cf, range, current_sub_ranges_nr_in, current_sub_ranges_nr_out); - return sp_parallelism_semaphore.wait(1).then([this, cf, range, neighbors_in, neighbors_out] { - for (const auto& peer : neighbors_in) { - ranges_need_repair_in[peer][cf].emplace_back(range); - current_sub_ranges_nr_in++; - } - for (const auto& peer : neighbors_out) { - ranges_need_repair_out[peer][cf].emplace_back(range); - current_sub_ranges_nr_out++; - } - if (current_sub_ranges_nr_in >= sub_ranges_to_stream || current_sub_ranges_nr_out >= sub_ranges_to_stream) { - return do_streaming(); - } - return make_ready_future<>(); - }).finally([this] { - sp_parallelism_semaphore.signal(1); - }); - } -}; - -template -inline -static std::ostream& operator<<(std::ostream& os, const std::unordered_map& v) { - bool first = true; - os << "{"; - for (auto&& elem : v) { - if (!first) { - os << ", "; - } else { - first = false; - } - os << elem.first << "=" << elem.second; - } - os << "}"; - return os; -} - -static std::vector list_column_families(const database& db, const sstring& keyspace) { - std::vector ret; - for (auto &&e : db.get_column_families_mapping()) { - if (e.first.first == keyspace) { - ret.push_back(e.first.second); - } - } - return ret; -} - -template -void remove_item(Collection& c, T& item) { - auto it = std::find(c.begin(), c.end(), item); - if (it != c.end()) { - c.erase(it); - } -} - -// Return all of the neighbors with whom we share the provided range. -static std::vector get_neighbors(database& db, - const sstring& ksname, query::range range, - const std::vector& data_centers, - const std::vector& hosts) { - - keyspace& ks = db.find_keyspace(ksname); - auto& rs = ks.get_replication_strategy(); - - dht::token tok = range.end() ? range.end()->value() : dht::maximum_token(); - auto ret = rs.get_natural_endpoints(tok); - remove_item(ret, utils::fb_utilities::get_broadcast_address()); - - if (!data_centers.empty()) { - auto dc_endpoints_map = service::get_local_storage_service().get_token_metadata().get_topology().get_datacenter_endpoints(); - std::unordered_set dc_endpoints; - for (const sstring& dc : data_centers) { - auto it = dc_endpoints_map.find(dc); - if (it == dc_endpoints_map.end()) { - std::vector dcs; - for (const auto& e : dc_endpoints_map) { - dcs.push_back(e.first); - } - throw std::runtime_error(sprint("Unknown data center '%s'. " - "Known data centers: %s", dc, dcs)); - } - for (const auto& endpoint : it->second) { - dc_endpoints.insert(endpoint); - } - } - // We require, like Cassandra does, that the current host must also - // be part of the repair - if (!dc_endpoints.count(utils::fb_utilities::get_broadcast_address())) { - throw std::runtime_error("The current host must be part of the repair"); - } - // The resulting list of nodes is the intersection of the nodes in the - // listed data centers, and the (range-dependent) list of neighbors. - std::unordered_set neighbor_set(ret.begin(), ret.end()); - ret.clear(); - for (const auto& endpoint : dc_endpoints) { - if (neighbor_set.count(endpoint)) { - ret.push_back(endpoint); - } - } - } else if (!hosts.empty()) { - bool found_me = false; - std::unordered_set neighbor_set(ret.begin(), ret.end()); - ret.clear(); - for (const sstring& host : hosts) { - gms::inet_address endpoint; - try { - endpoint = gms::inet_address(host); - } catch(...) 
{ - throw std::runtime_error(sprint("Unknown host specified: %s", host)); - } - if (endpoint == utils::fb_utilities::get_broadcast_address()) { - found_me = true; - } else if (neighbor_set.count(endpoint)) { - ret.push_back(endpoint); - // If the same host is listed twice, don't add it again later - neighbor_set.erase(endpoint); - } - // Nodes which aren't neighbors for this range are ignored. - // This allows the user to give a list of "good" nodes, where - // for each different range, only the subset of nodes actually - // holding a replica of the given range is used. This, - // however, means the user is never warned if one of the nodes - // on the list isn't even part of the cluster. - } - // We require, like Cassandra does, that the current host must also - // be listed on the "-hosts" option - even though we don't want it in - // the returned list: - if (!found_me) { - throw std::runtime_error("The current host must be part of the repair"); - } - if (ret.size() < 1) { - auto me = utils::fb_utilities::get_broadcast_address(); - auto others = rs.get_natural_endpoints(tok); - remove_item(others, me); - throw std::runtime_error(sprint("Repair requires at least two " - "endpoints that are neighbors before it can continue, " - "the endpoint used for this repair is %s, other " - "available neighbors are %s but these neighbors were not " - "part of the supplied list of hosts to use during the " - "repair (%s).", me, others, hosts)); - } - } - - return ret; - -#if 0 - // Origin's ActiveRepairService.getNeighbors() also verifies that the - // requested range fits into a local range - StorageService ss = StorageService.instance; - Map<Range<Token>, List<InetAddress>> replicaSets = ss.getRangeToAddressMap(keyspaceName); - Range<Token> rangeSuperSet = null; - for (Range<Token> range : ss.getLocalRanges(keyspaceName)) - { - if (range.contains(toRepair)) - { - rangeSuperSet = range; - break; - } - else if (range.intersects(toRepair)) - { - throw new IllegalArgumentException("Requested range intersects a local range but is not fully contained in one; this would lead to imprecise repair"); - } - } - if (rangeSuperSet == null || !replicaSets.containsKey(rangeSuperSet)) - return Collections.emptySet(); - -#endif -} - -// The repair_tracker tracks ongoing repair operations and their progress. -// A repair which has already finished successfully is dropped from this -// table, but a failed repair will remain in the table forever so it can -// be queried about more than once (FIXME: reconsider this. But note that -// failed repairs should be rare anyway). -// This object is not thread-safe, and must be used by only one cpu. -class tracker { -private: - // Each repair_start() call returns a unique int which the user can later - // use to follow the status of this repair with repair_status(). - // We can't use the number 0 - if repair_start() returns 0, it means it - // decided quickly that there is nothing to repair. - int _next_repair_command = 1; - // Note that there are no "SUCCESSFUL" entries in the "status" map: - // Successfully-finished repairs are those with id < _next_repair_command - // but aren't listed as running or failed in the status map. - std::unordered_map<int, repair_status> _status;
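    // For illustration only (not part of the original file): with
    // _next_repair_command == 4, get(2) returns SUCCESSFUL when id 2 has no
    // entry in _status, get(3) returns FAILED if id 3 was recorded as failed,
    // and get(7) throws, because id 7 was never handed out.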
- // Used to allow shutting down repairs in progress, and waiting for them. - seastar::gate _gate; - // Set when the repair service is being shut down - std::atomic_bool _shutdown alignas(64); -public: - tracker() : _shutdown(false) { - } - void start(int id) { - _gate.enter(); - _status[id] = repair_status::RUNNING; - } - void done(int id, bool succeeded) { - if (succeeded) { - _status.erase(id); - } else { - _status[id] = repair_status::FAILED; - } - _gate.leave(); - } - repair_status get(int id) { - if (id >= _next_repair_command) { - throw std::runtime_error(sprint("unknown repair id %d", id)); - } - auto it = _status.find(id); - if (it == _status.end()) { - return repair_status::SUCCESSFUL; - } else { - return it->second; - } - } - int next_repair_command() { - return _next_repair_command++; - } - future<> shutdown() { - _shutdown.store(true, std::memory_order_relaxed); - return _gate.close(); - } - void check_in_shutdown() { - if (_shutdown.load(std::memory_order_relaxed)) { - throw std::runtime_error(sprint("Repair service is being shut down")); - } - } -}; - -static tracker repair_tracker; - -static void check_in_shutdown() { - repair_tracker.check_in_shutdown(); -} - -class sha256_hasher { - CryptoPP::SHA256 hash{}; -public: - void update(const char* ptr, size_t length) { - static_assert(sizeof(char) == sizeof(byte), "Assuming lengths will be the same"); - hash.Update(reinterpret_cast<const byte*>(ptr), length * sizeof(byte)); - } - - void finalize(std::array<uint8_t, 32>& digest) { - static_assert(CryptoPP::SHA256::DIGESTSIZE == std::tuple_size<std::array<uint8_t, 32>>::value * sizeof(digest[0]), - "digest size"); - hash.Final(reinterpret_cast<byte*>(digest.data())); - } -}; - -future<partition_checksum> partition_checksum::compute_legacy(streamed_mutation m) -{ - return mutation_from_streamed_mutation(std::move(m)).then([] (auto mopt) { - assert(mopt); - std::array<uint8_t, 32> digest; - sha256_hasher h; - feed_hash(h, *mopt); - h.finalize(digest); - return partition_checksum(digest); - }); -} - -future<partition_checksum> partition_checksum::compute_streamed(streamed_mutation m) -{ - auto& s = *m.schema(); - auto h = make_lw_shared<sha256_hasher>(); - m.key().feed_hash(*h, s); - return do_with(std::move(m), [&s, h] (auto& sm) mutable { - mutation_hasher<sha256_hasher> mh(s, *h); - return consume(sm, std::move(mh)).then([ h ] { - std::array<uint8_t, 32> digest; - h->finalize(digest); - return partition_checksum(digest); - }); - }); -} - -future<partition_checksum> partition_checksum::compute(streamed_mutation m, repair_checksum hash_version) -{ - switch (hash_version) { - case repair_checksum::legacy: return compute_legacy(std::move(m)); - case repair_checksum::streamed: return compute_streamed(std::move(m)); - default: throw std::runtime_error(sprint("Unknown hash version: %d", static_cast<int>(hash_version))); - } -} - -static inline unaligned<uint64_t>& qword(std::array<uint8_t, 32>& b, int n) { - return *unaligned_cast<uint64_t>(b.data() + 8 * n); -} -static inline const unaligned<uint64_t>& qword(const std::array<uint8_t, 32>& b, int n) { - return *unaligned_cast<uint64_t>(b.data() + 8 * n); -} - -void partition_checksum::add(const partition_checksum& other) { - static_assert(std::tuple_size<decltype(_digest)>::value == 32, "digest size"); - // Hopefully the following trickery is faster than XOR'ing 32 separate bytes - qword(_digest, 0) = qword(_digest, 0) ^ qword(other._digest, 0); - qword(_digest, 1) = qword(_digest, 1) ^ qword(other._digest, 1); - qword(_digest, 2) = qword(_digest, 2) ^ qword(other._digest, 2); - qword(_digest, 3) = qword(_digest, 3) ^ qword(other._digest, 3); -} - -bool partition_checksum::operator==(const partition_checksum& other) const { - static_assert(std::tuple_size<decltype(_digest)>::value == 32, "digest size"); - return qword(_digest, 0) == qword(other._digest, 0) && - qword(_digest, 1) ==
qword(other._digest, 1) && - qword(_digest, 2) == qword(other._digest, 2) && - qword(_digest, 3) == qword(other._digest, 3); -} - -const std::array<uint8_t, 32>& partition_checksum::digest() const { - return _digest; -} - -std::ostream& operator<<(std::ostream& out, const partition_checksum& c) { - auto save_flags = out.flags(); - out << std::hex << std::setfill('0'); - for (auto b : c._digest) { - out << std::setw(2) << (unsigned int)b; - } - out.flags(save_flags); - return out; -} - -// Calculate the checksum of the data held *on this shard* of a column family, -// in the given token range. -// All parameters to this function are constant references, and the caller -// must ensure they live as long as the future returned by this function is -// not resolved. -// FIXME: Both master and slave will typically call this on consecutive ranges -// so it would be useful to have this code cache its stopping point or have -// some object live throughout the operation. Moreover, it makes sense to -// vary the collection of sstables used throughout a long repair. -static future<partition_checksum> checksum_range_shard(database &db, - const sstring& keyspace_name, const sstring& cf_name, - const dht::partition_range_vector& prs, repair_checksum hash_version) { - auto& cf = db.find_column_family(keyspace_name, cf_name); - auto reader = cf.make_streaming_reader(cf.schema(), prs); - return do_with(std::move(reader), partition_checksum(), - [hash_version] (auto& reader, auto& checksum) { - return repeat([&reader, &checksum, hash_version] () { - return reader().then([&checksum, hash_version] (auto mopt) { - if (mopt) { - return partition_checksum::compute(std::move(*mopt), hash_version).then([&checksum] (auto pc) { - checksum.add(pc); - return stop_iteration::no; - }); - } else { - return make_ready_future<stop_iteration>(stop_iteration::yes); - } - }); - }).then([&checksum] { - return checksum; - }); - }); -} - -// It is counter-productive to allow a large number of range checksum -// operations to proceed in parallel (on the same shard), because the read -// operation can already parallelize itself as much as needed, and doing -// multiple reads in parallel just adds a lot of memory overheads. -// So checksum_parallelism_semaphore is used to limit this parallelism, -// and should be set to 1, or another small number. -// -// Note that checksum_parallelism_semaphore applies not just in the -// repair master, but also in the slave: The repair slave may receive many -// checksum requests in parallel, but will only work on one or a few -// (checksum_parallelism_semaphore) at once. -static thread_local semaphore checksum_parallelism_semaphore(2);
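// For illustration only (not part of the original file): the semaphore is
// taken below via seastar::with_semaphore(), which waits for one unit, runs
// the task, and returns the unit even if the task fails, e.g.:
//
//   return seastar::with_semaphore(checksum_parallelism_semaphore, 1, [&] {
//       return checksum_range_shard(db, keyspace, cf, prs, hash_version);
//   });
//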
-// Calculate the checksum of the data held on all shards of a column family, -// in the given token range. -// In practice, we only need to consider one or two shards which intersect the -// given "range". This is because the token ring has nodes*vnodes tokens, -// dividing the token space into nodes*vnodes ranges, with "range" being one -// of those. This number is big (vnodes = 256 by default). At the same time, -// sharding divides the token space into relatively few large ranges, one per -// thread. -// Watch out: All parameters to this function are constant references, and the -// caller must ensure they live as long as the future returned by this -// function is not resolved. -future<partition_checksum> checksum_range(seastar::sharded<database> &db, - const sstring& keyspace, const sstring& cf, - const ::dht::token_range& range, repair_checksum hash_version) { - auto& schema = db.local().find_column_family(keyspace, cf).schema(); - auto shard_ranges = dht::split_range_to_shards(dht::to_partition_range(range), *schema); - return do_with(partition_checksum(), std::move(shard_ranges), [&db, &keyspace, &cf, hash_version] (auto& result, auto& shard_ranges) { - return parallel_for_each(shard_ranges, [&db, &keyspace, &cf, &result, hash_version] (auto& shard_range) { - auto& shard = shard_range.first; - auto& prs = shard_range.second; - return db.invoke_on(shard, [keyspace, cf, prs = std::move(prs), hash_version] (database& db) mutable { - return do_with(std::move(keyspace), std::move(cf), std::move(prs), [&db, hash_version] (auto& keyspace, auto& cf, auto& prs) { - return seastar::with_semaphore(checksum_parallelism_semaphore, 1, [&db, hash_version, &keyspace, &cf, &prs] { - return checksum_range_shard(db, keyspace, cf, prs, hash_version); - }); - }); - }).then([&result] (partition_checksum sum) { - result.add(sum); - }); - }).then([&result] { - return make_ready_future<partition_checksum>(result); - }); - }); -} - -// parallelism_semaphore limits the number of parallel ongoing checksum -// comparisons. This could mean, for example, that this number of checksum -// requests have been sent to other nodes and we are waiting for them to -// return so we can compare those to our own checksums. This limit can be -// set fairly high because the outstanding comparisons take only a few -// resources. In particular, we do NOT do this number of file reads in -// parallel because file reads have large memory overheads (read buffers, -// partitions, etc.) - the number of concurrent reads is further limited -// by an additional semaphore checksum_parallelism_semaphore (see above). -// -// FIXME: This would be better off in a repair service, or even a per-shard -// repair instance holding all repair state. However, since we are anyway -// considering ditching those semaphores for a more fine-grained resource-based -// solution, let's do the simplest thing here and change it later. -constexpr int parallelism = 100; -static thread_local semaphore parallelism_semaphore(parallelism); - -static future<uint64_t> estimate_partitions(seastar::sharded<database>& db, const sstring& keyspace, - const sstring& cf, const dht::token_range& range) { - return db.map_reduce0( - [keyspace, cf, range] (auto& db) { - // FIXME: column_family should have a method to estimate the number of - // partitions (and of course it should use cardinality estimation bitmaps, - // not a trivial sum). We shouldn't have this ugly code here... - // FIXME: If sstables are shared, they will be accounted more than - // once. However, shared sstables should exist for a short time only. - auto sstables = db.find_column_family(keyspace, cf).get_sstables(); - return boost::accumulate(*sstables, uint64_t(0), - [&range] (uint64_t x, auto&& sst) { return x + sst->estimated_keys_for_range(range); }); - }, - uint64_t(0), - std::plus<uint64_t>() - ); -} - -// Repair a single cf in a single local range. -// Comparable to RepairJob in Origin. -static future<> repair_cf_range(repair_info& ri, - sstring cf, ::dht::token_range range, - const std::vector<gms::inet_address>& neighbors) { - if (neighbors.empty()) { - // Nothing to do in this case...
- return make_ready_future<>(); - } - - return estimate_partitions(ri.db, ri.keyspace, cf, range).then([&ri, cf, range, &neighbors] (uint64_t estimated_partitions) { - range_splitter ranges(range, estimated_partitions, ri.target_partitions); - return do_with(seastar::gate(), true, std::move(cf), std::move(ranges), - [&ri, &neighbors] (auto& completion, auto& success, const auto& cf, auto& ranges) { - return do_until([&ranges] () { return !ranges.has_next(); }, - [&ranges, &ri, &completion, &success, &neighbors, &cf] () { - auto range = ranges.next(); - check_in_shutdown(); - return parallelism_semaphore.wait(1).then([&ri, &completion, &success, &neighbors, &cf, range] { - auto checksum_type = service::get_local_storage_service().cluster_supports_large_partitions() - ? repair_checksum::streamed : repair_checksum::legacy; - - // Ask this node, and all neighbors, to calculate checksums in - // this range. When all are done, compare the results, and if - // there are any differences, sync the content of this range. - std::vector> checksums; - checksums.reserve(1 + neighbors.size()); - checksums.push_back(checksum_range(ri.db, ri.keyspace, cf, range, checksum_type)); - for (auto&& neighbor : neighbors) { - checksums.push_back( - netw::get_local_messaging_service().send_repair_checksum_range( - netw::msg_addr{neighbor}, ri.keyspace, cf, range, checksum_type)); - } - - completion.enter(); - when_all(checksums.begin(), checksums.end()).then( - [&ri, &cf, range, &neighbors, &success] - (std::vector> checksums) { - // If only some of the replicas of this range are alive, - // we set success=false so repair will fail, but we can - // still do our best to repair available replicas. - std::vector live_neighbors; - std::vector live_neighbors_checksum; - for (unsigned i = 0; i < checksums.size(); i++) { - if (checksums[i].failed()) { - rlogger.warn( - "Checksum of range {} on {} failed: {}", - range, - (i ? neighbors[i-1] : - utils::fb_utilities::get_broadcast_address()), - checksums[i].get_exception()); - success = false; - ri.nr_failed_ranges++; - // Do not break out of the loop here, so we can log - // (and discard) all the exceptions. - } else if (i > 0) { - live_neighbors.push_back(neighbors[i - 1]); - live_neighbors_checksum.push_back(checksums[i].get0()); - } - } - if (!checksums[0].available() || live_neighbors.empty() || live_neighbors_checksum.empty()) { - return make_ready_future<>(); - } - // If one of the available checksums is different, repair - // all the neighbors which returned a checksum. - auto checksum0 = checksums[0].get0(); - std::vector live_neighbors_in(live_neighbors); - std::vector live_neighbors_out(live_neighbors); - - std::unordered_map> checksum_map; - for (size_t idx = 0 ; idx < live_neighbors.size(); idx++) { - checksum_map[live_neighbors_checksum[idx]].emplace_back(live_neighbors[idx]); - } - - auto node_reducer = [] (std::vector& live_neighbors_in_or_out, - std::vector& nodes_with_same_checksum, size_t nr_nodes_to_keep) { - // nodes_with_same_checksum contains two types of nodes: - // 1) the nodes we want to remove from live_neighbors_in_or_out. 
- // 2) the nodes, nr_nodes_to_keep in number, not to remove from - // live_neighbors_in_or_out - auto nr_nodes = nodes_with_same_checksum.size(); - if (nr_nodes <= nr_nodes_to_keep) { - return; - } - - if (nr_nodes_to_keep == 0) { - // All nodes in nodes_with_same_checksum will be removed from live_neighbors_in_or_out - } else if (nr_nodes_to_keep == 1) { - auto node_is_remote = [] (gms::inet_address ip) { return !service::get_local_storage_service().is_local_dc(ip); }; - boost::partition(nodes_with_same_checksum, node_is_remote); - nodes_with_same_checksum.resize(nr_nodes - nr_nodes_to_keep); - } else { - throw std::runtime_error(sprint("nr_nodes_to_keep = {}, but it can only be 1 or 0", nr_nodes_to_keep)); - } - - // Now, nodes_with_same_checksum contains nodes we want to remove, remove it from live_neighbors_in_or_out - auto it = boost::range::remove_if(live_neighbors_in_or_out, [&nodes_with_same_checksum] (const auto& ip) { - return boost::algorithm::any_of_equal(nodes_with_same_checksum, ip); - }); - live_neighbors_in_or_out.erase(it, live_neighbors_in_or_out.end()); - }; - - // Reduce in traffic - for (auto& item : checksum_map) { - auto& sum = item.first; - auto nodes_with_same_checksum = item.second; - // If remote nodes have the same checksum, fetch only from one of them - size_t nr_nodes_to_fetch = 1; - // If remote nodes have zero checksum or have the same - // checksum as local checksum, do not fetch from them at all - if (sum == partition_checksum() || sum == checksum0) { - nr_nodes_to_fetch = 0; - } - // E.g., - // Local Remote1 Remote2 Remote3 - // 5 5 5 5 : IN: 0 - // 5 5 5 0 : IN: 0 - // 5 5 0 0 : IN: 0 - // 5 0 0 0 : IN: 0 - // 0 5 5 5 : IN: 1 - // 0 5 5 0 : IN: 1 - // 0 5 0 0 : IN: 1 - // 0 0 0 0 : IN: 0 - // 3 5 5 3 : IN: 1 - // 3 5 3 3 : IN: 1 - // 3 3 3 3 : IN: 0 - // 3 5 4 3 : IN: 2 - node_reducer(live_neighbors_in, nodes_with_same_checksum, nr_nodes_to_fetch); - } - - // Reduce out traffic - if (live_neighbors_in.empty()) { - for (auto& item : checksum_map) { - auto& sum = item.first; - auto nodes_with_same_checksum = item.second; - // Skip to send to the nodes with the same checksum as local node - // E.g., - // Local Remote1 Remote2 Remote3 - // 5 5 5 5 : IN: 0 OUT: 0 SKIP_OUT: Remote1, Remote2, Remote3 - // 5 5 5 0 : IN: 0 OUT: 1 SKIP_OUT: Remote1, Remote2 - // 5 5 0 0 : IN: 0 OUT: 2 SKIP_OUT: Remote1 - // 5 0 0 0 : IN: 0 OUT: 3 SKIP_OUT: None - // 0 0 0 0 : IN: 0 OUT: 0 SKIP_OUT: Remote1, Remote2, Remote3 - if (sum == checksum0) { - size_t nr_nodes_to_send = 0; - node_reducer(live_neighbors_out, nodes_with_same_checksum, nr_nodes_to_send); - } - } - } else if (live_neighbors_in.size() == 1 && checksum0 == partition_checksum()) { - for (auto& item : checksum_map) { - auto& sum = item.first; - auto nodes_with_same_checksum = item.second; - // Skip to send to the nodes with none zero checksum - // E.g., - // Local Remote1 Remote2 Remote3 - // 0 5 5 5 : IN: 1 OUT: 0 SKIP_OUT: Remote1, Remote2, Remote3 - // 0 5 5 0 : IN: 1 OUT: 1 SKIP_OUT: Remote1, Remote2 - // 0 5 0 0 : IN: 1 OUT: 2 SKIP_OUT: Remote1 - if (sum != checksum0) { - size_t nr_nodes_to_send = 0; - node_reducer(live_neighbors_out, nodes_with_same_checksum, nr_nodes_to_send); - } - } - } - if (!(live_neighbors_in.empty() && live_neighbors_out.empty())) { - rlogger.debug("Found differing range {} on nodes {}, in = {}, out = {}", range, - live_neighbors, live_neighbors_in, live_neighbors_out); - return ri.request_transfer_ranges(cf, range, live_neighbors_in, live_neighbors_out); - } - return 
make_ready_future<>(); - }).handle_exception([&ri, &success, &cf, range] (std::exception_ptr eptr) { - // Something above (e.g., request_transfer_ranges) failed. We could - // stop the repair immediately, or let it continue with - // other ranges (at the moment, we do the latter). But in - // any case, we need to remember that the repair failed to - // tell the caller. - success = false; - ri.nr_failed_ranges++; - rlogger.warn("Failed sync of range {}: {}", range, eptr); - }).finally([&completion] { - parallelism_semaphore.signal(1); - completion.leave(); // notify do_for_each that we're done - }); - }); - }).finally([&success, &completion] { - return completion.close().then([&success] { - if (!success) { - rlogger.warn("Checksum or sync of partial range failed"); - } - // We probably want the repair to continue even if some - // ranges fail to do the checksum. We need to set the - // per-repair success flag to false and report after the - // streaming is done. - return make_ready_future<>(); - }); - }); - }); - }); -} - -// Repair a single local range, multiple column families. -// Comparable to RepairSession in Origin. -static future<> repair_range(repair_info& ri, const dht::token_range& range) { - auto id = utils::UUID_gen::get_time_UUID(); - return do_with(get_neighbors(ri.db.local(), ri.keyspace, range, ri.data_centers, ri.hosts), [&ri, range, id] (const auto& neighbors) { - rlogger.debug("[repair #{}] new session: will sync {} on range {} for {}.{}", id, neighbors, range, ri.keyspace, ri.cfs); - return do_for_each(ri.cfs.begin(), ri.cfs.end(), [&ri, &neighbors, range] (auto&& cf) { - return repair_cf_range(ri, cf, range, neighbors); - }); - }); -} - -static dht::token_range_vector get_ranges_for_endpoint( - database& db, sstring keyspace, gms::inet_address ep) { - auto& rs = db.find_keyspace(keyspace).get_replication_strategy(); - return rs.get_ranges(ep); -} - -static dht::token_range_vector get_local_ranges( - database& db, sstring keyspace) { - return get_ranges_for_endpoint(db, keyspace, utils::fb_utilities::get_broadcast_address()); -} - -static dht::token_range_vector get_primary_ranges_for_endpoint( - database& db, sstring keyspace, gms::inet_address ep) { - auto& rs = db.find_keyspace(keyspace).get_replication_strategy(); - return rs.get_primary_ranges(ep); -} - -static dht::token_range_vector get_primary_ranges( - database& db, sstring keyspace) { - return get_primary_ranges_for_endpoint(db, keyspace, - utils::fb_utilities::get_broadcast_address()); -} - - -struct repair_options { - // If primary_range is true, we should perform repair only on this node's - // primary ranges. The default of false means perform repair on all ranges - // held by the node. primary_range=true is useful if the user plans to - // repair all nodes. - bool primary_range = false; - // If ranges is not empty, it overrides the repair's default heuristics - // for determining the list of ranges to repair. In particular, "ranges" - // overrides the setting of "primary_range". - dht::token_range_vector ranges; - // If start_token and end_token are set, they define a range which is - // intersected with the ranges actually held by this node to decide what - // to repair. - sstring start_token; - sstring end_token; - // column_families is the list of column families to repair in the given - // keyspace.
If this list is empty (the default), all the column families - // in this keyspace are repaired - std::vector column_families; - // hosts specifies the list of known good hosts to repair with this host - // (note that this host is required to also be on this list). For each - // range repaired, only the relevant subset of the hosts (holding a - // replica of this range) is used. - std::vector hosts; - // data_centers is used to restrict the repair to the local data center. - // The node starting the repair must be in the data center; Issuing a - // repair to a data center other than the named one returns an error. - std::vector data_centers; - - repair_options(std::unordered_map options) { - bool_opt(primary_range, options, PRIMARY_RANGE_KEY); - ranges_opt(ranges, options, RANGES_KEY); - list_opt(column_families, options, COLUMNFAMILIES_KEY); - list_opt(hosts, options, HOSTS_KEY); - list_opt(data_centers, options, DATACENTERS_KEY); - // We currently do not support incremental repair. We could probably - // ignore this option as it is just an optimization, but for now, - // let's make it an error. - bool incremental = false; - bool_opt(incremental, options, INCREMENTAL_KEY); - if (incremental) { - throw std::runtime_error("unsupported incremental repair"); - } - // We do not currently support the distinction between "parallel" and - // "sequential" repair, and operate the same for both. - // We don't currently support "dc parallel" parallelism. - int parallelism = PARALLEL; - int_opt(parallelism, options, PARALLELISM_KEY); - if (parallelism != PARALLEL && parallelism != SEQUENTIAL) { - throw std::runtime_error(sprint( - "unsupported repair parallelism: %d", parallelism)); - } - string_opt(start_token, options, START_TOKEN); - string_opt(end_token, options, END_TOKEN); - - bool trace = false; - bool_opt(trace, options, TRACE_KEY); - if (trace) { - throw std::runtime_error("unsupported trace"); - } - // Consume, ignore. - int job_threads; - int_opt(job_threads, options, JOB_THREADS_KEY); - - // The parsing code above removed from the map options we have parsed. - // If anything is left there in the end, it's an unsupported option. - if (!options.empty()) { - throw std::runtime_error(sprint("unsupported repair options: %s", - options)); - } - } - - static constexpr const char* PRIMARY_RANGE_KEY = "primaryRange"; - static constexpr const char* PARALLELISM_KEY = "parallelism"; - static constexpr const char* INCREMENTAL_KEY = "incremental"; - static constexpr const char* JOB_THREADS_KEY = "jobThreads"; - static constexpr const char* RANGES_KEY = "ranges"; - static constexpr const char* COLUMNFAMILIES_KEY = "columnFamilies"; - static constexpr const char* DATACENTERS_KEY = "dataCenters"; - static constexpr const char* HOSTS_KEY = "hosts"; - static constexpr const char* TRACE_KEY = "trace"; - static constexpr const char* START_TOKEN = "startToken"; - static constexpr const char* END_TOKEN = "endToken"; - - // Settings of "parallelism" option. Numbers must match Cassandra's - // RepairParallelism enum, which is used by the caller. 
- enum repair_parallelism { - SEQUENTIAL=0, PARALLEL=1, DATACENTER_AWARE=2 - }; - -private: - static void bool_opt(bool& var, - std::unordered_map<sstring, sstring>& options, - const sstring& key) { - auto it = options.find(key); - if (it != options.end()) { - // Same parsing as Boolean.parseBoolean does: - if (boost::algorithm::iequals(it->second, "true")) { - var = true; - } else { - var = false; - } - options.erase(it); - } - } - - static void int_opt(int& var, - std::unordered_map<sstring, sstring>& options, - const sstring& key) { - auto it = options.find(key); - if (it != options.end()) { - errno = 0; - var = strtol(it->second.c_str(), nullptr, 10); - if (errno) { - throw(std::runtime_error(sprint("cannot parse integer: '%s'", it->second))); - } - options.erase(it); - } - } - - static void string_opt(sstring& var, - std::unordered_map<sstring, sstring>& options, - const sstring& key) { - auto it = options.find(key); - if (it != options.end()) { - var = it->second; - options.erase(it); - } - } - - // A range is expressed as start_token:end_token, and multiple ranges can - // be given as comma-separated ranges (e.g. aaa:bbb,ccc:ddd). - static void ranges_opt(dht::token_range_vector& var, - std::unordered_map<sstring, sstring>& options, - const sstring& key) { - auto it = options.find(key); - if (it == options.end()) { - return; - } - std::vector<sstring> range_strings; - boost::split(range_strings, it->second, boost::algorithm::is_any_of(",")); - for (auto range : range_strings) { - std::vector<sstring> token_strings; - boost::split(token_strings, range, boost::algorithm::is_any_of(":")); - if (token_strings.size() != 2) { - throw(std::runtime_error("range must have two components " - "separated by ':', got '" + range + "'")); - } - auto tok_start = dht::global_partitioner().from_sstring(token_strings[0]); - auto tok_end = dht::global_partitioner().from_sstring(token_strings[1]); - auto rng = wrapping_range<dht::token>( - ::range<dht::token>::bound(tok_start, false), - ::range<dht::token>::bound(tok_end, true)); - compat::unwrap_into(std::move(rng), dht::token_comparator(), [&] (dht::token_range&& x) { - var.push_back(std::move(x)); - }); - } - options.erase(it); - } - - // A comma-separated list of strings - static void list_opt(std::vector<sstring>& var, - std::unordered_map<sstring, sstring>& options, - const sstring& key) { - auto it = options.find(key); - if (it == options.end()) { - return; - } - boost::split(var, it->second, boost::algorithm::is_any_of(",")); - options.erase(it); - } -};
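// For illustration only (not part of the original file): a hypothetical
// options map, using the keys defined above, that this constructor accepts:
//
//   std::unordered_map<sstring, sstring> opts = {
//       {"primaryRange", "true"},        // bool_opt   -> primary_range
//       {"ranges", "100:200,300:400"},   // ranges_opt -> two token ranges
//       {"columnFamilies", "cf1,cf2"},   // list_opt   -> column_families
//   };
//   repair_options options(opts); // any key left unparsed -> runtime_error
//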
-static future<> repair_ranges(repair_info ri) {
-    return do_with(std::move(ri), [] (auto& ri) {
-#if 0
-        // repair all the ranges in parallel
-        return parallel_for_each(ri.ranges, [&ri] (auto&& range) {
-#else
-        // repair all the ranges in sequence
-        return do_for_each(ri.ranges, [&ri] (auto&& range) {
-#endif
-            ri.ranges_index++;
-            rlogger.info("Repair {} out of {} ranges, id={}, shard={}, keyspace={}, table={}, range={}",
-                ri.ranges_index, ri.ranges.size(), ri.id, ri.shard, ri.keyspace, ri.cfs, range);
-            return do_with(dht::selective_token_range_sharder(range, ri.shard), [&ri] (auto& sharder) {
-                return repeat([&ri, &sharder] () {
-                    check_in_shutdown();
-                    auto range_shard = sharder.next();
-                    if (range_shard) {
-                        return repair_range(ri, *range_shard).then([] {
-                            return make_ready_future<stop_iteration>(stop_iteration::no);
-                        });
-                    } else {
-                        return make_ready_future<stop_iteration>(stop_iteration::yes);
-                    }
-                });
-            });
-        }).then([&ri] {
-            // Do streaming for the remaining ranges we do not stream in
-            // repair_cf_range
-            return ri.do_streaming();
-        }).then([&ri] {
-            ri.check_failed_ranges();
-            return make_ready_future<>();
-        }).handle_exception([&ri] (std::exception_ptr eptr) {
-            rlogger.info("repair {} failed - {}", ri.id, eptr);
-            return make_exception_future<>(std::move(eptr));
-        });
-    });
-}
-
-// repair_start() can run on any cpu; it runs the function do_repair_start()
-// on cpu0. The benefit of always running that function on the same CPU is
-// that it allows us to keep some state (like a list of ongoing repairs). It
-// is fine to always do this on one CPU, because the function itself does
-// very little (mainly tell other nodes and CPUs what to do).
-static int do_repair_start(seastar::sharded<database>& db, sstring keyspace,
-        std::unordered_map<sstring, sstring> options_map) {
-    check_in_shutdown();
-
-    repair_options options(options_map);
-
-    // Note: Cassandra can, in some cases, decide immediately that there is
-    // nothing to repair, and return 0. "nodetool repair" prints in this case
-    // that "Nothing to repair for keyspace '...'". We don't have such a case
-    // yet. Real ids returned by next_repair_command() will be >= 1.
-    int id = repair_tracker.next_repair_command();
-    rlogger.info("starting user-requested repair for keyspace {}, repair id {}, options {}", keyspace, id, options_map);
-    repair_tracker.start(id);
-
-    // If the "ranges" option is not explicitly specified, we repair all the
-    // local ranges (the token ranges for which this node holds a replica).
-    // Each of these ranges may have a different set of replicas, so the
-    // repair of each range is performed separately with repair_range().
- dht::token_range_vector ranges; - if (options.ranges.size()) { - ranges = options.ranges; - } else if (options.primary_range) { - rlogger.info("primary-range repair"); - // when "primary_range" option is on, neither data_centers nor hosts - // may be set, except data_centers may contain only local DC (-local) -#if 0 - if (options.data_centers.size() == 1 && - options.data_centers[0] == DatabaseDescriptor.getLocalDataCenter()) { - ranges = get_primary_ranges_within_dc(db.local(), keyspace); - } else -#endif -#if 0 - if (options.data_centers.size() > 0 || options.hosts.size() > 0) { - throw std::runtime_error("You need to run primary range repair on all nodes in the cluster."); - } else { -#endif - ranges = get_primary_ranges(db.local(), keyspace); -#if 0 - } -#endif - } else { - ranges = get_local_ranges(db.local(), keyspace); - } - - if (!options.start_token.empty() || !options.end_token.empty()) { - // Intersect the list of local ranges with the given token range, - // dropping ranges with no intersection. - // We don't have a range::intersect() method, but we can use - // range::subtract() and subtract the complement range. - std::experimental::optional<::range::bound> tok_start; - std::experimental::optional<::range::bound> tok_end; - if (!options.start_token.empty()) { - tok_start = ::range::bound( - dht::global_partitioner().from_sstring(options.start_token), - true); - } - if (!options.end_token.empty()) { - tok_end = ::range::bound( - dht::global_partitioner().from_sstring(options.end_token), - false); - } - dht::token_range given_range_complement(tok_end, tok_start); - dht::token_range_vector intersections; - for (const auto& range : ranges) { - auto rs = range.subtract(given_range_complement, - dht::token_comparator()); - intersections.insert(intersections.end(), rs.begin(), rs.end()); - } - ranges = std::move(intersections); - } - - std::vector cfs; - if (options.column_families.size()) { - cfs = options.column_families; - for (auto& cf : cfs) { - try { - db.local().find_column_family(keyspace, cf); - } catch(...) 
{ - throw std::runtime_error(sprint( - "No column family '%s' in keyspace '%s'", cf, keyspace)); - } - } - } else { - cfs = list_column_families(db.local(), keyspace); - } - - - std::vector> repair_results; - repair_results.reserve(smp::count); - - for (auto shard : boost::irange(unsigned(0), smp::count)) { - auto f = db.invoke_on(shard, [keyspace, cfs, id, ranges, - data_centers = options.data_centers, hosts = options.hosts] (database& localdb) mutable { - return repair_ranges(repair_info(service::get_local_storage_service().db(), - std::move(keyspace), std::move(ranges), std::move(cfs), - id, std::move(data_centers), std::move(hosts))); - }); - repair_results.push_back(std::move(f)); - } - - when_all(repair_results.begin(), repair_results.end()).then([id] (std::vector> results) { - if (std::any_of(results.begin(), results.end(), [] (auto&& f) { return f.failed(); })) { - repair_tracker.done(id, false); - rlogger.info("repair {} failed", id); - } else { - repair_tracker.done(id, true); - rlogger.info("repair {} completed successfully", id); - } - return make_ready_future<>(); - }).handle_exception([id] (std::exception_ptr eptr) { - repair_tracker.done(id, false); - rlogger.info("repair {} failed: {}", id, eptr); - }); - - return id; -} - -future repair_start(seastar::sharded& db, sstring keyspace, - std::unordered_map options) { - return db.invoke_on(0, [&db, keyspace = std::move(keyspace), options = std::move(options)] (database& localdb) { - return do_repair_start(db, std::move(keyspace), std::move(options)); - }); -} - -future repair_get_status(seastar::sharded& db, int id) { - return db.invoke_on(0, [id] (database& localdb) { - return repair_tracker.get(id); - }); -} - -future<> repair_shutdown(seastar::sharded& db) { - rlogger.info("Starting shutdown of repair"); - return db.invoke_on(0, [] (database& localdb) { - return repair_tracker.shutdown().then([] { - rlogger.info("Completed shutdown of repair"); - }); - }); -} diff --git a/scylla/repair/repair.hh b/scylla/repair/repair.hh deleted file mode 100644 index 7562acd..0000000 --- a/scylla/repair/repair.hh +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include - -#include -#include -#include - -#include "database.hh" -#include "utils/UUID.hh" - - -class repair_exception : public std::exception { -private: - sstring _what; -public: - repair_exception(sstring msg) : _what(std::move(msg)) { } - virtual const char* what() const noexcept override { return _what.c_str(); } -}; - -class repair_stopped_exception : public repair_exception { -public: - repair_stopped_exception() : repair_exception("Repair stopped") { } -}; - -// NOTE: repair_start() can be run on any node, but starts a node-global -// operation. -// repair_start() starts the requested repair on this node. 
It returns an
-// integer id which can be used to query the repair's status with
-// repair_get_status(). The returned future becomes available quickly,
-// as soon as repair_get_status() can be used - it doesn't wait for the
-// repair to complete.
-future<int> repair_start(seastar::sharded<database>& db, sstring keyspace,
-        std::unordered_map<sstring, sstring> options);
-
-// TODO: Have repair_progress contain a percentage progress estimator
-// instead of just "RUNNING".
-enum class repair_status { RUNNING, SUCCESSFUL, FAILED };
-
-// repair_get_status() returns a future because it needs to run code on a
-// different CPU (cpu 0) and that might be a deferring operation.
-future<repair_status> repair_get_status(seastar::sharded<database>& db, int id);
-
-// repair_shutdown() stops all ongoing repairs started on this node (and
-// prevents any further repairs from being started). It returns a future
-// saying when all repairs have stopped, and attempts to stop them as
-// quickly as possible (we do not wait for repairs to finish but rather
-// stop them abruptly).
-future<> repair_shutdown(seastar::sharded<database>& db);
-
-enum class repair_checksum {
-    legacy = 0,
-    streamed = 1,
-};
-
-// The class partition_checksum calculates a 256-bit cryptographically-secure
-// checksum of a set of partitions fed to it. The checksum of a partition set
-// is calculated by calculating a strong hash function (SHA-256) of each
-// individual partition, and then XORing the individual hashes together.
-// XOR is good enough for merging strong checksums, and allows us to
-// independently calculate the checksums of different subsets of the original
-// set, and then combine the results into one checksum with the add() method.
-// The hash of an individual partition uses both its key and value.
-class partition_checksum {
-private:
-    std::array<uint8_t, 32> _digest; // 256 bits
-private:
-    static future<partition_checksum> compute_legacy(streamed_mutation m);
-    static future<partition_checksum> compute_streamed(streamed_mutation m);
-public:
-    constexpr partition_checksum() : _digest{} { }
-    explicit partition_checksum(std::array<uint8_t, 32> digest) : _digest(std::move(digest)) { }
-    static future<partition_checksum> compute(streamed_mutation m, repair_checksum rt);
-    void add(const partition_checksum& other);
-    bool operator==(const partition_checksum& other) const;
-    bool operator!=(const partition_checksum& other) const { return !operator==(other); }
-    friend std::ostream& operator<<(std::ostream&, const partition_checksum&);
-    const std::array<uint8_t, 32>& digest() const;
-};
-
-// Calculate the checksum of the data held on all shards of a column family,
-// in the given token range.
-// All parameters to this function are constant references, and the caller
-// must ensure they live as long as the future returned by this function is
-// not resolved.
-future<partition_checksum> checksum_range(seastar::sharded<database> &db,
-    const sstring& keyspace, const sstring& cf,
-    const ::dht::token_range& range, repair_checksum rt);
-
-namespace std {
-template<>
-struct hash<partition_checksum> {
-    size_t operator()(partition_checksum sum) const {
-        size_t h = 0;
-        std::copy_n(sum.digest().begin(), std::min(sizeof(size_t), sizeof(sum.digest())), reinterpret_cast<char*>(&h));
-        return h;
-    }
-};
-}
diff --git a/scylla/reversibly_mergeable.hh b/scylla/reversibly_mergeable.hh
deleted file mode 100644
index 08a8694..0000000
--- a/scylla/reversibly_mergeable.hh
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (C) 2016 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
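
An aside on the XOR-merge scheme partition_checksum describes above, before
moving on through the next file: because XOR is commutative and associative,
per-partition digests can be combined in any order, so shards can checksum
their subsets independently. A hypothetical std-only sketch (fake_hash is a
weak stand-in for the SHA-256 the real class uses; it only serves to show the
combine step):

    #include <array>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <string>

    using digest = std::array<uint8_t, 32>;

    // Stand-in for a strong per-partition hash (the real code uses SHA-256).
    digest fake_hash(const std::string& partition) {
        digest d{};
        uint64_t h = std::hash<std::string>{}(partition);
        for (std::size_t i = 0; i < d.size(); ++i) {
            d[i] = uint8_t(h >> ((i % 8) * 8));
        }
        return d;
    }

    // XOR-merge: order-independent, so subsets checksummed separately
    // (e.g. per shard) combine into the same final value.
    void add(digest& into, const digest& other) {
        for (std::size_t i = 0; i < into.size(); ++i) {
            into[i] ^= other[i];
        }
    }

    int main() {
        digest a{}, b{};
        add(a, fake_hash("p1")); add(a, fake_hash("p2"));  // one order
        add(b, fake_hash("p2")); add(b, fake_hash("p1"));  // the other order
        assert(a == b);                  // same partition set, same checksum
    }
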
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "utils/allocation_strategy.hh" -#include - -// -// ~~ Definitions ~~ -// -// Mergeable type is a type which has an associated "apply" binary operation (T x T -> T) -// which forms a commutative semigroup with instances of that type. -// -// ReversiblyMergeable type is a Mergeable type which has two binary operations associated, -// "apply_reversibly" and "revert", both working on objects of that type (T x T -> T x T) -// with the following properties: -// -// apply_reversibly(x, y) = (x', y') -// revert(x', y') = (x'', y'') -// -// x' = apply(x, y) -// x'' = x -// apply(x'', y'') = apply(x, y) -// -// Note that it is not guaranteed that y'' = y and the state of y' is unspecified. -// -// ~~ API ~~ -// -// "apply_reversibly" and "revert" are usually implemented as instance methods or functions -// mutating both arguments to store the result of the operation in them. -// -// "revert" is not allowed to throw. If "apply_reversibly" throws the objects on which it operates -// are left in valid states, with guarantees the same as if a successful apply_reversibly() was -// followed by revert(). -// - - -template -struct default_reversible_applier { - void operator()(T& dst, T& src) const { - dst.apply_reversibly(src); - } -}; - -template -struct default_reverter { - void operator()(T& dst, T& src) const noexcept { - dst.revert(src); - } -}; diff --git a/scylla/row_cache.cc b/scylla/row_cache.cc deleted file mode 100644 index 525687a..0000000 --- a/scylla/row_cache.cc +++ /dev/null @@ -1,1024 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
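
One more aside before the row_cache.cc body: the ReversiblyMergeable contract
deleted just above is easiest to see on a toy type. A hypothetical std-only
sketch (additive_counter is illustrative, not a type from the tree), checking
the stated laws: x' = apply(x, y), revert() restores x, and re-applying yields
the same result:

    #include <cassert>

    // Toy ReversiblyMergeable type: "apply" is addition, and revert()
    // undoes a previous apply_reversibly() without throwing.
    struct additive_counter {
        long v = 0;
        void apply_reversibly(additive_counter& src) { v += src.v; }
        void revert(additive_counter& src) noexcept { v -= src.v; }
    };

    int main() {
        additive_counter x{2}, y{3};
        x.apply_reversibly(y);      // x' = apply(x, y)
        assert(x.v == 5);
        x.revert(y);                // x'' = x
        assert(x.v == 2);
        x.apply_reversibly(y);      // apply(x'', y'') == apply(x, y)
        assert(x.v == 5);
    }
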
- */ - -#include "row_cache.hh" -#include "core/memory.hh" -#include "core/do_with.hh" -#include "core/future-util.hh" -#include -#include -#include "memtable.hh" -#include "partition_snapshot_reader.hh" -#include -#include "utils/move.hh" -#include -#include -#include "stdx.hh" -#include "cache_streamed_mutation.hh" -#include "read_context.hh" -#include "schema_upgrader.hh" - -using namespace std::chrono_literals; -using namespace cache; - - -static logging::logger clogger("cache"); - -thread_local seastar::thread_scheduling_group row_cache::_update_thread_scheduling_group(1ms, 0.2); - -mutation_reader -row_cache::create_underlying_reader(read_context& ctx, mutation_source& src, const dht::partition_range& pr) { - ctx.on_underlying_created(); - return src(_schema, pr, ctx.slice(), ctx.pc(), ctx.trace_state(), streamed_mutation::forwarding::yes); -} - -cache_tracker& global_cache_tracker() { - static thread_local cache_tracker instance; - return instance; -} - -cache_tracker::cache_tracker() { - setup_metrics(); - - _region.make_evictable([this] { - return with_allocator(_region.allocator(), [this] { - // Removing a partition may require reading large keys when we rebalance - // the rbtree, so linearize anything we read - return with_linearized_managed_bytes([&] { - try { - auto evict_last = [this](lru_type& lru) { - cache_entry& ce = lru.back(); - auto it = row_cache::partitions_type::s_iterator_to(ce); - clear_continuity(*std::next(it)); - lru.pop_back_and_dispose(current_deleter()); - }; - if (_lru.empty()) { - return memory::reclaiming_result::reclaimed_nothing; - } - evict_last(_lru); - --_stats.partitions; - ++_stats.partition_evictions; - ++_stats.modification_count; - return memory::reclaiming_result::reclaimed_something; - } catch (std::bad_alloc&) { - // Bad luck, linearization during partition removal caused us to - // fail. Drop the entire cache so we can make forward progress. 
- clear(); - return memory::reclaiming_result::reclaimed_something; - } - }); - }); - }); -} - -cache_tracker::~cache_tracker() { - clear(); -} - -void -cache_tracker::setup_metrics() { - namespace sm = seastar::metrics; - _metrics.add_group("cache", { - sm::make_gauge("bytes_used", sm::description("current bytes used by the cache out of the total size of memory"), [this] { return _region.occupancy().used_space(); }), - sm::make_gauge("bytes_total", sm::description("total size of memory for the cache"), [this] { return _region.occupancy().total_space(); }), - sm::make_derive("partition_hits", sm::description("number of partitions needed by reads and found in cache"), _stats.partition_hits), - sm::make_derive("partition_misses", sm::description("number of partitions needed by reads and missing in cache"), _stats.partition_misses), - sm::make_derive("partition_insertions", sm::description("total number of partitions added to cache"), _stats.partition_insertions), - sm::make_derive("row_hits", sm::description("total number of rows needed by reads and found in cache"), _stats.row_hits), - sm::make_derive("row_misses", sm::description("total number of rows needed by reads and missing in cache"), _stats.row_misses), - sm::make_derive("row_insertions", sm::description("total number of rows added to cache"), _stats.row_insertions), - sm::make_derive("concurrent_misses_same_key", sm::description("total number of operation with misses same key"), _stats.concurrent_misses_same_key), - sm::make_derive("partition_merges", sm::description("total number of partitions merged"), _stats.partition_merges), - sm::make_derive("partition_evictions", sm::description("total number of evicted partitions"), _stats.partition_evictions), - sm::make_derive("partition_removals", sm::description("total number of invalidated partitions"), _stats.partition_removals), - sm::make_derive("mispopulations", sm::description("number of entries not inserted by reads"), _stats.mispopulations), - sm::make_gauge("partitions", sm::description("total number of cached partitions"), _stats.partitions), - sm::make_derive("reads", sm::description("number of started reads"), _stats.reads), - sm::make_derive("reads_with_misses", sm::description("number of reads which had to read from sstables"), _stats.reads_with_misses), - sm::make_gauge("active_reads", sm::description("number of currently active reads"), [this] { return _stats.active_reads(); }), - sm::make_derive("sstable_reader_recreations", sm::description("number of times sstable reader was recreated due to memtable flush"), _stats.underlying_recreations), - sm::make_derive("sstable_partition_skips", sm::description("number of times sstable reader was fast forwarded across partitions"), _stats.underlying_partition_skips), - sm::make_derive("sstable_row_skips", sm::description("number of times sstable reader was fast forwarded within a partition"), _stats.underlying_row_skips), - }); -} - -void cache_tracker::clear() { - with_allocator(_region.allocator(), [this] { - auto clear = [this] (lru_type& lru) { - while (!lru.empty()) { - cache_entry& ce = lru.back(); - auto it = row_cache::partitions_type::s_iterator_to(ce); - while (it->is_evictable()) { - cache_entry& to_remove = *it; - ++it; - to_remove._lru_link.unlink(); - current_deleter()(&to_remove); - } - clear_continuity(*it); - } - }; - clear(_lru); - }); - _stats.partition_removals += _stats.partitions; - _stats.partitions = 0; - ++_stats.modification_count; -} - -void cache_tracker::touch(cache_entry& e) { - auto move_to_front = 
[this] (lru_type& lru, cache_entry& e) { - lru.erase(lru.iterator_to(e)); - lru.push_front(e); - }; - move_to_front(_lru, e); -} - -void cache_tracker::insert(cache_entry& entry) { - ++_stats.partition_insertions; - ++_stats.partitions; - ++_stats.modification_count; - _lru.push_front(entry); -} - -void cache_tracker::on_erase() { - --_stats.partitions; - ++_stats.partition_removals; - ++_stats.modification_count; -} - -void cache_tracker::on_merge() { - ++_stats.partition_merges; -} - -void cache_tracker::on_partition_hit() { - ++_stats.partition_hits; -} - -void cache_tracker::on_partition_miss() { - ++_stats.partition_misses; -} - -void cache_tracker::on_row_hit() { - ++_stats.row_hits; -} - -void cache_tracker::on_row_miss() { - ++_stats.row_misses; -} - -void cache_tracker::on_mispopulate() { - ++_stats.mispopulations; -} - -void cache_tracker::on_miss_already_populated() { - ++_stats.concurrent_misses_same_key; -} - -allocation_strategy& cache_tracker::allocator() { - return _region.allocator(); -} - -logalloc::region& cache_tracker::region() { - return _region; -} - -const logalloc::region& cache_tracker::region() const { - return _region; -} - -// Stable cursor over partition entries from given range. -// -// Must be accessed with reclaim lock held on the cache region. -// The position of the cursor is always valid, but cache entry reference -// is not always valid. It remains valid as long as the iterators -// into _cache._partitions remain valid. Cache entry reference can be -// brought back to validity by calling refresh(). -// -class partition_range_cursor final { - std::reference_wrapper _cache; - row_cache::partitions_type::iterator _it; - row_cache::partitions_type::iterator _end; - dht::ring_position_view _start_pos; - dht::ring_position_view _end_pos; - stdx::optional _last; - uint64_t _last_reclaim_count; - size_t _last_modification_count; -private: - void set_position(cache_entry& e) { - // FIXME: make ring_position_view convertible to ring_position, so we can use e.position() - if (e.is_dummy_entry()) { - _last = {}; - _start_pos = dht::ring_position_view::max(); - } else { - _last = e.key(); - _start_pos = dht::ring_position_view(*_last); - } - } -public: - // Creates a cursor positioned at the lower bound of the range. - // The cache entry reference is not valid. - // The range reference must remain live as long as this instance is used. - partition_range_cursor(row_cache& cache, const dht::partition_range& range) - : _cache(cache) - , _start_pos(dht::ring_position_view::for_range_start(range)) - , _end_pos(dht::ring_position_view::for_range_end(range)) - , _last_reclaim_count(std::numeric_limits::max()) - , _last_modification_count(std::numeric_limits::max()) - { } - - // Ensures that cache entry reference is valid. - // The cursor will point at the first entry with position >= the current position. - // Returns true if and only if the position of the cursor changed. - // Strong exception guarantees. - bool refresh() { - auto reclaim_count = _cache.get().get_cache_tracker().region().reclaim_counter(); - auto modification_count = _cache.get().get_cache_tracker().modification_count(); - - if (reclaim_count == _last_reclaim_count && modification_count == _last_modification_count) { - return true; - } - - auto cmp = cache_entry::compare(_cache.get()._schema); - if (cmp(_end_pos, _start_pos)) { // next() may have moved _start_pos past the _end_pos. 
- _end_pos = _start_pos; - } - _end = _cache.get()._partitions.lower_bound(_end_pos, cmp); - _it = _cache.get()._partitions.lower_bound(_start_pos, cmp); - auto same = !cmp(_start_pos, _it->position()); - set_position(*_it); - _last_reclaim_count = reclaim_count; - _last_modification_count = modification_count; - return same; - } - - // Positions the cursor at the next entry. - // May advance past the requested range. Use in_range() after the call to determine that. - // Call only when in_range() and cache entry reference is valid. - // Strong exception guarantees. - void next() { - auto next = std::next(_it); - set_position(*next); - _it = std::move(next); - } - - // Valid only after refresh() and before _cache._partitions iterators are invalidated. - // Points inside the requested range if in_range(). - cache_entry& entry() { - return *_it; - } - - // Call only when cache entry reference is valid. - bool in_range() { - return _it != _end; - } - - // Returns current position of the cursor. - // Result valid as long as this instance is valid and not advanced. - dht::ring_position_view position() const { - return _start_pos; - } -}; - -future<> read_context::create_sm() { - if (_range_query) { - // FIXME: Singular-range mutation readers don't support fast_forward_to(), so need to use a wide range - // here in case the same reader will need to be fast forwarded later. - _sm_range = dht::partition_range({dht::ring_position(*_key)}, {dht::ring_position(*_key)}); - } else { - _sm_range = dht::partition_range::make_singular({dht::ring_position(*_key)}); - } - return _underlying.fast_forward_to(std::move(_sm_range), *_underlying_snapshot, _phase).then([this] { - return _underlying.read_next_same_phase().then([this] (auto&& smo) { - if (!smo) { - _sm = make_empty_streamed_mutation(_cache.schema(), *_key, streamed_mutation::forwarding::yes); - } else { - _sm = std::move(*smo); - } - }); - }); -} - -static streamed_mutation read_directly_from_underlying(streamed_mutation&& sm, read_context& reader) { - if (reader.schema()->version() != sm.schema()->version()) { - sm = transform(std::move(sm), schema_upgrader(reader.schema())); - } - if (reader.fwd() == streamed_mutation::forwarding::no) { - sm = streamed_mutation_from_forwarding_streamed_mutation(std::move(sm)); - } - return std::move(sm); -} - -// Reader which populates the cache using data from the delegate. 
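
The class below implements this populate-on-miss flow for a single partition.
As a rough std-only analogy (read_through_cache and its members are
hypothetical, and ignore snapshots, phases and eviction entirely):

    #include <functional>
    #include <map>
    #include <optional>
    #include <string>

    struct read_through_cache {
        std::map<int, std::string> entries;                        // the "cache"
        std::function<std::optional<std::string>(int)> underlying; // the "delegate"

        std::optional<std::string> read(int key) {
            if (auto it = entries.find(key); it != entries.end()) {
                return it->second;              // hit: serve from cache
            }
            auto value = underlying(key);       // miss: read from the delegate
            if (value) {
                entries.emplace(key, *value);   // populate so the next read hits
            }
            return value;
        }
    };

    int main() {
        read_through_cache c;
        c.underlying = [] (int k) {
            return std::optional<std::string>("value-" + std::to_string(k));
        };
        auto miss = c.read(42);   // fetched from the delegate, then cached
        auto hit = c.read(42);    // served from the map
        return (miss == hit) ? 0 : 1;
    }

The real reader adds the crucial refinement visible below: the populate step
is abandoned (counted via on_mispopulate()) if the cache's snapshot phase
changed between starting the read and committing the entry.
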
-class single_partition_populating_reader final : public mutation_reader::impl { - row_cache& _cache; - mutation_reader _delegate; - lw_shared_ptr _read_context; -public: - single_partition_populating_reader(row_cache& cache, - lw_shared_ptr context) - : _cache(cache) - , _read_context(std::move(context)) - { } - - virtual future operator()() override { - if (!_read_context) { - return make_ready_future(streamed_mutation_opt()); - } - auto src_and_phase = _cache.snapshot_of(_read_context->range().start()->value()); - auto phase = src_and_phase.phase; - _delegate = _cache.create_underlying_reader(*_read_context, src_and_phase.snapshot, _read_context->range()); - return _delegate().then([this, phase] (auto sm) mutable -> streamed_mutation_opt { - auto ctx = std::move(_read_context); - if (!sm) { - return std::move(sm); - } - if (phase == _cache.phase_of(ctx->range().start()->value())) { - return _cache._read_section(_cache._tracker.region(), [&] { - cache_entry& e = _cache.find_or_create(sm->decorated_key(), sm->partition_tombstone(), phase); - return e.read(_cache, *ctx, std::move(*sm), phase); - }); - } else { - _cache._tracker.on_mispopulate(); - return read_directly_from_underlying(std::move(*sm), *ctx); - } - }); - } -}; - -void cache_tracker::clear_continuity(cache_entry& ce) { - ce.set_continuous(false); -} - -void row_cache::on_partition_hit() { - _tracker.on_partition_hit(); -} - -void row_cache::on_partition_miss() { - _tracker.on_partition_miss(); -} - -void row_cache::on_row_hit() { - _stats.hits.mark(); - _tracker.on_row_hit(); -} - -void row_cache::on_mispopulate() { - _tracker.on_mispopulate(); -} - -void row_cache::on_row_miss() { - _stats.misses.mark(); - _tracker.on_row_miss(); -} - -void row_cache::on_row_insert() { - ++_tracker._stats.row_insertions; -} - -class range_populating_reader { - row_cache& _cache; - autoupdating_underlying_reader& _reader; - stdx::optional _last_key; - read_context& _read_context; -private: - bool can_set_continuity() const { - return _last_key && _reader.creation_phase() == _cache.phase_of(_reader.population_range_start()); - } - void handle_end_of_stream() { - if (!can_set_continuity()) { - _cache.on_mispopulate(); - return; - } - if (!_reader.range().end() || !_reader.range().end()->is_inclusive()) { - cache_entry::compare cmp(_cache._schema); - auto it = _reader.range().end() ? _cache._partitions.find(_reader.range().end()->value(), cmp) - : std::prev(_cache._partitions.end()); - if (it != _cache._partitions.end()) { - if (it == _cache._partitions.begin()) { - if (!_last_key->_key) { - it->set_continuous(true); - } else { - _cache.on_mispopulate(); - } - } else { - auto prev = std::prev(it); - if (prev->key().equal(*_cache._schema, *_last_key->_key)) { - it->set_continuous(true); - } else { - _cache.on_mispopulate(); - } - } - } - } - } -public: - range_populating_reader(row_cache& cache, read_context& ctx) - : _cache(cache) - , _reader(ctx.underlying()) - , _read_context(ctx) - {} - - future operator()() { - return _reader().then([this] (streamed_mutation_opt smopt) mutable -> streamed_mutation_opt { - { - if (!smopt) { - handle_end_of_stream(); - return std::move(smopt); - } - _cache.on_partition_miss(); - if (_reader.creation_phase() == _cache.phase_of(smopt->decorated_key())) { - return _cache._read_section(_cache._tracker.region(), [&] { - cache_entry& e = _cache.find_or_create(smopt->decorated_key(), smopt->partition_tombstone(), _reader.creation_phase(), - can_set_continuity() ? 
&*_last_key : nullptr); - _last_key = row_cache::previous_entry_pointer(smopt->decorated_key()); - return e.read(_cache, _read_context, std::move(*smopt), _reader.creation_phase()); - }); - } else { - _cache._tracker.on_mispopulate(); - _last_key = row_cache::previous_entry_pointer(smopt->decorated_key()); - return read_directly_from_underlying(std::move(*smopt), _read_context); - } - } - }); - } - - future<> fast_forward_to(dht::partition_range&& pr) { - if (!pr.start()) { - _last_key = row_cache::previous_entry_pointer(); - } else if (!pr.start()->is_inclusive() && pr.start()->value().has_key()) { - _last_key = row_cache::previous_entry_pointer(pr.start()->value().as_decorated_key()); - } else { - // Inclusive start bound, cannot set continuity flag. - _last_key = {}; - } - - return _reader.fast_forward_to(std::move(pr)); - } -}; - -class scanning_and_populating_reader final : public mutation_reader::impl { - const dht::partition_range* _pr; - row_cache& _cache; - lw_shared_ptr _read_context; - partition_range_cursor _primary; - range_populating_reader _secondary_reader; - bool _secondary_in_progress = false; - bool _advance_primary = false; - stdx::optional _lower_bound; - dht::partition_range _secondary_range; -private: - streamed_mutation read_from_entry(cache_entry& ce) { - _cache.upgrade_entry(ce); - _cache._tracker.touch(ce); - _cache.on_partition_hit(); - return ce.read(_cache, *_read_context); - } - - streamed_mutation_opt do_read_from_primary() { - return _cache._read_section(_cache._tracker.region(), [this] { - return with_linearized_managed_bytes([&] () -> streamed_mutation_opt { - auto not_moved = _primary.refresh(); - - if (_advance_primary && not_moved) { - _primary.next(); - not_moved = false; - } - _advance_primary = false; - - if (not_moved || _primary.entry().continuous()) { - if (!_primary.in_range()) { - return stdx::nullopt; - } - cache_entry& e = _primary.entry(); - auto sm = read_from_entry(e); - _lower_bound = dht::partition_range::bound{e.key(), false}; - // Delay the call to next() so that we don't see stale continuity on next invocation. - _advance_primary = true; - return streamed_mutation_opt(std::move(sm)); - } else { - if (_primary.in_range()) { - cache_entry& e = _primary.entry(); - _secondary_range = dht::partition_range(_lower_bound ? 
std::move(_lower_bound) : _pr->start(), - dht::partition_range::bound{e.key(), false}); - _lower_bound = dht::partition_range::bound{e.key(), true}; - _secondary_in_progress = true; - return stdx::nullopt; - } else { - dht::ring_position_comparator cmp(*_read_context->schema()); - auto range = _pr->trim_front(std::move(_lower_bound), cmp); - if (!range) { - return stdx::nullopt; - } - _lower_bound = dht::partition_range::bound{dht::ring_position::max()}; - _secondary_range = std::move(*range); - _secondary_in_progress = true; - return stdx::nullopt; - } - } - }); - }); - } - - future read_from_primary() { - auto smo = do_read_from_primary(); - if (!_secondary_in_progress) { - return make_ready_future(std::move(smo)); - } - return _secondary_reader.fast_forward_to(std::move(_secondary_range)).then([this] { - return read_from_secondary(); - }); - } - - future read_from_secondary() { - return _secondary_reader().then([this] (streamed_mutation_opt smopt) { - if (smopt) { - return make_ready_future(std::move(smopt)); - } else { - _secondary_in_progress = false; - return read_from_primary(); - } - }); - } -public: - scanning_and_populating_reader(row_cache& cache, - const dht::partition_range& range, - lw_shared_ptr context) - : _pr(&range) - , _cache(cache) - , _read_context(std::move(context)) - , _primary(cache, range) - , _secondary_reader(cache, *_read_context) - { } - - future operator()() { - if (_secondary_in_progress) { - return read_from_secondary(); - } else { - return read_from_primary(); - } - } - - future<> fast_forward_to(const dht::partition_range& pr) { - _secondary_in_progress = false; - _advance_primary = false; - _pr = ≺ - _primary = partition_range_cursor{_cache, pr}; - _lower_bound = {}; - return make_ready_future<>(); - } -}; - -mutation_reader -row_cache::make_scanning_reader(const dht::partition_range& range, lw_shared_ptr context) { - return make_mutation_reader(*this, range, std::move(context)); -} - -mutation_reader -row_cache::make_reader(schema_ptr s, - const dht::partition_range& range, - const query::partition_slice& slice, - const io_priority_class& pc, - tracing::trace_state_ptr trace_state, - streamed_mutation::forwarding fwd, - mutation_reader::forwarding fwd_mr) -{ - auto ctx = make_lw_shared(*this, std::move(s), range, slice, pc, trace_state, fwd, fwd_mr); - - if (!ctx->is_range_query()) { - return _read_section(_tracker.region(), [&] { - return with_linearized_managed_bytes([&] { - auto i = _partitions.find(ctx->range().start()->value(), cache_entry::compare(_schema)); - if (i != _partitions.end()) { - cache_entry& e = *i; - _tracker.touch(e); - upgrade_entry(e); - on_partition_hit(); - return make_reader_returning(e.read(*this, *ctx)); - } else { - on_partition_miss(); - return make_mutation_reader(*this, std::move(ctx)); - } - }); - }); - } - - return make_scanning_reader(range, std::move(ctx)); -} - -row_cache::~row_cache() { - with_allocator(_tracker.allocator(), [this] { - _partitions.clear_and_dispose([this, deleter = current_deleter()] (auto&& p) mutable { - if (!p->is_dummy_entry()) { - _tracker.on_erase(); - } - deleter(p); - }); - }); -} - -void row_cache::clear_now() noexcept { - with_allocator(_tracker.allocator(), [this] { - auto it = _partitions.erase_and_dispose(_partitions.begin(), partitions_end(), [this, deleter = current_deleter()] (auto&& p) mutable { - _tracker.on_erase(); - deleter(p); - }); - _tracker.clear_continuity(*it); - }); -} - -template -//requires requires(CreateEntry create, VisitEntry visit, 
row_cache::partitions_type::iterator it) { -// { create(it) } -> row_cache::partitions_type::iterator; -// { visit(it) } -> void; -// } -cache_entry& row_cache::do_find_or_create_entry(const dht::decorated_key& key, - const previous_entry_pointer* previous, CreateEntry&& create_entry, VisitEntry&& visit_entry) -{ - return with_allocator(_tracker.allocator(), [&] () -> cache_entry& { - return with_linearized_managed_bytes([&] () -> cache_entry& { - auto i = _partitions.lower_bound(key, cache_entry::compare(_schema)); - if (i == _partitions.end() || !i->key().equal(*_schema, key)) { - i = create_entry(i); - } else { - visit_entry(i); - } - - if (!previous) { - return *i; - } - - if ((!previous->_key && i == _partitions.begin()) - || (previous->_key && i != _partitions.begin() - && std::prev(i)->key().equal(*_schema, *previous->_key))) { - i->set_continuous(true); - } else { - on_mispopulate(); - } - - return *i; - }); - }); -} - -cache_entry& row_cache::find_or_create(const dht::decorated_key& key, tombstone t, row_cache::phase_type phase, const previous_entry_pointer* previous) { - return do_find_or_create_entry(key, previous, [&] (auto i) { // create - auto entry = current_allocator().construct(cache_entry::incomplete_tag{}, _schema, key, t); - _tracker.insert(*entry); - return _partitions.insert(i, *entry); - }, [&] (auto i) { // visit - _tracker.on_miss_already_populated(); - cache_entry& e = *i; - e.partition().open_version(*e.schema(), phase).partition().apply(t); - _tracker.touch(e); - upgrade_entry(e); - }); -} - -void row_cache::populate(const mutation& m, const previous_entry_pointer* previous) { - _populate_section(_tracker.region(), [&] { - do_find_or_create_entry(m.decorated_key(), previous, [&] (auto i) { - cache_entry* entry = current_allocator().construct( - m.schema(), m.decorated_key(), m.partition()); - upgrade_entry(*entry); - _tracker.insert(*entry); - return _partitions.insert(i, *entry); - }, [&] (auto i) { - throw std::runtime_error(sprint("cache already contains entry for {}", m.key())); - }); - }); -} - -mutation_source& row_cache::snapshot_for_phase(phase_type phase) { - if (phase == _underlying_phase) { - return _underlying; - } else { - if (phase + 1 < _underlying_phase) { - throw std::runtime_error(sprint("attempted to read from retired phase {} (current={})", phase, _underlying_phase)); - } - return *_prev_snapshot; - } -} - -row_cache::snapshot_and_phase row_cache::snapshot_of(dht::ring_position_view pos) { - dht::ring_position_less_comparator less(*_schema); - if (!_prev_snapshot_pos || less(pos, *_prev_snapshot_pos)) { - return {_underlying, _underlying_phase}; - } - return {*_prev_snapshot, _underlying_phase - 1}; -} - -row_cache::phase_type row_cache::phase_of(dht::ring_position_view pos) { - dht::ring_position_less_comparator less(*_schema); - if (!_prev_snapshot_pos || less(pos, *_prev_snapshot_pos)) { - return _underlying_phase; - } - return _underlying_phase - 1; -} - -template -future<> row_cache::do_update(memtable& m, Updater updater) { - m.on_detach_from_region_group(); - _tracker.region().merge(m); // Now all data in memtable belongs to cache - auto attr = seastar::thread_attributes(); - attr.scheduling_group = &_update_thread_scheduling_group; - STAP_PROBE(scylla, row_cache_update_start); - auto t = seastar::thread(attr, [this, &m, updater = std::move(updater)] () mutable { - auto cleanup = defer([&] { - with_allocator(_tracker.allocator(), [&m, this] () { - logalloc::reclaim_lock _(_tracker.region()); - bool blow_cache = false; - // Note: 
clear_and_dispose() ought not to look up any keys, so it doesn't require - // with_linearized_managed_bytes(), but invalidate() does. - m.partitions.clear_and_dispose([this, deleter = current_deleter(), &blow_cache] (memtable_entry* entry) { - with_linearized_managed_bytes([&] { - try { - invalidate_locked(entry->key()); - } catch (...) { - blow_cache = true; - } - deleter(entry); - }); - }); - if (blow_cache) { - // We failed to invalidate the key, presumably due to with_linearized_managed_bytes() - // running out of memory. Recover using clear_now(), which doesn't throw. - clear_now(); - } - }); - }); - auto permit = get_units(_update_sem, 1).get0(); - ++_underlying_phase; - _prev_snapshot = std::exchange(_underlying, _snapshot_source()); - _prev_snapshot_pos = dht::ring_position::min(); - auto cleanup_prev_snapshot = defer([this] { - _prev_snapshot_pos = {}; - _prev_snapshot = {}; - }); - while (!m.partitions.empty()) { - with_allocator(_tracker.allocator(), [this, &m, &updater] () { - unsigned quota = 30; - auto cmp = cache_entry::compare(_schema); - { - _update_section(_tracker.region(), [&] { - STAP_PROBE(scylla, row_cache_update_one_batch_start); - unsigned quota_before = quota; - // FIXME: we should really be checking should_yield() here instead of - // need_preempt() + quota. However, should_yield() is currently quite - // expensive and we need to amortize it somehow. - do { - auto i = m.partitions.begin(); - STAP_PROBE(scylla, row_cache_update_partition_start); - with_linearized_managed_bytes([&] { - { - memtable_entry& mem_e = *i; - // FIXME: Optimize knowing we lookup in-order. - auto cache_i = _partitions.lower_bound(mem_e.key(), cmp); - updater(cache_i, mem_e); - i = m.partitions.erase(i); - current_allocator().destroy(&mem_e); - --quota; - } - }); - STAP_PROBE(scylla, row_cache_update_partition_end); - } while (!m.partitions.empty() && quota && !need_preempt()); - with_allocator(standard_allocator(), [&] { - if (m.partitions.empty()) { - _prev_snapshot_pos = {}; - } else { - _prev_snapshot_pos = dht::ring_position(m.partitions.begin()->key()); - } - }); - STAP_PROBE1(scylla, row_cache_update_one_batch_end, quota_before - quota); - }); - if (quota == 0 && seastar::thread::should_yield()) { - return; - } - } - }); - seastar::thread::yield(); - } - }); - STAP_PROBE(scylla, row_cache_update_end); - return do_with(std::move(t), [] (seastar::thread& t) { - return t.join(); - }); -} - -future<> row_cache::update(memtable& m, partition_presence_checker is_present) { - return do_update(m, [this, is_present = std::move(is_present)] (row_cache::partitions_type::iterator cache_i, memtable_entry& mem_e) mutable { - // If cache doesn't contain the entry we cannot insert it because the mutation may be incomplete. - // FIXME: keep a bitmap indicating which sstables we do cover, so we don't have to - // search it. 
- if (cache_i != partitions_end() && cache_i->key().equal(*_schema, mem_e.key())) { - cache_entry& entry = *cache_i; - upgrade_entry(entry); - entry.partition().apply_to_incomplete(*_schema, std::move(mem_e.partition()), *mem_e.schema()); - _tracker.touch(entry); - _tracker.on_merge(); - } else if (cache_i->continuous() || is_present(mem_e.key()) == partition_presence_checker_result::definitely_doesnt_exist) { - cache_entry* entry = current_allocator().construct( - mem_e.schema(), std::move(mem_e.key()), std::move(mem_e.partition())); - entry->set_continuous(cache_i->continuous()); - _tracker.insert(*entry); - _partitions.insert(cache_i, *entry); - } - }); -} - -future<> row_cache::update_invalidating(memtable& m) { - return do_update(m, [this] (row_cache::partitions_type::iterator cache_i, memtable_entry& mem_e) { - if (cache_i != partitions_end() && cache_i->key().equal(*_schema, mem_e.key())) { - // FIXME: Invalidate only affected row ranges. - // This invalidates all row ranges and the static row, leaving only the partition tombstone continuous, - // which has to always be continuous. - cache_entry& e = *cache_i; - e.partition() = partition_entry(mutation_partition::make_incomplete(*e.schema(), mem_e.partition().partition_tombstone())); - } else { - _tracker.clear_continuity(*cache_i); - } - }); -} - -void row_cache::refresh_snapshot() { - _underlying = _snapshot_source(); -} - -void row_cache::touch(const dht::decorated_key& dk) { - _read_section(_tracker.region(), [&] { - with_linearized_managed_bytes([&] { - auto i = _partitions.find(dk, cache_entry::compare(_schema)); - if (i != _partitions.end()) { - _tracker.touch(*i); - } - }); - }); -} - -void row_cache::invalidate_locked(const dht::decorated_key& dk) { - auto pos = _partitions.lower_bound(dk, cache_entry::compare(_schema)); - if (pos == partitions_end() || !pos->key().equal(*_schema, dk)) { - _tracker.clear_continuity(*pos); - } else { - auto it = _partitions.erase_and_dispose(pos, - [this, &dk, deleter = current_deleter()](auto&& p) mutable { - _tracker.on_erase(); - deleter(p); - }); - _tracker.clear_continuity(*it); - } -} - -future<> row_cache::invalidate(const dht::decorated_key& dk) { - return invalidate(dht::partition_range::make_singular(dk)); -} - -future<> row_cache::invalidate(const dht::partition_range& range) { - return invalidate(dht::partition_range_vector({range})); -} - -future<> row_cache::invalidate(dht::partition_range_vector&& ranges) { - return get_units(_update_sem, 1).then([this, ranges = std::move(ranges)] (auto permit) mutable { - _underlying = _snapshot_source(); - ++_underlying_phase; - auto on_failure = defer([this] { this->clear_now(); }); - with_linearized_managed_bytes([&] { - for (auto&& range : ranges) { - this->invalidate_unwrapped(range); - } - }); - on_failure.cancel(); - }); -} - -void row_cache::evict(const dht::partition_range& range) { - invalidate_unwrapped(range); -} - -void row_cache::invalidate_unwrapped(const dht::partition_range& range) { - logalloc::reclaim_lock _(_tracker.region()); - - auto cmp = cache_entry::compare(_schema); - auto begin = _partitions.lower_bound(dht::ring_position_view::for_range_start(range), cmp); - auto end = _partitions.lower_bound(dht::ring_position_view::for_range_end(range), cmp); - with_allocator(_tracker.allocator(), [this, begin, end] { - auto it = _partitions.erase_and_dispose(begin, end, [this, deleter = current_deleter()] (auto&& p) mutable { - _tracker.on_erase(); - deleter(p); - }); - assert(it != _partitions.end()); - 
_tracker.clear_continuity(*it); - }); -} - -row_cache::row_cache(schema_ptr s, snapshot_source src, cache_tracker& tracker) - : _tracker(tracker) - , _schema(std::move(s)) - , _partitions(cache_entry::compare(_schema)) - , _underlying(src()) - , _snapshot_source(std::move(src)) -{ - with_allocator(_tracker.allocator(), [this] { - cache_entry* entry = current_allocator().construct(cache_entry::dummy_entry_tag()); - _partitions.insert(*entry); - }); -} - -cache_entry::cache_entry(cache_entry&& o) noexcept - : _schema(std::move(o._schema)) - , _key(std::move(o._key)) - , _pe(std::move(o._pe)) - , _flags(o._flags) - , _lru_link() - , _cache_link() -{ - if (o._lru_link.is_linked()) { - auto prev = o._lru_link.prev_; - o._lru_link.unlink(); - cache_tracker::lru_type::node_algorithms::link_after(prev, _lru_link.this_ptr()); - } - - { - using container_type = row_cache::partitions_type; - container_type::node_algorithms::replace_node(o._cache_link.this_ptr(), _cache_link.this_ptr()); - container_type::node_algorithms::init(o._cache_link.this_ptr()); - } -} - -void row_cache::set_schema(schema_ptr new_schema) noexcept { - _schema = std::move(new_schema); -} - -streamed_mutation cache_entry::read(row_cache& rc, read_context& reader) { - auto source_and_phase = rc.snapshot_of(_key); - reader.enter_partition(_key, source_and_phase.snapshot, source_and_phase.phase); - return do_read(rc, reader); -} - -streamed_mutation cache_entry::read(row_cache& rc, read_context& reader, - streamed_mutation&& sm, row_cache::phase_type phase) { - reader.enter_partition(std::move(sm), phase); - return do_read(rc, reader); -} - -// Assumes reader is in the corresponding partition -streamed_mutation cache_entry::do_read(row_cache& rc, read_context& reader) { - auto snp = _pe.read(_schema, reader.phase()); - auto ckr = query::clustering_key_filter_ranges::get_ranges(*_schema, reader.slice(), _key.key()); - auto sm = make_cache_streamed_mutation(_schema, _key, std::move(ckr), rc, reader.shared_from_this(), std::move(snp)); - if (reader.schema()->version() != _schema->version()) { - sm = transform(std::move(sm), schema_upgrader(reader.schema())); - } - if (reader.fwd() == streamed_mutation::forwarding::yes) { - sm = make_forwardable(std::move(sm)); - } - return std::move(sm); -} - -const schema_ptr& row_cache::schema() const { - return _schema; -} - -void row_cache::upgrade_entry(cache_entry& e) { - if (e._schema != _schema) { - auto& r = _tracker.region(); - assert(!r.reclaiming_enabled()); - with_allocator(r.allocator(), [this, &e] { - with_linearized_managed_bytes([&] { - e.partition().upgrade(e._schema, _schema); - e._schema = _schema; - }); - }); - } -} - -std::ostream& operator<<(std::ostream& out, row_cache& rc) { - rc._read_section(rc._tracker.region(), [&] { - out << "{row_cache: " << ::join(", ", rc._partitions.begin(), rc._partitions.end()) << "}"; - }); - return out; -} - -std::ostream& operator<<(std::ostream& out, cache_entry& e) { - return out << "{cache_entry: " << e.position() - << ", cont=" << e.continuous() - << ", dummy=" << e.is_dummy_entry() - << ", " << e.partition() - << "}"; -} diff --git a/scylla/row_cache.hh b/scylla/row_cache.hh deleted file mode 100644 index 18f3248..0000000 --- a/scylla/row_cache.hh +++ /dev/null @@ -1,493 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. 
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include - -#include "core/memory.hh" -#include - -#include "mutation_reader.hh" -#include "mutation_partition.hh" -#include "utils/logalloc.hh" -#include "utils/phased_barrier.hh" -#include "utils/histogram.hh" -#include "partition_version.hh" -#include "utils/estimated_histogram.hh" -#include "tracing/trace_state.hh" -#include - -namespace bi = boost::intrusive; - -class row_cache; -class memtable_entry; - -namespace cache { - -class autoupdating_underlying_reader; -class cache_streamed_mutation; -class read_context; -class lsa_manager; - -} - -// Intrusive set entry which holds partition data. -// -// TODO: Make memtables use this format too. -class cache_entry { - // We need auto_unlink<> option on the _cache_link because when entry is - // evicted from cache via LRU we don't have a reference to the container - // and don't want to store it with each entry. As for the _lru_link, we - // have a global LRU, so technically we could not use auto_unlink<> on - // _lru_link, but it's convenient to do so too. We may also want to have - // multiple eviction spaces in the future and thus multiple LRUs. - using lru_link_type = bi::list_member_hook>; - using cache_link_type = bi::set_member_hook>; - - schema_ptr _schema; - dht::decorated_key _key; - partition_entry _pe; - // True when we know that there is nothing between this entry and the next one in cache - struct { - bool _continuous : 1; - bool _dummy_entry : 1; - } _flags{}; - lru_link_type _lru_link; - cache_link_type _cache_link; - friend class size_calculator; - - streamed_mutation do_read(row_cache&, cache::read_context& reader); -public: - friend class row_cache; - friend class cache_tracker; - - struct dummy_entry_tag{}; - struct incomplete_tag{}; - - cache_entry(dummy_entry_tag) - : _key{dht::token(), partition_key::make_empty()} - { - _flags._dummy_entry = true; - } - - // Creates an entry which is fully discontinuous, except for the partition tombstone. - cache_entry(incomplete_tag, schema_ptr s, const dht::decorated_key& key, tombstone t) - : cache_entry(s, key, mutation_partition::make_incomplete(*s, t)) - { } - - cache_entry(schema_ptr s, const dht::decorated_key& key, const mutation_partition& p) - : _schema(std::move(s)) - , _key(key) - , _pe(p) - { - _pe.version()->partition().ensure_last_dummy(*_schema); - } - - cache_entry(schema_ptr s, dht::decorated_key&& key, mutation_partition&& p) noexcept - : _schema(std::move(s)) - , _key(std::move(key)) - , _pe(std::move(p)) - { - _pe.version()->partition().ensure_last_dummy(*_schema); - } - - // It is assumed that pe is fully continuous - cache_entry(schema_ptr s, dht::decorated_key&& key, partition_entry&& pe) noexcept - : _schema(std::move(s)) - , _key(std::move(key)) - , _pe(std::move(pe)) - { - // If we can assume that _pe is fully continuous, we don't need to check all versions - // to determine what the continuity is. 
- // This doesn't change value and doesn't invalidate iterators, so can be called even with a snapshot. - _pe.version()->partition().ensure_last_dummy(*_schema); - } - - cache_entry(cache_entry&&) noexcept; - - bool is_evictable() { return _lru_link.is_linked(); } - const dht::decorated_key& key() const { return _key; } - dht::ring_position_view position() const { - if (is_dummy_entry()) { - return dht::ring_position_view::max(); - } - return _key; - } - const partition_entry& partition() const { return _pe; } - partition_entry& partition() { return _pe; } - const schema_ptr& schema() const { return _schema; } - schema_ptr& schema() { return _schema; } - streamed_mutation read(row_cache&, cache::read_context& reader); - streamed_mutation read(row_cache&, cache::read_context& reader, streamed_mutation&& underlying, utils::phased_barrier::phase_type); - bool continuous() const { return _flags._continuous; } - void set_continuous(bool value) { _flags._continuous = value; } - - bool is_dummy_entry() const { return _flags._dummy_entry; } - - struct compare { - dht::ring_position_less_comparator _c; - - compare(schema_ptr s) - : _c(*s) - {} - - bool operator()(const dht::decorated_key& k1, const cache_entry& k2) const { - return _c(k1, k2.position()); - } - - bool operator()(dht::ring_position_view k1, const cache_entry& k2) const { - return _c(k1, k2.position()); - } - - bool operator()(const cache_entry& k1, const cache_entry& k2) const { - return _c(k1.position(), k2.position()); - } - - bool operator()(const cache_entry& k1, const dht::decorated_key& k2) const { - return _c(k1.position(), k2); - } - - bool operator()(const cache_entry& k1, dht::ring_position_view k2) const { - return _c(k1.position(), k2); - } - - bool operator()(dht::ring_position_view k1, dht::ring_position_view k2) const { - return _c(k1, k2); - } - }; - - friend std::ostream& operator<<(std::ostream&, cache_entry&); -}; - -// Tracks accesses and performs eviction of cache entries. -class cache_tracker final { -public: - using lru_type = bi::list, - bi::constant_time_size>; // we need this to have bi::auto_unlink on hooks. 
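
A quick note on the intrusive LRU list declared above: the policy is the
classic move-to-front-on-access, evict-from-the-back that cache_tracker's
touch() and eviction hook implement. The same policy in a hypothetical
std-only sketch (std::list stands in for the boost::intrusive list, which
avoids per-node allocation and supports auto-unlink):

    #include <cassert>
    #include <list>

    int main() {
        std::list<int> lru = {1, 2, 3};            // front = most recently used
        auto touch = [&] (std::list<int>::iterator it) {
            lru.splice(lru.begin(), lru, it);      // relink in place, no copy
        };
        touch(std::prev(lru.end()));               // key 3 was just accessed
        assert(lru.front() == 3);                  // now {3, 1, 2}
        lru.pop_back();                            // evict LRU entry (2)
        assert((lru == std::list<int>{3, 1}));
    }
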
-public:
-    friend class row_cache;
-    friend class cache::read_context;
-    friend class cache::autoupdating_underlying_reader;
-    friend class cache::cache_streamed_mutation;
-    struct stats {
-        uint64_t partition_hits;
-        uint64_t partition_misses;
-        uint64_t row_hits;
-        uint64_t row_misses;
-        uint64_t partition_insertions;
-        uint64_t row_insertions;
-        uint64_t concurrent_misses_same_key;
-        uint64_t partition_merges;
-        uint64_t partition_evictions;
-        uint64_t partition_removals;
-        uint64_t partitions;
-        uint64_t modification_count;
-        uint64_t mispopulations;
-        uint64_t underlying_recreations;
-        uint64_t underlying_partition_skips;
-        uint64_t underlying_row_skips;
-        uint64_t reads;
-        uint64_t reads_with_misses;
-        uint64_t reads_done;
-
-        uint64_t active_reads() const {
-            return reads - reads_done;
-        }
-    };
-private:
-    stats _stats{};
-    seastar::metrics::metric_groups _metrics;
-    logalloc::region _region;
-    lru_type _lru;
-private:
-    void setup_metrics();
-public:
-    cache_tracker();
-    ~cache_tracker();
-    void clear();
-    void touch(cache_entry&);
-    void insert(cache_entry&);
-    void clear_continuity(cache_entry& ce);
-    void on_erase();
-    void on_merge();
-    void on_partition_hit();
-    void on_partition_miss();
-    void on_row_hit();
-    void on_row_miss();
-    void on_miss_already_populated();
-    void on_mispopulate();
-    allocation_strategy& allocator();
-    logalloc::region& region();
-    const logalloc::region& region() const;
-    uint64_t modification_count() const { return _stats.modification_count; }
-    uint64_t partitions() const { return _stats.partitions; }
-    const stats& get_stats() const { return _stats; }
-};
-
-// Returns a reference to shard-wide cache_tracker.
-cache_tracker& global_cache_tracker();
-
-//
-// A data source which wraps another data source such that data obtained from the underlying data source
-// is cached in-memory in order to serve queries faster.
-//
-// Cache populates itself automatically during misses.
-//
-// Cache represents a snapshot of the underlying mutation source. When the
-// underlying mutation source changes, cache needs to be explicitly synchronized
-// to the latest snapshot. This is done by calling update() or invalidate().
-//
-class row_cache final {
-public:
-    using phase_type = utils::phased_barrier::phase_type;
-    using partitions_type = bi::set<cache_entry,
-        bi::member_hook<cache_entry, cache_entry::cache_link_type, &cache_entry::_cache_link>,
-        bi::constant_time_size<false>, // we need this to have bi::auto_unlink on hooks
-        bi::compare<cache_entry::compare>>;
-    friend class cache::autoupdating_underlying_reader;
-    friend class single_partition_populating_reader;
-    friend class cache_entry;
-    friend class cache::cache_streamed_mutation;
-    friend class cache::lsa_manager;
-    friend class cache::read_context;
-    friend class partition_range_cursor;
-    friend class cache_tester;
-public:
-    struct stats {
-        utils::timed_rate_moving_average hits;
-        utils::timed_rate_moving_average misses;
-        utils::timed_rate_moving_average reads_with_misses;
-        utils::timed_rate_moving_average reads_with_no_misses;
-    };
-private:
-    cache_tracker& _tracker;
-    stats _stats{};
-    schema_ptr _schema;
-    partitions_type _partitions; // Cached partitions are complete.
-
-    // The snapshots used by cache are versioned. The version number of a snapshot is
-    // called the "population phase", or simply "phase". Between updates, cache
-    // represents the same snapshot.
-    //
-    // Update doesn't happen atomically. Before it completes, some entries reflect
-    // the old snapshot, while others reflect the new snapshot. After update
-    // completes, all entries must reflect the new snapshot.
There is a race between the - // update process and populating reads. Since after the update all entries must - // reflect the new snapshot, reads using the old snapshot cannot be allowed to - // insert data which will no longer be reached by the update process. The whole - // range can be therefore divided into two sub-ranges, one which was already - // processed by the update and one which hasn't. Each key can be assigned a - // population phase which determines to which range it belongs, as well as which - // snapshot it reflects. The methods snapshot_of() and phase_of() can - // be used to determine this. - // - // In general, reads are allowed to populate given range only if the phase - // of the snapshot they use matches the phase of all keys in that range - // when the population is committed. This guarantees that the range will - // be reached by the update process or already has been in its entirety. - // In case of phase conflict, current solution is to give up on - // population. Since the update process is a scan, it's sufficient to - // check when committing the population if the start and end of the range - // have the same phases and that it's the same phase as that of the start - // of the range at the time when reading began. - - mutation_source _underlying; - phase_type _underlying_phase = 0; - mutation_source_opt _prev_snapshot; - - // Positions >= than this are using _prev_snapshot, the rest is using _underlying. - stdx::optional _prev_snapshot_pos; - - snapshot_source _snapshot_source; - - // There can be at most one update in progress. - seastar::semaphore _update_sem = {1}; - - logalloc::allocating_section _update_section; - logalloc::allocating_section _populate_section; - logalloc::allocating_section _read_section; - mutation_reader create_underlying_reader(cache::read_context&, mutation_source&, const dht::partition_range&); - mutation_reader make_scanning_reader(const dht::partition_range&, lw_shared_ptr); - void on_partition_hit(); - void on_partition_miss(); - void on_row_hit(); - void on_row_miss(); - void on_row_insert(); - void on_mispopulate(); - void upgrade_entry(cache_entry&); - void invalidate_locked(const dht::decorated_key&); - void invalidate_unwrapped(const dht::partition_range&); - void clear_now() noexcept; - static thread_local seastar::thread_scheduling_group _update_thread_scheduling_group; - - struct previous_entry_pointer { - stdx::optional _key; - - previous_entry_pointer() = default; // Represents dht::ring_position_view::min() - previous_entry_pointer(dht::decorated_key key) : _key(std::move(key)) {}; - - // TODO: Currently inserting an entry to the cache increases - // modification counter. That doesn't seem to be necessary and if we - // didn't do that we could store iterator here to avoid key comparison - // (not to mention avoiding lookups in just_cache_scanning_reader. - }; - - template - //requires requires(CreateEntry create, VisitEntry visit, partitions_type::iterator it) { - // { create(it) } -> partitions_type::iterator; - // { visit(it) } -> void; - // } - // - // Must be run under reclaim lock - cache_entry& do_find_or_create_entry(const dht::decorated_key& key, const previous_entry_pointer* previous, - CreateEntry&& create_entry, VisitEntry&& visit_entry); - - // Ensures that partition entry for given key exists in cache and returns a reference to it. - // Prepares the entry for reading. "phase" must match the current phase of the entry. 
- // - // Since currently every entry has to have a complete tombstone, it has to be provided here. - // The entry which is returned will have the tombstone applied to it. - // - // Must be run under reclaim lock - cache_entry& find_or_create(const dht::decorated_key& key, tombstone t, row_cache::phase_type phase, const previous_entry_pointer* previous = nullptr); - - partitions_type::iterator partitions_end() { - return std::prev(_partitions.end()); - } - - // Only active phases are accepted. - // Reference valid only until next deferring point. - mutation_source& snapshot_for_phase(phase_type); - - // Returns population phase for given position in the ring. - // snapshot_for_phase() can be called to obtain mutation_source for given phase, but - // only until the next deferring point. - // Should be only called outside update(). - phase_type phase_of(dht::ring_position_view); - - struct snapshot_and_phase { - mutation_source& snapshot; - phase_type phase; - }; - - // Optimized version of: - // - // { snapshot_for_phase(phase_of(pos)), phase_of(pos) }; - // - snapshot_and_phase snapshot_of(dht::ring_position_view pos); - - // Merges the memtable into cache with configurable logic for handling memtable entries. - // The Updater gets invoked for every entry in the memtable with a lower bound iterator - // into _partitions (cache_i), and the memtable entry. - // It is invoked inside allocating section and in the context of cache's allocator. - // All memtable entries will be removed. - template - future<> do_update(memtable& m, Updater func); -public: - ~row_cache(); - row_cache(schema_ptr, snapshot_source, cache_tracker&); - row_cache(row_cache&&) = default; - row_cache(const row_cache&) = delete; - row_cache& operator=(row_cache&&) = default; -public: - // Implements mutation_source for this cache, see mutation_reader.hh - // User needs to ensure that the row_cache object stays alive - // as long as the reader is used. - // The range must not wrap around. - mutation_reader make_reader(schema_ptr, - const dht::partition_range& = query::full_partition_range, - const query::partition_slice& slice = query::full_slice, - const io_priority_class& = default_priority_class(), - tracing::trace_state_ptr trace_state = nullptr, - streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no, - mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::no); - - const stats& stats() const { return _stats; } -public: - // Populate cache from given mutation, which must be fully continuous. - // Intended to be used only in tests. - // Can only be called prior to any reads. - void populate(const mutation& m, const previous_entry_pointer* previous = nullptr); - - // Synchronizes cache with the underlying data source from a memtable which - // has just been flushed to the underlying data source. - // The memtable can be queried during the process, but must not be written. - // After the update is complete, memtable is empty. - future<> update(memtable&, partition_presence_checker underlying_negative); - - // Like update(), synchronizes cache with an incremental change to the underlying - // mutation source, but instead of inserting and merging data, invalidates affected ranges. - // Can be thought of as a more fine-grained version of invalidate(), which invalidates - // as few elements as possible. - future<> update_invalidating(memtable&); - - // Refreshes snapshot. Must only be used if logical state in the underlying data - // source hasn't changed. 
- void refresh_snapshot(); - - // Moves given partition to the front of LRU if present in cache. - void touch(const dht::decorated_key&); - - // Synchronizes cache with the underlying mutation source - // by invalidating ranges which were modified. This will force - // them to be re-read from the underlying mutation source - // during next read overlapping with the invalidated ranges. - // - // The ranges passed to invalidate() must include all - // data which changed since last synchronization. Failure - // to do so may result in reads seeing partial writes, - // which would violate write atomicity. - // - // Guarantees that readers created after invalidate() - // completes will see all writes from the underlying - // mutation source made prior to the call to invalidate(). - future<> invalidate(const dht::decorated_key&); - future<> invalidate(const dht::partition_range& = query::full_partition_range); - future<> invalidate(dht::partition_range_vector&&); - - // Evicts entries from given range in cache. - // - // Note that this does not synchronize with the underlying source, - // it is assumed that the underlying source didn't change. - // If it did, use invalidate() instead. - void evict(const dht::partition_range& = query::full_partition_range); - - size_t partitions() const { - return _partitions.size(); - } - const cache_tracker& get_cache_tracker() const { - return _tracker; - } - - void set_schema(schema_ptr) noexcept; - const schema_ptr& schema() const; - - friend std::ostream& operator<<(std::ostream&, row_cache&); - - friend class just_cache_scanning_reader; - friend class scanning_and_populating_reader; - friend class range_populating_reader; - friend class cache_tracker; - friend class mark_end_as_continuous; -}; diff --git a/scylla/schema.cc b/scylla/schema.cc deleted file mode 100644 index 372b49c..0000000 --- a/scylla/schema.cc +++ /dev/null @@ -1,1262 +0,0 @@ -/* - * Copyright (C) 2014 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "utils/UUID_gen.hh" -#include "cql3/column_identifier.hh" -#include "cql3/util.hh" -#include "schema.hh" -#include "schema_builder.hh" -#include "md5_hasher.hh" -#include -#include -#include "db/marshal/type_parser.hh" -#include "version.hh" -#include "schema_registry.hh" -#include -#include -#include -#include "view_info.hh" - -constexpr int32_t schema::NAME_LENGTH; - -sstring to_sstring(column_kind k) { - switch (k) { - case column_kind::partition_key: return "PARTITION_KEY"; - case column_kind::clustering_key: return "CLUSTERING_COLUMN"; - case column_kind::static_column: return "STATIC"; - case column_kind::regular_column: return "REGULAR"; - } - throw std::invalid_argument("unknown column kind"); -} - -bool is_compatible(column_kind k1, column_kind k2) { - return k1 == k2; -} - -column_mapping_entry::column_mapping_entry(bytes name, sstring type_name) - : _name(std::move(name)) - , _type(db::marshal::type_parser::parse(type_name)) -{ -} - -column_mapping_entry::column_mapping_entry(const column_mapping_entry& o) - : _name(o._name) - , _type(db::marshal::type_parser::parse(o._type->name())) -{ -} - -column_mapping_entry& column_mapping_entry::operator=(const column_mapping_entry& o) { - if (this != &o) { - auto tmp = o; - this->~column_mapping_entry(); - new (this) column_mapping_entry(std::move(tmp)); - } - return *this; -} - -template -std::vector -get_column_types(const Sequence& column_definitions) { - std::vector result; - for (auto&& col : column_definitions) { - result.push_back(col.type); - } - return result; -} - -std::ostream& operator<<(std::ostream& out, const column_mapping& cm) { - column_id n_static = cm.n_static(); - column_id n_regular = cm.columns().size() - n_static; - - auto pr_entry = [] (column_id i, const column_mapping_entry& e) { - // Without schema we don't know if name is UTF8. If we had schema we could use - // s->regular_column_name_type()->to_string(e.name()). 
- return sprint("{id=%s, name=0x%s, type=%s}", i, e.name(), e.type()->name()); - }; - - return out << "{static=[" << ::join(", ", boost::irange(0, n_static) | - boost::adaptors::transformed([&] (column_id i) { return pr_entry(i, cm.static_column_at(i)); })) - << "], regular=[" << ::join(", ", boost::irange(0, n_regular) | - boost::adaptors::transformed([&] (column_id i) { return pr_entry(i, cm.regular_column_at(i)); })) - << "]}"; -} - -::shared_ptr -schema::make_column_specification(const column_definition& def) { - auto id = ::make_shared(def.name(), column_name_type(def)); - return ::make_shared(_raw._ks_name, _raw._cf_name, std::move(id), def.type); -} - -v3_columns::v3_columns(std::vector cols, bool is_dense, bool is_compound) - : _is_dense(is_dense) - , _is_compound(is_compound) - , _columns(std::move(cols)) -{ - for (column_definition& def : _columns) { - _columns_by_name[def.name()] = &def; - } -} - -v3_columns v3_columns::from_v2_schema(const schema& s) { - data_type static_column_name_type = utf8_type; - std::vector cols; - - if (s.is_static_compact_table()) { - if (s.has_static_columns()) { - throw std::runtime_error( - sprint("v2 static compact table should not have static columns: %s.%s", s.ks_name(), s.cf_name())); - } - if (s.clustering_key_size()) { - throw std::runtime_error( - sprint("v2 static compact table should not have clustering columns: %s.%s", s.ks_name(), s.cf_name())); - } - static_column_name_type = s.regular_column_name_type(); - for (auto& c : s.all_columns()) { - // Note that for "static" no-clustering compact storage we use static for the defined columns - if (c.kind == column_kind::regular_column) { - auto new_def = c; - new_def.kind = column_kind::static_column; - cols.push_back(new_def); - } else { - cols.push_back(c); - } - } - schema_builder::default_names names(s._raw); - cols.emplace_back(to_bytes(names.clustering_name()), static_column_name_type, column_kind::clustering_key, 0); - cols.emplace_back(to_bytes(names.compact_value_name()), s.make_legacy_default_validator(), column_kind::regular_column, 0); - } else { - cols = s.all_columns(); - } - - for (column_definition& def : cols) { - data_type name_type = def.is_static() ? 
static_column_name_type : utf8_type; - auto id = ::make_shared(def.name(), name_type); - def.column_specification = ::make_shared(s.ks_name(), s.cf_name(), std::move(id), def.type); - } - - return v3_columns(std::move(cols), s.is_dense(), s.is_compound()); -} - -void v3_columns::apply_to(schema_builder& builder) const { - if (is_static_compact()) { - for (auto& c : _columns) { - if (c.kind == column_kind::regular_column) { - builder.set_default_validation_class(c.type); - } else if (c.kind == column_kind::static_column) { - auto new_def = c; - new_def.kind = column_kind::regular_column; - builder.with_column(new_def); - } else if (c.kind == column_kind::clustering_key) { - builder.set_regular_column_name_type(c.type); - } else { - builder.with_column(c); - } - } - } else { - for (auto& c : _columns) { - if (is_compact() && c.kind == column_kind::regular_column) { - builder.set_default_validation_class(c.type); - } - builder.with_column(c); - } - } -} - -bool v3_columns::is_static_compact() const { - return !_is_dense && !_is_compound; -} - -bool v3_columns::is_compact() const { - return _is_dense || !_is_compound; -} - -const std::unordered_map& v3_columns::columns_by_name() const { - return _columns_by_name; -} - -const std::vector& v3_columns::all_columns() const { - return _columns; -} - -void schema::rebuild() { - _partition_key_type = make_lw_shared>(get_column_types(partition_key_columns())); - _clustering_key_type = make_lw_shared(get_column_types(clustering_key_columns())); - - _columns_by_name.clear(); - _regular_columns_by_name.clear(); - - for (const column_definition& def : all_columns()) { - _columns_by_name[def.name()] = &def; - } - - static_assert(row_column_ids_are_ordered_by_name::value, "row columns don't need to be ordered by name"); - if (!std::is_sorted(regular_columns().begin(), regular_columns().end(), column_definition::name_comparator(regular_column_name_type()))) { - throw std::runtime_error("Regular columns should be sorted by name"); - } - if (!std::is_sorted(static_columns().begin(), static_columns().end(), column_definition::name_comparator(static_column_name_type()))) { - throw std::runtime_error("Static columns should be sorted by name"); - } - - for (const column_definition& def : regular_columns()) { - _regular_columns_by_name[def.name()] = &def; - } - - { - std::vector cm_columns; - for (const column_definition& def : boost::range::join(static_columns(), regular_columns())) { - cm_columns.emplace_back(column_mapping_entry{def.name(), def.type}); - } - _column_mapping = column_mapping(std::move(cm_columns), static_columns_count()); - } - - thrift()._compound = is_compound(); - thrift()._is_dynamic = clustering_key_size() > 0; - - if (is_counter()) { - for (auto&& cdef : boost::range::join(static_columns(), regular_columns())) { - if (!cdef.type->is_counter()) { - throw exceptions::configuration_exception(sprint("Cannot add a non counter column (%s) in a counter column family", cdef.name_as_text())); - } - } - } else { - for (auto&& cdef : all_columns()) { - if (cdef.type->is_counter()) { - throw exceptions::configuration_exception(sprint("Cannot add a counter column (%s) in a non counter column family", cdef.name_as_text())); - } - } - } - - _v3_columns = v3_columns::from_v2_schema(*this); -} - -const column_mapping& schema::get_column_mapping() const { - return _column_mapping; -} - -schema::raw_schema::raw_schema(utils::UUID id) - : _id(id) -{ } - -schema::schema(const raw_schema& raw, stdx::optional raw_view_info) - : _raw(raw) - , _offsets([this] { - 
if (_raw._columns.size() > std::numeric_limits::max()) { - throw std::runtime_error(sprint("Column count limit (%d) overflowed: %d", - std::numeric_limits::max(), _raw._columns.size())); - } - - auto& cols = _raw._columns; - std::array count = { 0, 0, 0, 0 }; - auto i = cols.begin(); - auto e = cols.end(); - for (auto k : { column_kind::partition_key, column_kind::clustering_key, column_kind::static_column, column_kind::regular_column }) { - auto j = std::stable_partition(i, e, [k](const auto& c) { - return c.kind == k; - }); - count[column_count_type(k)] = std::distance(i, j); - i = j; - } - return std::array { - count[0], - count[0] + count[1], - count[0] + count[1] + count[2], - }; - }()) - , _regular_columns_by_name(serialized_compare(_raw._regular_column_name_type)) -{ - std::sort( - _raw._columns.begin() + column_offset(column_kind::static_column), - _raw._columns.begin() - + column_offset(column_kind::regular_column), - column_definition::name_comparator(static_column_name_type())); - std::sort( - _raw._columns.begin() - + column_offset(column_kind::regular_column), - _raw._columns.end(), column_definition::name_comparator(regular_column_name_type())); - - std::sort(_raw._columns.begin(), - _raw._columns.begin() + column_offset(column_kind::clustering_key), - [] (auto x, auto y) { return x.id < y.id; }); - std::sort(_raw._columns.begin() + column_offset(column_kind::clustering_key), - _raw._columns.begin() + column_offset(column_kind::static_column), - [] (auto x, auto y) { return x.id < y.id; }); - - column_id id = 0; - for (auto& def : _raw._columns) { - def.column_specification = make_column_specification(def); - assert(!def.id || def.id == id - column_offset(def.kind)); - def.id = id - column_offset(def.kind); - - auto dropped_at_it = _raw._dropped_columns.find(def.name_as_text()); - if (dropped_at_it != _raw._dropped_columns.end()) { - def._dropped_at = std::max(def._dropped_at, dropped_at_it->second.timestamp); - } - - def._thrift_bits = column_definition::thrift_bits(); - - { - // is_on_all_components - // TODO : In origin, this predicate is "componentIndex == null", which is true in - // a number of cases, some of which I've most likely missed... - switch (def.kind) { - case column_kind::partition_key: - // In origin, ci == null is true for a PK column where CFMetaData "keyValidator" is non-composite. - // Which is true of #pk == 1 - def._thrift_bits.is_on_all_components = partition_key_size() == 1; - break; - case column_kind::regular_column: - if (_raw._is_dense) { - // regular values in dense tables are alone, so they have no index - def._thrift_bits.is_on_all_components = true; - break; - } - default: - // Or any other column where "comparator" is not compound - def._thrift_bits.is_on_all_components = !thrift().has_compound_comparator(); - break; - } - } - - ++id; - } - - rebuild(); - if (raw_view_info) { - _view_info = std::make_unique<::view_info>(*this, *raw_view_info); - } -} - -schema::schema(std::experimental::optional id, - sstring ks_name, - sstring cf_name, - std::vector partition_key, - std::vector clustering_key, - std::vector regular_columns, - std::vector static_columns, - data_type regular_column_name_type, - sstring comment) - : schema([&] { - raw_schema raw(id ? 
*id : utils::UUID_gen::get_time_UUID()); - - raw._comment = std::move(comment); - raw._ks_name = std::move(ks_name); - raw._cf_name = std::move(cf_name); - raw._regular_column_name_type = regular_column_name_type; - - auto build_columns = [&raw](std::vector& columns, column_kind kind) { - for (auto& sc : columns) { - if (sc.type->is_multi_cell()) { - raw._collections.emplace(sc.name, sc.type); - } - raw._columns.emplace_back(std::move(sc.name), std::move(sc.type), kind); - } - }; - - build_columns(partition_key, column_kind::partition_key); - build_columns(clustering_key, column_kind::clustering_key); - build_columns(static_columns, column_kind::static_column); - build_columns(regular_columns, column_kind::regular_column); - - return raw; - }(), stdx::nullopt) -{} - -schema::schema(const schema& o) - : _raw(o._raw) - , _offsets(o._offsets) - , _regular_columns_by_name(serialized_compare(_raw._regular_column_name_type)) -{ - rebuild(); - if (o.is_view()) { - _view_info = std::make_unique<::view_info>(*this, o.view_info()->raw()); - } -} - -schema::~schema() { - if (_registry_entry) { - _registry_entry->detach_schema(); - } -} - -schema_registry_entry* -schema::registry_entry() const noexcept { - return _registry_entry; -} - -sstring schema::thrift_key_validator() const { - if (partition_key_size() == 1) { - return partition_key_columns().begin()->type->name(); - } else { - sstring type_params = ::join(", ", partition_key_columns() - | boost::adaptors::transformed(std::mem_fn(&column_definition::type)) - | boost::adaptors::transformed(std::mem_fn(&abstract_type::name))); - return "org.apache.cassandra.db.marshal.CompositeType(" + type_params + ")"; - } -} - -bool -schema::has_multi_cell_collections() const { - return boost::algorithm::any_of(all_columns(), [] (const column_definition& cdef) { - return cdef.type->is_collection() && cdef.type->is_multi_cell(); - }); -} - -bool operator==(const schema& x, const schema& y) -{ - return x._raw._id == y._raw._id - && x._raw._ks_name == y._raw._ks_name - && x._raw._cf_name == y._raw._cf_name - && x._raw._columns == y._raw._columns - && x._raw._comment == y._raw._comment - && x._raw._default_time_to_live == y._raw._default_time_to_live - && x._raw._regular_column_name_type->equals(y._raw._regular_column_name_type) - && x._raw._bloom_filter_fp_chance == y._raw._bloom_filter_fp_chance - && x._raw._compressor_params == y._raw._compressor_params - && x._raw._is_dense == y._raw._is_dense - && x._raw._is_compound == y._raw._is_compound - && x._raw._type == y._raw._type - && x._raw._gc_grace_seconds == y._raw._gc_grace_seconds - && x._raw._dc_local_read_repair_chance == y._raw._dc_local_read_repair_chance - && x._raw._read_repair_chance == y._raw._read_repair_chance - && x._raw._min_compaction_threshold == y._raw._min_compaction_threshold - && x._raw._max_compaction_threshold == y._raw._max_compaction_threshold - && x._raw._min_index_interval == y._raw._min_index_interval - && x._raw._max_index_interval == y._raw._max_index_interval - && x._raw._memtable_flush_period == y._raw._memtable_flush_period - && x._raw._speculative_retry == y._raw._speculative_retry - && x._raw._compaction_strategy == y._raw._compaction_strategy - && x._raw._compaction_strategy_options == y._raw._compaction_strategy_options - && x._raw._compaction_enabled == y._raw._compaction_enabled - && x._raw._caching_options == y._raw._caching_options - && x._raw._dropped_columns == y._raw._dropped_columns - && x._raw._collections == y._raw._collections - && indirect_equal_to>()(x._view_info, 
y._view_info) - && x._raw._indices_by_name == y._raw._indices_by_name - && x._raw._is_counter == y._raw._is_counter - ; -#if 0 - && Objects.equal(triggers, other.triggers) -#endif -} - -index_metadata::index_metadata(const sstring& name, - const index_options_map& options, - index_metadata_kind kind) - : _id{utils::UUID_gen::get_name_UUID(name)} - , _name{name} - , _kind{kind} - , _options{options} -{} - -bool index_metadata::operator==(const index_metadata& other) const { - return _id == other._id - && _name == other._name - && _kind == other._kind - && _options == other._options; -} - -bool index_metadata::equals_noname(const index_metadata& other) const { - return _kind == other._kind && _options == other._options; -} - -const utils::UUID& index_metadata::id() const { - return _id; -} - -const sstring& index_metadata::name() const { - return _name; -} - -const index_metadata_kind index_metadata::kind() const { - return _kind; -} - -const index_options_map& index_metadata::options() const { - return _options; -} - -sstring index_metadata::get_default_index_name(const sstring& cf_name, - std::experimental::optional root) { - if (root) { - return cf_name + "_" + root.value() + "_idx"; - } - return cf_name + "_idx"; -} - -column_definition::column_definition(bytes name, data_type type, column_kind kind, column_id component_index, api::timestamp_type dropped_at) - : _name(std::move(name)) - , _dropped_at(dropped_at) - , _is_atomic(type->is_atomic()) - , _is_counter(type->is_counter()) - , type(std::move(type)) - , id(component_index) - , kind(kind) -{} - -std::ostream& operator<<(std::ostream& os, const column_definition& cd) { - os << "ColumnDefinition{"; - os << "name=" << cd.name_as_text(); - os << ", type=" << cd.type->name(); - os << ", kind=" << to_sstring(cd.kind); - os << ", componentIndex=" << (cd.has_component_index() ? std::to_string(cd.component_index()) : "null"); - os << ", droppedAt=" << cd._dropped_at; - os << "}"; - return os; -} - -const column_definition* -schema::get_column_definition(const bytes& name) const { - auto i = _columns_by_name.find(name); - if (i == _columns_by_name.end()) { - return nullptr; - } - return i->second; -} - -const column_definition& -schema::column_at(column_kind kind, column_id id) const { - return _raw._columns.at(column_offset(kind) + id); -} - -std::ostream& operator<<(std::ostream& os, const schema& s) { - os << "org.apache.cassandra.config.CFMetaData@" << &s << "["; - os << "cfId=" << s._raw._id; - os << ",ksName=" << s._raw._ks_name; - os << ",cfName=" << s._raw._cf_name; - os << ",cfType=" << cf_type_to_sstring(s._raw._type); - os << ",comparator=" << cell_comparator::to_sstring(s); - os << ",comment=" << s._raw._comment; - os << ",readRepairChance=" << s._raw._read_repair_chance; - os << ",dcLocalReadRepairChance=" << s._raw._dc_local_read_repair_chance; - os << ",gcGraceSeconds=" << s._raw._gc_grace_seconds; - os << ",keyValidator=" << s.thrift_key_validator(); - os << ",minCompactionThreshold=" << s._raw._min_compaction_threshold; - os << ",maxCompactionThreshold=" << s._raw._max_compaction_threshold; - os << ",columnMetadata=["; - int n = 0; - for (auto& cdef : s._raw._columns) { - if (n++ != 0) { - os << ", "; - } - os << cdef; - } - os << "]"; - os << ",compactionStrategyClass=class org.apache.cassandra.db.compaction." 
<< sstables::compaction_strategy::name(s._raw._compaction_strategy); - os << ",compactionStrategyOptions={"; - n = 0; - for (auto& p : s._raw._compaction_strategy_options) { - os << p.first << "=" << p.second; - os << ", "; - } - os << "enabled=" << std::boolalpha << s._raw._compaction_enabled; - os << "}"; - os << ",compressionParameters={"; - n = 0; - for (auto& p : s._raw._compressor_params.get_options() ) { - if (n++ != 0) { - os << ", "; - } - os << p.first << "=" << p.second; - } - os << "}"; - os << ",bloomFilterFpChance=" << s._raw._bloom_filter_fp_chance; - os << ",memtableFlushPeriod=" << s._raw._memtable_flush_period; - os << ",caching=" << s._raw._caching_options.to_sstring(); - os << ",defaultTimeToLive=" << s._raw._default_time_to_live.count(); - os << ",minIndexInterval=" << s._raw._min_index_interval; - os << ",maxIndexInterval=" << s._raw._max_index_interval; - os << ",speculativeRetry=" << s._raw._speculative_retry.to_sstring(); - os << ",triggers=[]"; - os << ",isDense=" << std::boolalpha << s._raw._is_dense; - os << ",version=" << s.version(); - os << ",droppedColumns={"; - n = 0; - for (auto& dc : s._raw._dropped_columns) { - if (n++ != 0) { - os << ", "; - } - os << dc.first << " : { " << dc.second.type->name() << ", " << dc.second.timestamp << " }"; - } - os << "}"; - os << ",collections={"; - n = 0; - for (auto& c : s._raw._collections) { - if (n++ != 0) { - os << ", "; - } - os << c.first << " : " << c.second->name(); - } - os << "}"; - os << ",indices={"; - n = 0; - for (auto& c : s._raw._indices_by_name) { - if (n++ != 0) { - os << ", "; - } - os << c.first << " : " << c.second.id(); - } - os << "}"; - if (s.is_view()) { - os << ", viewInfo=" << *s.view_info(); - } - os << "]"; - return os; -} - -const sstring& -column_definition::name_as_text() const { - return column_specification->name->text(); -} - -const bytes& -column_definition::name() const { - return _name; -} - -sstring column_definition::name_as_cql_string() const { - return cql3::util::maybe_quote(name_as_text()); -} - -bool column_definition::is_on_all_components() const { - return _thrift_bits.is_on_all_components; -} - -bool operator==(const column_definition& x, const column_definition& y) -{ - return x._name == y._name - && x.type->equals(y.type) - && x.id == y.id - && x.kind == y.kind - && x._dropped_at == y._dropped_at; -} - -// Based on org.apache.cassandra.config.CFMetaData#generateLegacyCfId -utils::UUID -generate_legacy_id(const sstring& ks_name, const sstring& cf_name) { - return utils::UUID_gen::get_name_UUID(ks_name + cf_name); -} - -bool thrift_schema::has_compound_comparator() const { - return _compound; -} - -bool thrift_schema::is_dynamic() const { - return _is_dynamic; -} - -schema_builder::schema_builder(const sstring& ks_name, const sstring& cf_name, - std::experimental::optional id, data_type rct) - : _raw(id ? *id : utils::UUID_gen::get_time_UUID()) -{ - _raw._ks_name = ks_name; - _raw._cf_name = cf_name; - _raw._regular_column_name_type = rct; -} - -schema_builder::schema_builder(const schema_ptr s) - : schema_builder(s->_raw) -{ - if (s->is_view()) { - _view_info = s->view_info()->raw(); - } -} - -schema_builder::schema_builder(const schema::raw_schema& raw) - : _raw(raw) -{ - static_assert(schema::row_column_ids_are_ordered_by_name::value, "row columns don't need to be ordered by name"); - // Schema builder may add or remove columns and their ids need to be - // recomputed in build(). 
-    for (auto& def : _raw._columns | boost::adaptors::filtered([] (auto& def) { return !def.is_primary_key(); })) {
-        def.id = 0;
-    }
-}
-
-column_definition& schema_builder::find_column(const cql3::column_identifier& c) {
-    auto i = std::find_if(_raw._columns.begin(), _raw._columns.end(), [c](auto& p) {
-        return p.name() == c.name();
-    });
-    if (i != _raw._columns.end()) {
-        return *i;
-    }
-    throw std::invalid_argument(sprint("No such column %s", c.name()));
-}
-
-schema_builder& schema_builder::with_column(const column_definition& c) {
-    return with_column(bytes(c.name()), data_type(c.type), column_kind(c.kind), c.position());
-}
-
-schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind) {
-    // component_index will be determined by schema constructor
-    return with_column(name, type, kind, 0);
-}
-
-schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind, column_id component_index) {
-    _raw._columns.emplace_back(name, type, kind, component_index);
-    if (type->is_multi_cell()) {
-        with_collection(name, type);
-    } else if (type->is_counter()) {
-        set_is_counter(true);
-    }
-    return *this;
-}
-
-schema_builder& schema_builder::without_column(bytes name)
-{
-    auto it = boost::range::find_if(_raw._columns, [&] (auto& column) {
-        return column.name() == name;
-    });
-    assert(it != _raw._columns.end());
-    without_column(it->name_as_text(), it->type, api::new_timestamp());
-    _raw._columns.erase(it);
-    return *this;
-}
-
-schema_builder& schema_builder::without_column(sstring name, api::timestamp_type timestamp) {
-    return without_column(std::move(name), bytes_type, timestamp);
-}
-
-schema_builder& schema_builder::without_column(sstring name, data_type type, api::timestamp_type timestamp)
-{
-    auto ret = _raw._dropped_columns.emplace(name, schema::dropped_column{type, timestamp});
-    if (!ret.second && ret.first->second.timestamp < timestamp) {
-        ret.first->second.type = type;
-        ret.first->second.timestamp = timestamp;
-    }
-    return *this;
-}
-
-schema_builder& schema_builder::with_column_rename(bytes from, bytes to)
-{
-    auto it = std::find_if(_raw._columns.begin(), _raw._columns.end(), [&] (auto& col) {
-        return col.name() == from;
-    });
-    assert(it != _raw._columns.end());
-    auto& def = *it;
-    column_definition new_def(to, def.type, def.kind, def.component_index());
-    _raw._columns.erase(it);
-    return with_column(new_def);
-}
-
-schema_builder& schema_builder::with_altered_column_type(bytes name, data_type new_type)
-{
-    auto it = boost::find_if(_raw._columns, [&name] (auto& c) { return c.name() == name; });
-    assert(it != _raw._columns.end());
-    it->type = new_type;
-
-    if (new_type->is_multi_cell()) {
-        auto c_it = _raw._collections.find(name);
-        assert(c_it != _raw._collections.end());
-        c_it->second = new_type;
-    }
-    return *this;
-}
-
-schema_builder& schema_builder::with_collection(bytes name, data_type type)
-{
-    _raw._collections.emplace(name, type);
-    return *this;
-}
-
-schema_builder& schema_builder::with(compact_storage cs) {
-    _compact_storage = cs;
-    return *this;
-}
-
-schema_builder& schema_builder::with_version(table_schema_version v) {
-    _version = v;
-    return *this;
-}
-
-static const sstring default_partition_key_name = "key";
-static const sstring default_clustering_name = "column";
-static const sstring default_compact_value_name = "value";
-
-schema_builder::default_names::default_names(const schema_builder& builder)
-    : default_names(builder._raw)
-{}
-
-schema_builder::default_names::default_names(const
schema::raw_schema& raw) - : _raw(raw) - , _partition_index(0) - , _clustering_index(1) - , _compact_index(0) -{} - -sstring schema_builder::default_names::unique_name(const sstring& base, size_t& idx, size_t off) const { - for (;;) { - auto candidate = idx == 0 ? base : base + std::to_string(idx + off); - ++idx; - auto i = std::find_if(_raw._columns.begin(), _raw._columns.end(), [b = to_bytes(candidate)](const column_definition& c) { - return c.name() == b; - }); - if (i == _raw._columns.end()) { - return candidate; - } - } -} - -sstring schema_builder::default_names::partition_key_name() { - // For compatibility sake, we call the first alias 'key' rather than 'key1'. This - // is inconsistent with column alias, but it's probably not worth risking breaking compatibility now. - return unique_name(default_partition_key_name, _partition_index, 1); -} - -sstring schema_builder::default_names::clustering_name() { - return unique_name(default_clustering_name, _clustering_index, 0); -} - -sstring schema_builder::default_names::compact_value_name() { - return unique_name(default_compact_value_name, _compact_index, 0); -} - -void schema_builder::prepare_dense_schema(schema::raw_schema& raw) { - auto is_dense = raw._is_dense; - auto is_compound = raw._is_compound; - auto is_compact_table = is_dense || !is_compound; - - if (is_compact_table) { - auto count_kind = [&raw](column_kind kind) { - return std::count_if(raw._columns.begin(), raw._columns.end(), [kind](const column_definition& c) { - return c.kind == kind; - }); - }; - - default_names names(raw); - - if (is_dense) { - auto regular_cols = count_kind(column_kind::regular_column); - // In Origin, dense CFs always have at least one regular column - if (regular_cols == 0) { - raw._columns.emplace_back(to_bytes(names.compact_value_name()), - empty_type, - column_kind::regular_column, 0); - } else if (regular_cols > 1) { - throw exceptions::configuration_exception( - sprint( - "Expecting exactly one regular column. Found %d", - regular_cols)); - } - } - } -} - -schema_builder& schema_builder::with_view_info(utils::UUID base_id, sstring base_name, bool include_all_columns, sstring where_clause) { - _view_info = raw_view_info(std::move(base_id), std::move(base_name), include_all_columns, std::move(where_clause)); - return *this; -} - -schema_builder& schema_builder::with_index(const index_metadata& im) { - _raw._indices_by_name.emplace(im.name(), im); - return *this; -} - -schema_builder& schema_builder::without_index(const sstring& name) { - const auto& it = _raw._indices_by_name.find(name); - if (it != _raw._indices_by_name.end()) { - _raw._indices_by_name.erase(name); - } - return *this; -} - -schema_ptr schema_builder::build() { - schema::raw_schema new_raw = _raw; // Copy so that build() remains idempotent. - - if (_version) { - new_raw._version = *_version; - } else { - new_raw._version = utils::UUID_gen::get_time_UUID(); - } - - if (new_raw._is_counter) { - new_raw._default_validation_class = counter_type; - } - - if (_compact_storage) { - // Dense means that no part of the comparator stores a CQL column name. This means - // COMPACT STORAGE with at least one columnAliases (otherwise it's a thrift "static" CF). 
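// Illustrative sketch, not part of the patch: a condensed model of how the
// build() code below derives the two flags from COMPACT STORAGE and the
// number of clustering columns. All names here are made up.
#include <cassert>
#include <cstddef>

struct flags { bool is_dense, is_compound; };

flags derive(bool compact_storage, size_t clustering_key_size) {
    return {
        // dense: COMPACT STORAGE with at least one clustering column
        compact_storage && clustering_key_size > 0,
        // compound: everything except COMPACT STORAGE with <= 1 clustering column
        !(compact_storage && clustering_key_size <= 1),
    };
}

int main() {
    assert(!derive(true, 0).is_dense && !derive(true, 0).is_compound); // thrift "static" CF
    assert( derive(true, 1).is_dense && !derive(true, 1).is_compound);
    assert( derive(true, 2).is_dense &&  derive(true, 2).is_compound);
    assert(!derive(false, 1).is_dense && derive(false, 1).is_compound);
}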
- auto clustering_key_size = std::count_if(new_raw._columns.begin(), new_raw._columns.end(), [](auto&& col) { - return col.kind == column_kind::clustering_key; - }); - new_raw._is_dense = (*_compact_storage == compact_storage::yes) && (clustering_key_size > 0); - - if (clustering_key_size == 0) { - if (*_compact_storage == compact_storage::yes) { - new_raw._is_compound = false; - } else { - new_raw._is_compound = true; - } - } else { - if ((*_compact_storage == compact_storage::yes) && clustering_key_size == 1) { - new_raw._is_compound = false; - } else { - new_raw._is_compound = true; - } - } - } - - prepare_dense_schema(new_raw); - return make_lw_shared(schema(new_raw, _view_info)); -} - -schema_ptr schema_builder::build(compact_storage cp) { - return with(cp).build(); -} - -// Useful functions to manipulate the schema's comparator field -namespace cell_comparator { - -static constexpr auto _composite_str = "org.apache.cassandra.db.marshal.CompositeType"; -static constexpr auto _collection_str = "org.apache.cassandra.db.marshal.ColumnToCollectionType"; - -static sstring compound_name(const schema& s) { - sstring compound(_composite_str); - - compound += "("; - if (s.clustering_key_size()) { - for (auto &t : s.clustering_key_columns()) { - compound += t.type->name() + ","; - } - } - - if (!s.is_dense()) { - compound += s.regular_column_name_type()->name() + ","; - } - - if (!s.collections().empty()) { - compound += _collection_str; - compound += "("; - for (auto& c : s.collections()) { - auto ct = static_pointer_cast(c.second); - compound += sprint("%s:%s,", to_hex(c.first), ct->name()); - } - compound.back() = ')'; - compound += ","; - } - // last one will be a ',', just replace it. - compound.back() = ')'; - return compound; -} - -sstring to_sstring(const schema& s) { - if (s.is_compound()) { - return compound_name(s); - } else if (s.clustering_key_size() == 1) { - assert(s.is_dense() || s.is_static_compact_table()); - return s.clustering_key_columns().front().type->name(); - } else { - return s.regular_column_name_type()->name(); - } -} - -bool check_compound(sstring comparator) { - static sstring compound(_composite_str); - return comparator.compare(0, compound.size(), compound) == 0; -} - -void read_collections(schema_builder& builder, sstring comparator) -{ - // The format of collection entries in the comparator is: - // org.apache.cassandra.db.marshal.ColumnToCollectionType(:, ...) 
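// Illustrative example, not part of the patch: the comparator strings built by
// compound_name() above and parsed back by read_collections() below. Each
// collection entry is "<hex-encoded column name>:<type class name>". Assuming
// Cassandra-style marshal class names, a compound, non-dense table with one
// int32 clustering column, UTF8 cell names, and a list<text> collection named
// "tags" (hex 74616773) would get a comparator like the following (wrapped
// here for readability; the real string is a single line):
//
//   org.apache.cassandra.db.marshal.CompositeType(
//       org.apache.cassandra.db.marshal.Int32Type,
//       org.apache.cassandra.db.marshal.UTF8Type,
//       org.apache.cassandra.db.marshal.ColumnToCollectionType(
//           74616773:org.apache.cassandra.db.marshal.ListType(
//               org.apache.cassandra.db.marshal.UTF8Type)))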
- - auto find_closing_parenthesis = [] (sstring_view str, size_t start) { - auto pos = start; - auto nest_level = 0; - do { - pos = str.find_first_of("()", pos); - if (pos == sstring::npos) { - throw marshal_exception(); - } - if (str[pos] == ')') { - nest_level--; - } else if (str[pos] == '(') { - nest_level++; - } - pos++; - } while (nest_level > 0); - return pos - 1; - }; - - auto collection_str_length = strlen(_collection_str); - - auto pos = comparator.find(_collection_str); - if (pos == sstring::npos) { - return; - } - pos += collection_str_length + 1; - while (pos < comparator.size()) { - size_t end = comparator.find('(', pos); - if (end == sstring::npos) { - throw marshal_exception(); - } - end = find_closing_parenthesis(comparator, end) + 1; - - auto colon = comparator.find(':', pos); - if (colon == sstring::npos || colon > end) { - throw marshal_exception(); - } - - auto name = from_hex(sstring_view(comparator.c_str() + pos, colon - pos)); - - colon++; - auto type_str = sstring_view(comparator.c_str() + colon, end - colon); - auto type = db::marshal::type_parser::parse(type_str); - - builder.with_collection(name, type); - - if (end < comparator.size() && comparator[end] == ',') { - pos = end + 1; - } else if (end < comparator.size() && comparator[end] == ')') { - pos = sstring::npos; - } else { - throw marshal_exception(); - } - } -} - -} - -schema::const_iterator -schema::regular_begin() const { - return regular_columns().begin(); -} - -schema::const_iterator -schema::regular_end() const { - return regular_columns().end(); -} - -struct column_less_comparator { - bool operator()(const column_definition& def, const bytes& name) { - return def.name() < name; - } - bool operator()(const bytes& name, const column_definition& def) { - return name < def.name(); - } -}; - -schema::const_iterator -schema::regular_lower_bound(const bytes& name) const { - return boost::lower_bound(regular_columns(), name, column_less_comparator()); -} - -schema::const_iterator -schema::regular_upper_bound(const bytes& name) const { - return boost::upper_bound(regular_columns(), name, column_less_comparator()); -} - -schema::const_iterator -schema::static_begin() const { - return static_columns().begin(); -} - -schema::const_iterator -schema::static_end() const { - return static_columns().end(); -} - -schema::const_iterator -schema::static_lower_bound(const bytes& name) const { - return boost::lower_bound(static_columns(), name, column_less_comparator()); -} - -schema::const_iterator -schema::static_upper_bound(const bytes& name) const { - return boost::upper_bound(static_columns(), name, column_less_comparator()); -} -data_type -schema::column_name_type(const column_definition& def) const { - if (def.kind == column_kind::regular_column) { - return _raw._regular_column_name_type; - } - return utf8_type; -} - -const column_definition& -schema::regular_column_at(column_id id) const { - if (id > regular_columns_count()) { - throw std::out_of_range("column_id"); - } - return _raw._columns.at(column_offset(column_kind::regular_column) + id); -} - -const column_definition& -schema::clustering_column_at(column_id id) const { - if (id >= clustering_key_size()) { - throw std::out_of_range(sprint("clustering column id %d >= %d", id, clustering_key_size())); - } - return _raw._columns.at(column_offset(column_kind::clustering_key) + id); -} - -const column_definition& -schema::static_column_at(column_id id) const { - if (id > static_columns_count()) { - throw std::out_of_range("column_id"); - } - return 
_raw._columns.at(column_offset(column_kind::static_column) + id); -} - -bool -schema::is_last_partition_key(const column_definition& def) const { - return &_raw._columns.at(partition_key_size() - 1) == &def; -} - -bool -schema::has_static_columns() const { - return !static_columns().empty(); -} - -column_count_type -schema::partition_key_size() const { - return column_offset(column_kind::clustering_key); -} - -column_count_type -schema::clustering_key_size() const { - return column_offset(column_kind::static_column) - column_offset(column_kind::clustering_key); -} - -column_count_type -schema::static_columns_count() const { - return column_offset(column_kind::regular_column) - column_offset(column_kind::static_column); -} - -column_count_type -schema::regular_columns_count() const { - return _raw._columns.size() - column_offset(column_kind::regular_column); -} - -schema::const_iterator_range_type -schema::partition_key_columns() const { - return boost::make_iterator_range(_raw._columns.begin() + column_offset(column_kind::partition_key) - , _raw._columns.begin() + column_offset(column_kind::clustering_key)); -} - -schema::const_iterator_range_type -schema::clustering_key_columns() const { - return boost::make_iterator_range(_raw._columns.begin() + column_offset(column_kind::clustering_key) - , _raw._columns.begin() + column_offset(column_kind::static_column)); -} - -schema::const_iterator_range_type -schema::static_columns() const { - return boost::make_iterator_range(_raw._columns.begin() + column_offset(column_kind::static_column) - , _raw._columns.begin() + column_offset(column_kind::regular_column)); -} - -schema::const_iterator_range_type -schema::regular_columns() const { - return boost::make_iterator_range(_raw._columns.begin() + column_offset(column_kind::regular_column) - , _raw._columns.end()); -} - -schema::select_order_range schema::all_columns_in_select_order() const { - auto is_static_compact_table = this->is_static_compact_table(); - auto no_non_pk_columns = is_compact_table() - // Origin: && CompactTables.hasEmptyCompactValue(this); - && regular_columns_count() == 1 - && [](const column_definition& c) { - // We use empty_type now to match origin, but earlier incarnations - // set name empty instead. check either. - return c.type == empty_type || c.name().empty(); - }(regular_column_at(0)); - auto pk_range = const_iterator_range_type(_raw._columns.begin(), - _raw._columns.begin() + (is_static_compact_table ? - column_offset(column_kind::clustering_key) : - column_offset(column_kind::static_column))); - auto ck_v_range = no_non_pk_columns ? 
static_columns() - : const_iterator_range_type(static_columns().begin(), all_columns().end()); - return boost::range::join(pk_range, ck_v_range); -} - -uint32_t -schema::position(const column_definition& column) const { - if (column.is_primary_key()) { - return column.id; - } - return clustering_key_size(); -} - -stdx::optional schema::find_index_noname(const index_metadata& target) const { - const auto& it = boost::find_if(_raw._indices_by_name, [&] (auto&& e) { - return e.second.equals_noname(target); - }); - if (it != _raw._indices_by_name.end()) { - return it->second; - } - return {}; -} - -std::vector schema::indices() const { - return boost::copy_range>(_raw._indices_by_name | boost::adaptors::map_values); -} - -const std::unordered_map& schema::all_indices() const { - return _raw._indices_by_name; -} - -bool schema::has_index(const sstring& index_name) const { - return _raw._indices_by_name.count(index_name) > 0; -} - -std::vector schema::index_names() const { - return boost::copy_range>(_raw._indices_by_name | boost::adaptors::map_keys); -} - -data_type schema::make_legacy_default_validator() const { - return _raw._default_validation_class; -} - -bool schema::is_synced() const { - return _registry_entry && _registry_entry->is_synced(); -} - -bool schema::equal_columns(const schema& other) const { - return boost::equal(all_columns(), other.all_columns()); -} - -raw_view_info::raw_view_info(utils::UUID base_id, sstring base_name, bool include_all_columns, sstring where_clause) - : _base_id(std::move(base_id)) - , _base_name(std::move(base_name)) - , _include_all_columns(include_all_columns) - , _where_clause(where_clause) -{ } - -bool operator==(const raw_view_info& x, const raw_view_info& y) { - return x._base_id == y._base_id - && x._base_name == y._base_name - && x._include_all_columns == y._include_all_columns - && x._where_clause == y._where_clause; -} - -std::ostream& operator<<(std::ostream& os, const raw_view_info& view) { - os << "ViewInfo{"; - os << "baseTableId=" << view._base_id; - os << ", baseTableName=" << view._base_name; - os << ", includeAllColumns=" << view._include_all_columns; - os << ", whereClause=" << view._where_clause; - os << "}"; - return os; -} - -std::ostream& operator<<(std::ostream& os, const view_ptr& view) { - return view ? os << *view : os << "null"; -} diff --git a/scylla/schema.hh b/scylla/schema.hh deleted file mode 100644 index e639298..0000000 --- a/scylla/schema.hh +++ /dev/null @@ -1,777 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include -#include -#include -#include -#include - -#include "cql3/column_specification.hh" -#include "core/shared_ptr.hh" -#include "types.hh" -#include "compound.hh" -#include "gc_clock.hh" -#include "unimplemented.hh" -#include "utils/UUID.hh" -#include "compress.hh" -#include "compaction_strategy.hh" -#include "caching_options.hh" -#include "stdx.hh" - -using column_count_type = uint32_t; - -// Column ID, unique within column_kind -using column_id = column_count_type; - -// Cluster-wide identifier of schema version of particular table. -// -// The version changes the value not only on structural changes but also -// temporal. For example, schemas with the same set of columns but created at -// different times should have different versions. This allows nodes to detect -// if the version they see was already synchronized with or not even if it has -// the same structure as the past versions. -// -// Schema changes merged in any order should result in the same final version. -// -// When table_schema_version changes, schema_tables::calculate_schema_digest() should -// also change when schema mutations are applied. -using table_schema_version = utils::UUID; - -class schema; -class schema_registry_entry; -class schema_builder; - -// Useful functions to manipulate the schema's comparator field -namespace cell_comparator { -sstring to_sstring(const schema& s); -bool check_compound(sstring comparator); -void read_collections(schema_builder& builder, sstring comparator); -} - -// make sure these match the order we like columns back from schema -enum class column_kind { partition_key, clustering_key, static_column, regular_column }; - -sstring to_sstring(column_kind k); -bool is_compatible(column_kind k1, column_kind k2); - -enum class cf_type : uint8_t { - standard, - super, -}; - -inline sstring cf_type_to_sstring(cf_type t) { - if (t == cf_type::standard) { - return "Standard"; - } else if (t == cf_type::super) { - return "Super"; - } - throw std::invalid_argument(sprint("unknown type: %d\n", uint8_t(t))); -} - -inline cf_type sstring_to_cf_type(sstring name) { - if (name == "Standard") { - return cf_type::standard; - } else if (name == "Super") { - return cf_type::super; - } - throw std::invalid_argument(sprint("unknown type: %s\n", name)); -} - -struct speculative_retry { - enum class type { - NONE, CUSTOM, PERCENTILE, ALWAYS - }; -private: - type _t; - double _v; -public: - speculative_retry(type t, double v) : _t(t), _v(v) {} - - sstring to_sstring() const { - if (_t == type::NONE) { - return "NONE"; - } else if (_t == type::ALWAYS) { - return "ALWAYS"; - } else if (_t == type::CUSTOM) { - return sprint("%.2fms", _v); - } else if (_t == type::PERCENTILE) { - return sprint("%.1fPERCENTILE", 100 * _v); - } else { - throw std::invalid_argument(sprint("unknown type: %d\n", uint8_t(_t))); - } - } - static speculative_retry from_sstring(sstring str) { - std::transform(str.begin(), str.end(), str.begin(), ::toupper); - - sstring ms("MS"); - sstring percentile("PERCENTILE"); - - auto convert = [&str] (sstring& t) { - try { - return boost::lexical_cast(str.substr(0, str.size() - t.size())); - } catch (boost::bad_lexical_cast& e) { - throw std::invalid_argument(sprint("cannot convert %s to speculative_retry\n", str)); - } - }; - - type t; - double v = 0; - if (str == "NONE") { - t = type::NONE; - } else if (str == "ALWAYS") { - t = type::ALWAYS; - } else if (str.compare(str.size() - ms.size(), ms.size(), ms) == 0) { - t = type::CUSTOM; - v = convert(ms); - } else if 
(str.compare(str.size() - percentile.size(), percentile.size(), percentile) == 0) { - t = type::PERCENTILE; - v = convert(percentile) / 100; - } else { - throw std::invalid_argument(sprint("cannot convert %s to speculative_retry\n", str)); - } - return speculative_retry(t, v); - } - type get_type() const { - return _t; - } - double get_value() const { - return _v; - } - bool operator==(const speculative_retry& other) const { - return _t == other._t && _v == other._v; - } - bool operator!=(const speculative_retry& other) const { - return !(*this == other); - } -}; - -typedef std::unordered_map index_options_map; - -enum class index_metadata_kind { - keys, - custom, - composites, -}; - -class index_metadata final { - utils::UUID _id; - sstring _name; - index_metadata_kind _kind; - index_options_map _options; -public: - index_metadata(const sstring& name, const index_options_map& options, index_metadata_kind kind); - bool operator==(const index_metadata& other) const; - bool equals_noname(const index_metadata& other) const; - const utils::UUID& id() const; - const sstring& name() const; - const index_metadata_kind kind() const; - const index_options_map& options() const; - static sstring get_default_index_name(const sstring& cf_name, std::experimental::optional root); -}; - -class column_definition final { -public: - struct name_comparator { - data_type type; - name_comparator(data_type type) : type(type) {} - bool operator()(const column_definition& cd1, const column_definition& cd2) const { - return type->less(cd1.name(), cd2.name()); - } - }; -private: - bytes _name; - api::timestamp_type _dropped_at; - bool _is_atomic; - bool _is_counter; - - struct thrift_bits { - thrift_bits() - : is_on_all_components(0) - {} - uint8_t is_on_all_components : 1; - // more...? - }; - - thrift_bits _thrift_bits; - friend class schema; -public: - column_definition(bytes name, data_type type, column_kind kind, - column_id component_index = 0, - api::timestamp_type dropped_at = api::missing_timestamp); - - data_type type; - - // Unique within (kind, schema instance). - // schema::position() and component_index() depend on the fact that for PK columns this is - // equivalent to component index. - column_id id; - - column_kind kind; - ::shared_ptr column_specification; - - bool is_static() const { return kind == column_kind::static_column; } - bool is_regular() const { return kind == column_kind::regular_column; } - bool is_partition_key() const { return kind == column_kind::partition_key; } - bool is_clustering_key() const { return kind == column_kind::clustering_key; } - bool is_primary_key() const { return kind == column_kind::partition_key || kind == column_kind::clustering_key; } - bool is_atomic() const { return _is_atomic; } - bool is_multi_cell() const { return !_is_atomic; } - bool is_counter() const { return _is_counter; } - const sstring& name_as_text() const; - const bytes& name() const; - sstring name_as_cql_string() const; - friend std::ostream& operator<<(std::ostream& os, const column_definition& cd); - friend std::ostream& operator<<(std::ostream& os, const column_definition* cd) { - return cd != nullptr ? 
os << *cd : os << "(null)"; - } - bool has_component_index() const { - return is_primary_key(); - } - uint32_t component_index() const { - assert(has_component_index()); - return id; - } - uint32_t position() const { - if (has_component_index()) { - return component_index(); - } - return 0; - } - bool is_on_all_components() const; - bool is_part_of_cell_name() const { - return is_regular() || is_static(); - } - api::timestamp_type dropped_at() const { return _dropped_at; } - friend bool operator==(const column_definition&, const column_definition&); -}; - -class schema_builder; - -/* - * Sub-schema for thrift aspects. Should be kept isolated (and starved) - */ -class thrift_schema { - bool _compound = true; - bool _is_dynamic = false; -public: - bool has_compound_comparator() const; - bool is_dynamic() const; - friend class schema; -}; - -bool operator==(const column_definition&, const column_definition&); - -static constexpr int DEFAULT_MIN_COMPACTION_THRESHOLD = 4; -static constexpr int DEFAULT_MAX_COMPACTION_THRESHOLD = 32; -static constexpr int DEFAULT_MIN_INDEX_INTERVAL = 128; -static constexpr int DEFAULT_GC_GRACE_SECONDS = 864000; - -// Unsafe to access across shards. -// Safe to copy across shards. -class column_mapping_entry { - bytes _name; - data_type _type; -public: - column_mapping_entry(bytes name, data_type type) - : _name(std::move(name)), _type(std::move(type)) { } - column_mapping_entry(bytes name, sstring type_name); - column_mapping_entry(const column_mapping_entry&); - column_mapping_entry& operator=(const column_mapping_entry&); - column_mapping_entry(column_mapping_entry&&) = default; - column_mapping_entry& operator=(column_mapping_entry&&) = default; - const bytes& name() const { return _name; } - const data_type& type() const { return _type; } - const sstring& type_name() const { return _type->name(); } -}; - -// Encapsulates information needed for converting mutations between different schema versions. -// -// Unsafe to access across shards. -// Safe to copy across shards. -class column_mapping { -private: - // Contains _n_static definitions for static columns followed by definitions for regular columns, - // both ordered by consecutive column_ids. - // Primary key column sets are not mutable so we don't need to map them. - std::vector _columns; - column_count_type _n_static = 0; -public: - column_mapping() {} - column_mapping(std::vector columns, column_count_type n_static) - : _columns(std::move(columns)) - , _n_static(n_static) - { } - const std::vector& columns() const { return _columns; } - column_count_type n_static() const { return _n_static; } - const column_mapping_entry& column_at(column_kind kind, column_id id) const { - assert(kind == column_kind::regular_column || kind == column_kind::static_column); - return kind == column_kind::regular_column ? regular_column_at(id) : static_column_at(id); - } - const column_mapping_entry& static_column_at(column_id id) const { - if (id >= _n_static) { - throw std::out_of_range(sprint("static column id %d >= %d", id, _n_static)); - } - return _columns[id]; - } - const column_mapping_entry& regular_column_at(column_id id) const { - auto n_regular = _columns.size() - _n_static; - if (id >= n_regular) { - throw std::out_of_range(sprint("regular column id %d >= %d", id, n_regular)); - } - return _columns[id + _n_static]; - } - friend std::ostream& operator<<(std::ostream& out, const column_mapping& cm); -}; - -/** - * Augments a schema with fields related to materialized views. - * Effectively immutable. 
- */
-class raw_view_info final {
-    utils::UUID _id;
-    sstring _base_name;
-    bool _include_all_columns;
-    sstring _where_clause;
-public:
-    raw_view_info(utils::UUID base_id, sstring base_name, bool include_all_columns, sstring where_clause);
-
-    const utils::UUID& base_id() const {
-        return _base_id;
-    }
-
-    const sstring& base_name() const {
-        return _base_name;
-    }
-
-    bool include_all_columns() const {
-        return _include_all_columns;
-    }
-
-    const sstring& where_clause() const {
-        return _where_clause;
-    }
-
-    friend bool operator==(const raw_view_info&, const raw_view_info&);
-    friend std::ostream& operator<<(std::ostream& os, const raw_view_info& view);
-};
-
-bool operator==(const raw_view_info&, const raw_view_info&);
-std::ostream& operator<<(std::ostream& os, const raw_view_info& view);
-
-class view_info;
-
-// Represents a column set which is compatible with Cassandra 3.x.
-//
-// This layout differs from the layout Scylla uses in schema/schema_builder for static compact tables.
-// For such tables, Scylla expects all columns to be of regular type and no clustering columns,
-// whereas in v3 those columns are static and there is a clustering column with type matching the
-// cell name comparator and a regular column with type matching the default validator.
-// See issues #2555 and #1474.
-class v3_columns {
-    bool _is_dense = false;
-    bool _is_compound = false;
-    std::vector<column_definition> _columns;
-    std::unordered_map<bytes, const column_definition*> _columns_by_name;
-public:
-    v3_columns(std::vector<column_definition> columns, bool is_dense, bool is_compound);
-    v3_columns() = default;
-    v3_columns(v3_columns&&) = default;
-    v3_columns& operator=(v3_columns&&) = default;
-    v3_columns(const v3_columns&) = delete;
-    static v3_columns from_v2_schema(const schema&);
-public:
-    const std::vector<column_definition>& all_columns() const;
-    const std::unordered_map<bytes, const column_definition*>& columns_by_name() const;
-    bool is_static_compact() const;
-    bool is_compact() const;
-    void apply_to(schema_builder&) const;
-};
-
-/*
- * Effectively immutable.
- * Not safe to access across cores because of shared_ptr's.
- * Use global_schema_ptr for safe across-shard access.
- */
-class schema final : public enable_lw_shared_from_this<schema> {
-    friend class v3_columns;
-public:
-    struct dropped_column {
-        data_type type;
-        api::timestamp_type timestamp;
-        bool operator==(const dropped_column& rhs) const {
-            return type == rhs.type && timestamp == rhs.timestamp;
-        }
-    };
-private:
-    // More complex fields are derived from these inside rebuild().
-    // Contains only fields which can be safely default-copied.
- struct raw_schema { - raw_schema(utils::UUID id); - utils::UUID _id; - sstring _ks_name; - sstring _cf_name; - // regular columns are sorted by name - // static columns are sorted by name, but present only when there's any clustering column - std::vector _columns; - sstring _comment; - gc_clock::duration _default_time_to_live = gc_clock::duration::zero(); - data_type _regular_column_name_type; - data_type _default_validation_class = bytes_type; - double _bloom_filter_fp_chance = 0.01; - compression_parameters _compressor_params; - bool _is_dense = false; - bool _is_compound = true; - bool _is_counter = false; - cf_type _type = cf_type::standard; - int32_t _gc_grace_seconds = DEFAULT_GC_GRACE_SECONDS; - double _dc_local_read_repair_chance = 0.1; - double _read_repair_chance = 0.0; - double _crc_check_chance = 1; - int32_t _min_compaction_threshold = DEFAULT_MIN_COMPACTION_THRESHOLD; - int32_t _max_compaction_threshold = DEFAULT_MAX_COMPACTION_THRESHOLD; - int32_t _min_index_interval = DEFAULT_MIN_INDEX_INTERVAL; - int32_t _max_index_interval = 2048; - int32_t _memtable_flush_period = 0; - speculative_retry _speculative_retry = ::speculative_retry(speculative_retry::type::PERCENTILE, 0.99); - // FIXME: SizeTiered doesn't really work yet. Being it marked here only means that this is the strategy - // we will use by default - when we have the choice. - sstables::compaction_strategy_type _compaction_strategy = sstables::compaction_strategy_type::size_tiered; - std::map _compaction_strategy_options; - bool _compaction_enabled = true; - caching_options _caching_options; - table_schema_version _version; - std::unordered_map _dropped_columns; - std::map _collections; - std::unordered_map _indices_by_name; - }; - raw_schema _raw; - thrift_schema _thrift; - v3_columns _v3_columns; - mutable schema_registry_entry* _registry_entry = nullptr; - std::unique_ptr<::view_info> _view_info; - - const std::array _offsets; - - inline column_count_type column_offset(column_kind k) const { - return k == column_kind::partition_key ? 0 : _offsets[column_count_type(k) - 1]; - } - - std::unordered_map _columns_by_name; - std::map _regular_columns_by_name; - lw_shared_ptr> _partition_key_type; - lw_shared_ptr> _clustering_key_type; - column_mapping _column_mapping; - friend class schema_builder; -public: - using row_column_ids_are_ordered_by_name = std::true_type; - - typedef std::vector columns_type; - typedef typename columns_type::iterator iterator; - typedef typename columns_type::const_iterator const_iterator; - typedef boost::iterator_range iterator_range_type; - typedef boost::iterator_range const_iterator_range_type; - - static constexpr int32_t NAME_LENGTH = 48; - - - struct column { - bytes name; - data_type type; - }; -private: - ::shared_ptr make_column_specification(const column_definition& def); - void rebuild(); - schema(const raw_schema&, stdx::optional); -public: - // deprecated, use schema_builder. 
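Since the constructor that follows is flagged deprecated in favor of schema_builder, here is a minimal usage sketch of the builder path, assuming the schema_builder interface declared later in this patch; the keyspace, table, and column names are hypothetical, and string literals are assumed to convert to bytes (otherwise wrap them with to_bytes()).

#include "schema_builder.hh"

// Minimal sketch (assumed API, hypothetical names): prefer schema_builder
// over the deprecated schema constructor declared below.
schema_ptr make_example_schema() {
    schema_builder builder("example_ks", "example_cf");
    builder.with_column("pk", utf8_type, column_kind::partition_key);
    builder.with_column("ck", long_type, column_kind::clustering_key);
    builder.with_column("value", bytes_type); // regular column by default
    builder.set_comment("table built via schema_builder");
    return builder.build();
}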
- schema(std::experimental::optional id, - sstring ks_name, - sstring cf_name, - std::vector partition_key, - std::vector clustering_key, - std::vector regular_columns, - std::vector static_columns, - data_type regular_column_name_type, - sstring comment = {}); - schema(const schema&); - ~schema(); - table_schema_version version() const { - return _raw._version; - } - double bloom_filter_fp_chance() const { - return _raw._bloom_filter_fp_chance; - } - sstring thrift_key_validator() const; - const compression_parameters& get_compressor_params() const { - return _raw._compressor_params; - } - bool is_dense() const { - return _raw._is_dense; - } - - bool is_compound() const { - return _raw._is_compound; - } - - bool is_cql3_table() const { - return !is_super() && !is_dense() && is_compound(); - } - bool is_compact_table() const { - return !is_cql3_table(); - } - bool is_static_compact_table() const { - return !is_super() && !is_dense() && !is_compound(); - } - - thrift_schema& thrift() { - return _thrift; - } - const thrift_schema& thrift() const { - return _thrift; - } - const utils::UUID& id() const { - return _raw._id; - } - const sstring& comment() const { - return _raw._comment; - } - bool is_counter() const { - return _raw._is_counter; - } - - const cf_type type() const { - return _raw._type; - } - - bool is_super() const { - return _raw._type == cf_type::super; - } - - gc_clock::duration gc_grace_seconds() const { - auto seconds = std::chrono::seconds(_raw._gc_grace_seconds); - return std::chrono::duration_cast(seconds); - } - - double dc_local_read_repair_chance() const { - return _raw._dc_local_read_repair_chance; - } - - double read_repair_chance() const { - return _raw._read_repair_chance; - } - double crc_check_chance() const { - return _raw._crc_check_chance; - } - - int32_t min_compaction_threshold() const { - return _raw._min_compaction_threshold; - } - - int32_t max_compaction_threshold() const { - return _raw._max_compaction_threshold; - } - - int32_t min_index_interval() const { - return _raw._min_index_interval; - } - - int32_t max_index_interval() const { - return _raw._max_index_interval; - } - - int32_t memtable_flush_period() const { - return _raw._memtable_flush_period; - } - - sstables::compaction_strategy_type configured_compaction_strategy() const { - return _raw._compaction_strategy; - } - - sstables::compaction_strategy_type compaction_strategy() const { - return _raw._compaction_enabled ? 
_raw._compaction_strategy : sstables::compaction_strategy_type::null; - } - - const std::map& compaction_strategy_options() const { - return _raw._compaction_strategy_options; - } - - bool compaction_enabled() const { - return _raw._compaction_enabled; - } - - const ::speculative_retry& speculative_retry() const { - return _raw._speculative_retry; - } - - const ::caching_options& caching_options() const { - return _raw._caching_options; - } - - const column_definition* get_column_definition(const bytes& name) const; - const column_definition& column_at(column_kind, column_id) const; - const_iterator regular_begin() const; - const_iterator regular_end() const; - const_iterator regular_lower_bound(const bytes& name) const; - const_iterator regular_upper_bound(const bytes& name) const; - const_iterator static_begin() const; - const_iterator static_end() const; - const_iterator static_lower_bound(const bytes& name) const; - const_iterator static_upper_bound(const bytes& name) const; - data_type column_name_type(const column_definition& def) const; - const column_definition& clustering_column_at(column_id id) const; - const column_definition& regular_column_at(column_id id) const; - const column_definition& static_column_at(column_id id) const; - bool is_last_partition_key(const column_definition& def) const; - bool has_multi_cell_collections() const; - bool has_static_columns() const; - column_count_type partition_key_size() const; - column_count_type clustering_key_size() const; - column_count_type static_columns_count() const; - column_count_type regular_columns_count() const; - // Returns a range of column definitions - const_iterator_range_type partition_key_columns() const; - // Returns a range of column definitions - const_iterator_range_type clustering_key_columns() const; - // Returns a range of column definitions - const_iterator_range_type static_columns() const; - // Returns a range of column definitions - const_iterator_range_type regular_columns() const; - // Returns a range of column definitions - - typedef boost::range::joined_range - select_order_range; - - select_order_range all_columns_in_select_order() const; - uint32_t position(const column_definition& column) const; - - const columns_type& all_columns() const { - return _raw._columns; - } - - const std::unordered_map& columns_by_name() const { - return _columns_by_name; - } - - const auto& dropped_columns() const { - return _raw._dropped_columns; - } - - const auto& collections() const { - return _raw._collections; - } - - gc_clock::duration default_time_to_live() const { - return _raw._default_time_to_live; - } - - data_type make_legacy_default_validator() const; - - const sstring& ks_name() const { - return _raw._ks_name; - } - const sstring& cf_name() const { - return _raw._cf_name; - } - const lw_shared_ptr>& partition_key_type() const { - return _partition_key_type; - } - const lw_shared_ptr>& clustering_key_type() const { - return _clustering_key_type; - } - const lw_shared_ptr>& clustering_key_prefix_type() const { - return _clustering_key_type; - } - const data_type& regular_column_name_type() const { - return _raw._regular_column_name_type; - } - const data_type& static_column_name_type() const { - return utf8_type; - } - const std::unique_ptr<::view_info>& view_info() const { - return _view_info; - } - bool is_view() const { - return bool(_view_info); - } - // Returns all index names of this schema. - std::vector index_names() const; - // Returns all indices of this schema. 
-    std::vector<index_metadata> indices() const;
-    const std::unordered_map<sstring, index_metadata>& all_indices() const;
-    // Search for an index with a given name.
-    bool has_index(const sstring& index_name) const;
-    // Search for an existing index with same kind and options.
-    stdx::optional<index_metadata> find_index_noname(const index_metadata& target) const;
-    friend std::ostream& operator<<(std::ostream& os, const schema& s);
-    friend bool operator==(const schema&, const schema&);
-    const column_mapping& get_column_mapping() const;
-    friend class schema_registry_entry;
-    // May be called from different shard
-    schema_registry_entry* registry_entry() const noexcept;
-    // Returns true iff this schema version was synced with on the current node.
-    // Schema version is said to be synced with when its mutations were merged
-    // into the current node's schema, so that the current node's schema is at least as
-    // recent as this version.
-    bool is_synced() const;
-    bool equal_columns(const schema&) const;
-public:
-    const v3_columns& v3() const {
-        return _v3_columns;
-    }
-};
-
-bool operator==(const schema&, const schema&);
-
-using schema_ptr = lw_shared_ptr<const schema>;
-
-/**
- * Wrapper for schema_ptr used by functions that expect an engaged view_info field.
- */
-class view_ptr final {
-    schema_ptr _schema;
-public:
-    explicit view_ptr(schema_ptr schema) noexcept : _schema(schema) {
-        if (schema) {
-            assert(_schema->is_view());
-        }
-    }
-
-    const schema& operator*() const noexcept { return *_schema; }
-    const schema* operator->() const noexcept { return _schema.operator->(); }
-    const schema* get() const noexcept { return _schema.get(); }
-
-    operator schema_ptr() const noexcept {
-        return _schema;
-    }
-
-    explicit operator bool() const noexcept {
-        return bool(_schema);
-    }
-
-    friend std::ostream& operator<<(std::ostream& os, const view_ptr& s);
-};
-
-std::ostream& operator<<(std::ostream& os, const view_ptr& view);
-
-utils::UUID generate_legacy_id(const sstring& ks_name, const sstring& cf_name);
diff --git a/scylla/schema_builder.hh b/scylla/schema_builder.hh
deleted file mode 100644
index 832f194..0000000
--- a/scylla/schema_builder.hh
+++ /dev/null
@@ -1,273 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */ - -#pragma once - -#include "schema.hh" -#include "database_fwd.hh" - -struct schema_builder { -public: - enum class compact_storage { no, yes }; -private: - schema::raw_schema _raw; - std::experimental::optional _compact_storage; - std::experimental::optional _version; - std::experimental::optional _view_info; - schema_builder(const schema::raw_schema&); -public: - schema_builder(const sstring& ks_name, const sstring& cf_name, - std::experimental::optional = { }, - data_type regular_column_name_type = utf8_type); - schema_builder(const schema_ptr); - - schema_builder& set_uuid(const utils::UUID& id) { - _raw._id = id; - return *this; - } - const utils::UUID& uuid() const { - return _raw._id; - } - schema_builder& set_regular_column_name_type(const data_type& t) { - _raw._regular_column_name_type = t; - return *this; - } - schema_builder& set_default_validation_class(const data_type& t) { - _raw._default_validation_class = t; - return *this; - } - const data_type& regular_column_name_type() const { - return _raw._regular_column_name_type; - } - const sstring& ks_name() const { - return _raw._ks_name; - } - const sstring& cf_name() const { - return _raw._cf_name; - } - schema_builder& set_comment(const sstring& s) { - _raw._comment = s; - return *this; - } - const sstring& comment() const { - return _raw._comment; - } - schema_builder& set_default_time_to_live(gc_clock::duration t) { - _raw._default_time_to_live = t; - return *this; - } - gc_clock::duration default_time_to_live() const { - return _raw._default_time_to_live; - } - - schema_builder& set_gc_grace_seconds(int32_t gc_grace_seconds) { - _raw._gc_grace_seconds = gc_grace_seconds; - return *this; - } - - int32_t get_gc_grace_seconds() const { - return _raw._gc_grace_seconds; - } - - schema_builder& set_dc_local_read_repair_chance(double chance) { - _raw._dc_local_read_repair_chance = chance; - return *this; - } - - double get_dc_local_read_repair_chance() const { - return _raw._dc_local_read_repair_chance; - } - - schema_builder& set_read_repair_chance(double chance) { - _raw._read_repair_chance = chance; - return *this; - } - - double get_read_repair_chance() const { - return _raw._read_repair_chance; - } - - schema_builder& set_crc_check_chance(double chance) { - _raw._crc_check_chance = chance; - return *this; - } - - double get_crc_check_chance() const { - return _raw._crc_check_chance; - } - - schema_builder& set_min_compaction_threshold(int32_t t) { - _raw._min_compaction_threshold = t; - return *this; - } - - int32_t get_min_compaction_threshold() const { - return _raw._min_compaction_threshold; - } - - schema_builder& set_max_compaction_threshold(int32_t t) { - _raw._max_compaction_threshold = t; - return *this; - } - - int32_t get_max_compaction_threshold() const { - return _raw._max_compaction_threshold; - } - - schema_builder& set_compaction_enabled(bool enabled) { - _raw._compaction_enabled = enabled; - return *this; - } - - bool compaction_enabled() const { - return _raw._compaction_enabled; - } - - schema_builder& set_min_index_interval(int32_t t) { - _raw._min_index_interval = t; - return *this; - } - - int32_t get_min_index_interval() const { - return _raw._min_index_interval; - } - - schema_builder& set_max_index_interval(int32_t t) { - _raw._max_index_interval = t; - return *this; - } - - int32_t get_max_index_interval() const { - return _raw._max_index_interval; - } - - schema_builder& set_memtable_flush_period(int32_t t) { - _raw._memtable_flush_period = t; - return *this; - } - - int32_t 
get_memtable_flush_period() const {
-        return _raw._memtable_flush_period;
-    }
-
-    schema_builder& set_speculative_retry(sstring retry_sstring) {
-        _raw._speculative_retry = speculative_retry::from_sstring(retry_sstring);
-        return *this;
-    }
-
-    const speculative_retry& get_speculative_retry() const {
-        return _raw._speculative_retry;
-    }
-
-    schema_builder& set_bloom_filter_fp_chance(double fp) {
-        _raw._bloom_filter_fp_chance = fp;
-        return *this;
-    }
-    double get_bloom_filter_fp_chance() const {
-        return _raw._bloom_filter_fp_chance;
-    }
-    schema_builder& set_compressor_params(const compression_parameters& cp) {
-        _raw._compressor_params = cp;
-        return *this;
-    }
-
-    schema_builder& set_compaction_strategy(sstables::compaction_strategy_type type) {
-        _raw._compaction_strategy = type;
-        return *this;
-    }
-
-    schema_builder& set_compaction_strategy_options(std::map<sstring, sstring> options) {
-        _raw._compaction_strategy_options = std::move(options);
-        return *this;
-    }
-
-    schema_builder& set_caching_options(caching_options c) {
-        _raw._caching_options = std::move(c);
-        return *this;
-    }
-
-    schema_builder& set_is_dense(bool is_dense) {
-        _raw._is_dense = is_dense;
-        return *this;
-    }
-
-    schema_builder& set_is_compound(bool is_compound) {
-        _raw._is_compound = is_compound;
-        return *this;
-    }
-
-    schema_builder& set_is_counter(bool is_counter) {
-        _raw._is_counter = is_counter;
-        return *this;
-    }
-
-    class default_names {
-    public:
-        default_names(const schema_builder&);
-        default_names(const schema::raw_schema&);
-
-        sstring partition_key_name();
-        sstring clustering_name();
-        sstring compact_value_name();
-    private:
-        sstring unique_name(const sstring&, size_t&, size_t) const;
-        const schema::raw_schema& _raw;
-        size_t _partition_index, _clustering_index, _compact_index;
-    };
-
-    column_definition& find_column(const cql3::column_identifier&);
-    schema_builder& with_column(const column_definition& c);
-    schema_builder& with_column(bytes name, data_type type, column_kind kind = column_kind::regular_column);
-    schema_builder& with_column(bytes name, data_type type, column_kind kind, column_id component_index);
-    schema_builder& without_column(bytes name);
-    schema_builder& without_column(sstring name, api::timestamp_type timestamp);
-    schema_builder& without_column(sstring name, data_type, api::timestamp_type timestamp);
-    schema_builder& with_column_rename(bytes from, bytes to);
-    schema_builder& with_altered_column_type(bytes name, data_type new_type);
-
-    // Adds information about a collection that existed in the past but whose column
-    // has since been removed. For adding collections that are still alive,
-    // use with_column(). (A usage sketch follows.)
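To make the distinction in the preceding comment concrete, here is a hedged sketch; the with_collection declaration it documents comes right after. This assumes a schema_builder named builder as in the earlier example, and map_type_impl::get_instance with a (keys, values, is_multi_cell) signature; the column names are hypothetical.

// Minimal sketch (assumed API, hypothetical names): a live collection column
// is added with with_column(); with_collection() only records a collection
// that existed in the past, so old sstable data can still be interpreted.
auto tags_type = map_type_impl::get_instance(utf8_type, utf8_type, true);
builder.with_column("tags", tags_type);         // collection that is still alive
builder.with_collection("old_tags", tags_type); // dropped collection, kept for history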
- schema_builder& with_collection(bytes name, data_type type); - - schema_builder& with(compact_storage); - schema_builder& with_version(table_schema_version); - - schema_builder& with_view_info(utils::UUID base_id, sstring base_name, bool include_all_columns, sstring where_clause); - schema_builder& with_view_info(const schema& base_schema, bool include_all_columns, sstring where_clause) { - return with_view_info(base_schema.id(), base_schema.cf_name(), include_all_columns, where_clause); - } - - schema_builder& with_index(const index_metadata& im); - schema_builder& without_index(const sstring& name); - - default_names get_default_names() const { - return default_names(_raw); - } - - // Equivalent to with(cp).build() - schema_ptr build(compact_storage cp); - - schema_ptr build(); -private: - friend class default_names; - void prepare_dense_schema(schema::raw_schema& raw); -}; diff --git a/scylla/schema_mutations.cc b/scylla/schema_mutations.cc deleted file mode 100644 index cda8466..0000000 --- a/scylla/schema_mutations.cc +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "schema_mutations.hh" -#include "canonical_mutation.hh" -#include "db/schema_tables.hh" -#include "md5_hasher.hh" - -schema_mutations::schema_mutations(canonical_mutation columnfamilies, - canonical_mutation columns, - bool is_view, - stdx::optional indices, - stdx::optional dropped_columns, - stdx::optional scylla_tables) - : _columnfamilies(columnfamilies.to_mutation(is_view ? db::schema_tables::views() : db::schema_tables::tables())) - , _columns(columns.to_mutation(db::schema_tables::columns())) - , _indices(indices ? stdx::optional{indices.value().to_mutation(db::schema_tables::indexes())} : stdx::nullopt) - , _dropped_columns(dropped_columns ? stdx::optional{dropped_columns.value().to_mutation(db::schema_tables::dropped_columns())} : stdx::nullopt) - , _scylla_tables(scylla_tables ? 
stdx::optional{scylla_tables.value().to_mutation(db::schema_tables::scylla_tables())} : stdx::nullopt) -{} - -void schema_mutations::copy_to(std::vector& dst) const { - dst.push_back(_columnfamilies); - dst.push_back(_columns); - if (_indices) { - dst.push_back(_indices.value()); - } - if (_dropped_columns) { - dst.push_back(_dropped_columns.value()); - } - if (_scylla_tables) { - dst.push_back(_scylla_tables.value()); - } -} - -table_schema_version schema_mutations::digest() const { - if (_scylla_tables) { - auto rs = query::result_set(*_scylla_tables); - if (!rs.empty()) { - auto&& row = rs.row(0); - if (row.has("version")) { - auto val = row.get("version"); - if (val) { - return *val; - } - } - } - } - - md5_hasher h; - db::schema_tables::feed_hash_for_schema_digest(h, _columnfamilies); - db::schema_tables::feed_hash_for_schema_digest(h, _columns); - if (_indices && !_indices.value().partition().empty()) { - db::schema_tables::feed_hash_for_schema_digest(h, _indices.value()); - } - if (_dropped_columns && !_dropped_columns.value().partition().empty()) { - db::schema_tables::feed_hash_for_schema_digest(h, _dropped_columns.value()); - } - if (_scylla_tables) { - db::schema_tables::feed_hash_for_schema_digest(h, _scylla_tables.value()); - } - return utils::UUID_gen::get_name_UUID(h.finalize()); -} - -static stdx::optional compact(const stdx::optional& m) { - if (!m) { - return m; - } - return db::schema_tables::compact_for_schema_digest(*m); -} - -static stdx::optional compact(const mutation& m) { - return db::schema_tables::compact_for_schema_digest(m); -} - -bool schema_mutations::operator==(const schema_mutations& other) const { - return compact(_columnfamilies) == compact(other._columnfamilies) - && compact(_columns) == compact(other._columns) - && compact(_indices) == compact(other._indices) - && compact(_dropped_columns) == compact(other._dropped_columns) - && compact(_scylla_tables) == compact(other._scylla_tables) - ; -} - -bool schema_mutations::operator!=(const schema_mutations& other) const { - return !(*this == other); -} - -bool schema_mutations::live() const { - return _columnfamilies.live_row_count() > 0 || _columns.live_row_count() > 0; -} - -bool schema_mutations::is_view() const { - return _columnfamilies.schema() == db::schema_tables::views(); -} diff --git a/scylla/schema_mutations.hh b/scylla/schema_mutations.hh deleted file mode 100644 index 90e8fd7..0000000 --- a/scylla/schema_mutations.hh +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include "mutation.hh" -#include "schema.hh" -#include "canonical_mutation.hh" - -// Commutative representation of table schema -// Equality ignores tombstones. 
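Before the class declaration that follows, a brief usage sketch may help (hedged; constructing the underlying mutations is elided, and merge_schema is a hypothetical helper): schema_mutations flattens into plain mutations for merging, and digest() yields a version identifier that, per the note above, ignores tombstone-only differences.

// Minimal sketch (assumed API): flatten a schema_mutations into mutations to
// apply, and derive the table's schema version from its digest.
void merge_schema(schema_mutations& sm, std::vector<mutation>& pending) {
    sm.copy_to(pending);                  // append the component mutations
    table_schema_version v = sm.digest(); // stable across tombstone-only diffs
    (void)v; // e.g. compare with a remote node's advertised version
}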
-class schema_mutations { - mutation _columnfamilies; - mutation _columns; - stdx::optional _indices; - stdx::optional _dropped_columns; - stdx::optional _scylla_tables; -public: - schema_mutations(mutation columnfamilies, mutation columns, stdx::optional indices, stdx::optional dropped_columns, - stdx::optional scylla_tables) - : _columnfamilies(std::move(columnfamilies)) - , _columns(std::move(columns)) - , _indices(std::move(indices)) - , _dropped_columns(std::move(dropped_columns)) - , _scylla_tables(std::move(scylla_tables)) - { } - schema_mutations(canonical_mutation columnfamilies, - canonical_mutation columns, - bool is_view, - stdx::optional indices, - stdx::optional dropped_columns, - stdx::optional scylla_tables); - - schema_mutations(schema_mutations&&) = default; - schema_mutations& operator=(schema_mutations&&) = default; - schema_mutations(const schema_mutations&) = default; - schema_mutations& operator=(const schema_mutations&) = default; - - void copy_to(std::vector& dst) const; - - const mutation& columnfamilies_mutation() const { - return _columnfamilies; - } - - const mutation& columns_mutation() const { - return _columns; - } - - const stdx::optional& scylla_tables() const { - return _scylla_tables; - } - - const stdx::optional& indices_mutation() const { - return _indices; - } - const stdx::optional& dropped_columns_mutation() const { - return _dropped_columns; - } - - canonical_mutation columnfamilies_canonical_mutation() const { - return canonical_mutation(_columnfamilies); - } - - canonical_mutation columns_canonical_mutation() const { - return canonical_mutation(_columns); - } - - stdx::optional indices_canonical_mutation() const { - if (_indices) { - return canonical_mutation(_indices.value()); - } - return {}; - } - stdx::optional dropped_columns_canonical_mutation() const { - if (_dropped_columns) { - return canonical_mutation(_dropped_columns.value()); - } - return {}; - } - stdx::optional scylla_tables_canonical_mutation() const { - if (_scylla_tables) { - return canonical_mutation(_scylla_tables.value()); - } - return {}; - } - - bool is_view() const; - - table_schema_version digest() const; - - bool operator==(const schema_mutations&) const; - bool operator!=(const schema_mutations&) const; - - // Returns true iff any mutations contain any live cells - bool live() const; -}; - diff --git a/scylla/schema_registry.cc b/scylla/schema_registry.cc deleted file mode 100644 index dd0a12c..0000000 --- a/scylla/schema_registry.cc +++ /dev/null @@ -1,303 +0,0 @@ -/* - * Copyright 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include - -#include "schema_registry.hh" -#include "log.hh" - - -static logging::logger slogger("schema_registry"); - -static thread_local schema_registry registry; - -schema_version_not_found::schema_version_not_found(table_schema_version v) - : std::runtime_error{sprint("Schema version %s not found", v)} -{ } - -schema_version_loading_failed::schema_version_loading_failed(table_schema_version v) - : std::runtime_error{sprint("Failed to load schema version %s", v)} -{ } - -schema_registry_entry::~schema_registry_entry() { - if (_schema) { - _schema->_registry_entry = nullptr; - } -} - -schema_registry_entry::schema_registry_entry(table_schema_version v, schema_registry& r) - : _state(state::INITIAL) - , _version(v) - , _registry(r) - , _sync_state(sync_state::NOT_SYNCED) -{ - _erase_timer.set_callback([this] { - slogger.debug("Dropping {}", _version); - assert(!_schema); - try { - _registry._entries.erase(_version); - } catch (...) { - slogger.error("Failed to erase schema version {}: {}", _version, std::current_exception()); - } - }); -} - -schema_ptr schema_registry::learn(const schema_ptr& s) { - if (s->registry_entry()) { - return std::move(s); - } - auto i = _entries.find(s->version()); - if (i != _entries.end()) { - return i->second->get_schema(); - } - slogger.debug("Learning about version {} of {}.{}", s->version(), s->ks_name(), s->cf_name()); - auto e_ptr = make_lw_shared(s->version(), *this); - auto loaded_s = e_ptr->load(frozen_schema(s)); - _entries.emplace(s->version(), e_ptr); - return loaded_s; -} - -schema_registry_entry& schema_registry::get_entry(table_schema_version v) const { - auto i = _entries.find(v); - if (i == _entries.end()) { - throw schema_version_not_found(v); - } - schema_registry_entry& e = *i->second; - if (e._state != schema_registry_entry::state::LOADED) { - throw schema_version_not_found(v); - } - return e; -} - -schema_ptr schema_registry::get(table_schema_version v) const { - return get_entry(v).get_schema(); -} - -frozen_schema schema_registry::get_frozen(table_schema_version v) const { - return get_entry(v).frozen(); -} - -future schema_registry::get_or_load(table_schema_version v, const async_schema_loader& loader) { - auto i = _entries.find(v); - if (i == _entries.end()) { - auto e_ptr = make_lw_shared(v, *this); - auto f = e_ptr->start_loading(loader); - _entries.emplace(v, e_ptr); - return f; - } - schema_registry_entry& e = *i->second; - if (e._state == schema_registry_entry::state::LOADING) { - return e._schema_promise.get_shared_future(); - } - return make_ready_future(e.get_schema()); -} - -schema_ptr schema_registry::get_or_null(table_schema_version v) const { - auto i = _entries.find(v); - if (i == _entries.end()) { - return nullptr; - } - schema_registry_entry& e = *i->second; - if (e._state != schema_registry_entry::state::LOADED) { - return nullptr; - } - return e.get_schema(); -} - -schema_ptr schema_registry::get_or_load(table_schema_version v, const schema_loader& loader) { - auto i = _entries.find(v); - if (i == _entries.end()) { - auto e_ptr = make_lw_shared(v, *this); - auto s = e_ptr->load(loader(v)); - _entries.emplace(v, e_ptr); - return s; - } - schema_registry_entry& e = *i->second; - if (e._state == schema_registry_entry::state::LOADING) { - return e.load(loader(v)); - } - return e.get_schema(); -} - -schema_ptr schema_registry_entry::load(frozen_schema fs) { - _frozen_schema = std::move(fs); - auto s = get_schema(); - if (_state == state::LOADING) { - _schema_promise.set_value(s); - _schema_promise = {}; - } - 
_state = state::LOADED; - slogger.trace("Loaded {} = {}", _version, *s); - return s; -} - -future schema_registry_entry::start_loading(async_schema_loader loader) { - _loader = std::move(loader); - auto f = _loader(_version); - auto sf = _schema_promise.get_shared_future(); - _state = state::LOADING; - slogger.trace("Loading {}", _version); - f.then_wrapped([self = shared_from_this(), this] (future&& f) { - _loader = {}; - if (_state != state::LOADING) { - slogger.trace("Loading of {} aborted", _version); - return; - } - try { - try { - load(f.get0()); - } catch (...) { - std::throw_with_nested(schema_version_loading_failed(_version)); - } - } catch (...) { - slogger.debug("Loading of {} failed: {}", _version, std::current_exception()); - _schema_promise.set_exception(std::current_exception()); - _registry._entries.erase(_version); - } - }); - return sf; -} - -schema_ptr schema_registry_entry::get_schema() { - if (!_schema) { - slogger.trace("Activating {}", _version); - auto s = _frozen_schema->unfreeze(); - if (s->version() != _version) { - throw std::runtime_error(sprint("Unfrozen schema version doesn't match entry version (%s): %s", _version, *s)); - } - _erase_timer.cancel(); - s->_registry_entry = this; - _schema = &*s; - return s; - } else { - return _schema->shared_from_this(); - } -} - -void schema_registry_entry::detach_schema() noexcept { - slogger.trace("Deactivating {}", _version); - _schema = nullptr; - _erase_timer.arm(_registry.grace_period()); -} - -frozen_schema schema_registry_entry::frozen() const { - assert(_state >= state::LOADED); - return *_frozen_schema; -} - -future<> schema_registry_entry::maybe_sync(std::function()> syncer) { - switch (_sync_state) { - case schema_registry_entry::sync_state::SYNCED: - return make_ready_future<>(); - case schema_registry_entry::sync_state::SYNCING: - return _synced_promise.get_shared_future(); - case schema_registry_entry::sync_state::NOT_SYNCED: { - slogger.debug("Syncing {}", _version); - _synced_promise = {}; - auto f = do_with(std::move(syncer), [] (auto& syncer) { - return syncer(); - }); - auto sf = _synced_promise.get_shared_future(); - _sync_state = schema_registry_entry::sync_state::SYNCING; - f.then_wrapped([this, self = shared_from_this()] (auto&& f) { - if (_sync_state != sync_state::SYNCING) { - return; - } - if (f.failed()) { - slogger.debug("Syncing of {} failed", _version); - _sync_state = schema_registry_entry::sync_state::NOT_SYNCED; - _synced_promise.set_exception(f.get_exception()); - } else { - slogger.debug("Synced {}", _version); - _sync_state = schema_registry_entry::sync_state::SYNCED; - _synced_promise.set_value(); - } - }); - return sf; - } - default: - assert(0); - } -} - -bool schema_registry_entry::is_synced() const { - return _sync_state == sync_state::SYNCED; -} - -void schema_registry_entry::mark_synced() { - if (_sync_state == sync_state::SYNCING) { - _synced_promise.set_value(); - } - _sync_state = sync_state::SYNCED; - slogger.debug("Marked {} as synced", _version); -} - -schema_registry& local_schema_registry() { - return registry; -} - -global_schema_ptr::global_schema_ptr(const global_schema_ptr& o) - : global_schema_ptr(o.get()) -{ } - -global_schema_ptr::global_schema_ptr(global_schema_ptr&& o) { - auto current = engine().cpu_id(); - if (o._cpu_of_origin != current) { - throw std::runtime_error("Attempted to move global_schema_ptr across shards"); - } - _ptr = std::move(o._ptr); - _cpu_of_origin = current; -} - -schema_ptr global_schema_ptr::get() const { - if (engine().cpu_id() == 
_cpu_of_origin) { - return _ptr; - } else { - // 'e' points to a foreign entry, but we know it won't be evicted - // because _ptr is preventing this. - const schema_registry_entry& e = *_ptr->registry_entry(); - schema_ptr s = local_schema_registry().get_or_null(e.version()); - if (!s) { - s = local_schema_registry().get_or_load(e.version(), [&e](table_schema_version) { - return e.frozen(); - }); - } - if (e.is_synced()) { - s->registry_entry()->mark_synced(); - } - return s; - } -} - -global_schema_ptr::global_schema_ptr(const schema_ptr& ptr) - : _ptr([&ptr]() { - // _ptr must always have an associated registry entry, - // if ptr doesn't, we need to load it into the registry. - schema_registry_entry* e = ptr->registry_entry(); - if (e) { - return ptr; - } - return local_schema_registry().get_or_load(ptr->version(), [&ptr] (table_schema_version) { - return frozen_schema(ptr); - }); - }()) - , _cpu_of_origin(engine().cpu_id()) -{ } diff --git a/scylla/schema_registry.hh b/scylla/schema_registry.hh deleted file mode 100644 index edb8fb2..0000000 --- a/scylla/schema_registry.hh +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include -#include -#include -#include "schema.hh" -#include "frozen_schema.hh" - -class schema_registry; - -using async_schema_loader = std::function(table_schema_version)>; -using schema_loader = std::function; - -class schema_version_not_found : public std::runtime_error { -public: - schema_version_not_found(table_schema_version v); -}; - -class schema_version_loading_failed : public std::runtime_error { -public: - schema_version_loading_failed(table_schema_version v); -}; - -// -// Presence in schema_registry is controlled by different processes depending on -// life cycle stage: -// 1) Initially it's controlled by the loader. When loading fails, entry is removed by the loader. -// 2) When loading succeeds, the entry is controlled by live schema_ptr. It remains present as long as -// there's any live schema_ptr. -// 3) When last schema_ptr dies, entry is deactivated. Currently it is removed immediately, later we may -// want to keep it around for some time to reduce cache misses. -// -// In addition to the above the entry is controlled by lw_shared_ptr<> to cope with races between loaders. 
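Before the entry class below, a usage sketch of the registry API this life cycle supports (hedged; the frozen_schema argument stands in for a version received from a peer, and the name resolve_schema is hypothetical):

// Minimal sketch (assumed API): resolve a schema version on this shard,
// falling back to a loader that supplies the frozen representation.
schema_ptr resolve_schema(table_schema_version v, const frozen_schema& fs) {
    if (auto s = local_schema_registry().get_or_null(v)) {
        return s;                 // entry already loaded on this shard
    }
    // get_or_load() keeps the entry alive as long as the returned schema_ptr is.
    return local_schema_registry().get_or_load(v, [&fs] (table_schema_version) {
        return fs;                // the schema_loader returns a frozen_schema
    });
}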
-//
-class schema_registry_entry : public enable_lw_shared_from_this<schema_registry_entry> {
-    using erase_clock = seastar::lowres_clock;
-
-    enum class state {
-        INITIAL, LOADING, LOADED
-    };
-
-    state _state;
-    table_schema_version _version; // always valid
-    schema_registry& _registry; // always valid
-
-    async_schema_loader _loader; // valid when state == LOADING
-    shared_promise<schema_ptr> _schema_promise; // valid when state == LOADING
-
-    std::experimental::optional<frozen_schema> _frozen_schema; // engaged when state == LOADED
-    // valid when state == LOADED
-    // This is != nullptr when there is an alive schema_ptr associated with this entry.
-    const ::schema* _schema = nullptr;
-
-    enum class sync_state { NOT_SYNCED, SYNCING, SYNCED };
-    sync_state _sync_state;
-    shared_promise<> _synced_promise; // valid when _sync_state == SYNCING
-    timer<erase_clock> _erase_timer;
-
-    friend class schema_registry;
-public:
-    schema_registry_entry(table_schema_version v, schema_registry& r);
-    schema_registry_entry(schema_registry_entry&&) = delete;
-    schema_registry_entry(const schema_registry_entry&) = delete;
-    ~schema_registry_entry();
-    schema_ptr load(frozen_schema);
-    future<schema_ptr> start_loading(async_schema_loader);
-    schema_ptr get_schema(); // call only when state >= LOADED
-    // Can be called from other shards
-    bool is_synced() const;
-    // Initiates an asynchronous schema sync, or returns a ready future when already synced.
-    future<> maybe_sync(std::function<future<>()> sync);
-    // Marks this schema version as synced. Syncing cannot be in progress.
-    void mark_synced();
-    // Can be called from other shards
-    frozen_schema frozen() const;
-    // Can be called from other shards
-    table_schema_version version() const { return _version; }
-public:
-    // Called by class schema
-    void detach_schema() noexcept;
-};
-
-//
-// Keeps track of different versions of table schemas. A per-shard object.
-//
-// For every schema_ptr obtained through getters, as long as the schema pointed to is
-// alive the registry will keep its entry. To ensure remote nodes can query the current
-// node for a schema version, make sure that the schema_ptr for the request is alive
-// around the call.
-//
-class schema_registry {
-    std::unordered_map<table_schema_version, lw_shared_ptr<schema_registry_entry>> _entries;
-    friend class schema_registry_entry;
-    schema_registry_entry& get_entry(table_schema_version) const;
-    // Duration for which unused entries are kept alive to avoid
-    // too frequent re-requests and syncs.
-    schema_registry_entry::erase_clock::duration grace_period() const {
-        return std::chrono::seconds(1);
-    }
-public:
-    // Looks up schema by version or loads it using the supplied loader.
-    schema_ptr get_or_load(table_schema_version, const schema_loader&);
-
-    // Looks up schema by version or returns an empty pointer if not available.
-    schema_ptr get_or_null(table_schema_version) const;
-
-    // Like the get_or_load() which takes a schema_loader, but the loader may be
-    // deferring. The loader is copied and must be alive only until this method
-    // returns. If the loader fails, the future resolves with
-    // schema_version_loading_failed.
-    future<schema_ptr> get_or_load(table_schema_version, const async_schema_loader&);
-
-    // Looks up schema version. Throws schema_version_not_found when not found
-    // or loading is in progress.
-    schema_ptr get(table_schema_version) const;
-
-    // Looks up schema version. Throws schema_version_not_found when not found
-    // or loading is in progress.
-    frozen_schema get_frozen(table_schema_version) const;
-
-    // Attempts to add given schema to the registry.
If the registry already - // knows about the schema, returns existing entry, otherwise returns back - // the schema which was passed as argument. Users should prefer to use the - // schema_ptr returned by this method instead of the one passed to it, - // because doing so ensures that the entry will be kept in the registry as - // long as the schema is actively used. - schema_ptr learn(const schema_ptr&); -}; - -schema_registry& local_schema_registry(); - -// Schema pointer which can be safely accessed/passed across shards via -// const&. Useful for ensuring that schema version obtained on one shard is -// automatically propagated to other shards, no matter how long the processing -// chain will last. -class global_schema_ptr { - schema_ptr _ptr; - unsigned _cpu_of_origin; -public: - // Note: the schema_ptr must come from the current shard and can't be nullptr. - global_schema_ptr(const schema_ptr&); - // The other may come from a different shard. - global_schema_ptr(const global_schema_ptr& other); - // The other must come from current shard. - global_schema_ptr(global_schema_ptr&& other); - // May be invoked across shards. Always returns an engaged pointer. - schema_ptr get() const; - operator schema_ptr() const { return get(); } -}; diff --git a/scylla/schema_upgrader.hh b/scylla/schema_upgrader.hh deleted file mode 100644 index 2683cec..0000000 --- a/scylla/schema_upgrader.hh +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "streamed_mutation.hh" -#include "converting_mutation_partition_applier.hh" - -// A StreamedMutationTransformer which transforms the stream to a different schema -class schema_upgrader { - schema_ptr _prev; - schema_ptr _new; -private: - row transform(row&& r, column_kind kind) { - row new_row; - r.for_each_cell([&] (column_id id, atomic_cell_or_collection& cell) { - const column_definition& col = _prev->column_at(kind, id); - const column_definition* new_col = _new->get_column_definition(col.name()); - if (new_col) { - converting_mutation_partition_applier::append_cell(new_row, kind, *new_col, col.type, std::move(cell)); - } - }); - return new_row; - } -public: - schema_upgrader(schema_ptr s) - : _new(std::move(s)) - { } - schema_ptr operator()(schema_ptr old) { - _prev = std::move(old); - return _new; - } - mutation_fragment consume(static_row&& row) { - return mutation_fragment(static_row(transform(std::move(row.cells()), column_kind::static_column))); - } - mutation_fragment consume(clustering_row&& row) { - return mutation_fragment(clustering_row(row.key(), row.tomb(), row.marker(), - transform(std::move(row.cells()), column_kind::regular_column))); - } - mutation_fragment consume(range_tombstone&& rt) { - return std::move(rt); - } - mutation_fragment operator()(mutation_fragment&& mf) { - return std::move(mf).consume(*this); - } -}; - -GCC6_CONCEPT( -static_assert(StreamedMutationTranformer()); -) diff --git a/scylla/scripts/git-archive-all b/scylla/scripts/git-archive-all deleted file mode 100755 index 3552c6b..0000000 --- a/scylla/scripts/git-archive-all +++ /dev/null @@ -1,494 +0,0 @@ -#! /usr/bin/env python -# coding=utf-8 - -from __future__ import print_function -from __future__ import unicode_literals - -__version__ = "1.9" - -import logging -from os import extsep, path, readlink, curdir -from subprocess import CalledProcessError, Popen, PIPE -import sys -import tarfile -from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED - - -class GitArchiver(object): - """ - GitArchiver - - Scan a git repository and export all tracked files, and submodules. - Checks for .gitattributes files in each directory and uses 'export-ignore' - pattern entries for ignore files in the archive. - - >>> archiver = GitArchiver(main_repo_abspath='my/repo/path') - >>> archiver.create('output.zip') - """ - LOG = logging.getLogger('GitArchiver') - - def __init__(self, prefix='', exclude=True, force_sub=False, extra=None, main_repo_abspath=None): - """ - @param prefix: Prefix used to prepend all paths in the resulting archive. - Extra file paths are only prefixed if they are not relative. - E.g. if prefix is 'foo' and extra is ['bar', '/baz'] the resulting archive will look like this: - / - baz - foo/ - bar - @type prefix: string - - @param exclude: Determines whether archiver should follow rules specified in .gitattributes files. - @type exclude: bool - - @param force_sub: Determines whether submodules are initialized and updated before archiving. - @type force_sub: bool - - @param extra: List of extra paths to include in the resulting archive. - @type extra: list - - @param main_repo_abspath: Absolute path to the main repository (or one of subdirectories). - If given path is path to a subdirectory (but not a submodule directory!) it will be replaced - with abspath to top-level directory of the repository. - If None, current cwd is used. 
- @type main_repo_abspath: string - """ - if extra is None: - extra = [] - - if main_repo_abspath is None: - main_repo_abspath = path.abspath('') - elif not path.isabs(main_repo_abspath): - raise ValueError("You MUST pass absolute path to the main git repository.") - - try: - self.run_shell("[ -d .git ] || git rev-parse --git-dir > /dev/null 2>&1", main_repo_abspath) - except Exception as e: - raise ValueError("Not a git repository (or any of the parent directories).") - - main_repo_abspath = path.abspath(self.read_git_shell('git rev-parse --show-toplevel', main_repo_abspath).rstrip()) - - self.prefix = prefix - self.exclude = exclude - self.extra = extra - self.force_sub = force_sub - self.main_repo_abspath = main_repo_abspath - - def create(self, output_path, dry_run=False, output_format=None): - """ - Create the archive at output_file_path. - - Type of the archive is determined either by extension of output_file_path or by output_format. - Supported formats are: gz, zip, bz2, xz, tar, tgz, txz - - @param output_path: Output file path. - @type output_path: string - - @param dry_run: Determines whether create should do nothing but print what it would archive. - @type dry_run: bool - - @param output_format: Determines format of the output archive. If None, format is determined from extension - of output_file_path. - @type output_format: string - """ - if output_format is None: - file_name, file_ext = path.splitext(output_path) - output_format = file_ext[len(extsep):].lower() - self.LOG.debug("Output format is not explicitly set, determined format is {}.".format(output_format)) - - if not dry_run: - if output_format == 'zip': - archive = ZipFile(path.abspath(output_path), 'w') - - def add_file(file_path, arcname): - if not path.islink(file_path): - archive.write(file_path, arcname, ZIP_DEFLATED) - else: - i = ZipInfo(arcname) - i.create_system = 3 - i.external_attr = 0xA1ED0000 - archive.writestr(i, readlink(file_path)) - elif output_format in ['tar', 'bz2', 'gz', 'xz', 'tgz', 'txz']: - if output_format == 'tar': - t_mode = 'w' - elif output_format == 'tgz': - t_mode = 'w:gz' - elif output_format == 'txz': - t_mode = 'w:xz' - else: - t_mode = 'w:{}'.format(output_format) - - archive = tarfile.open(path.abspath(output_path), t_mode) - add_file = lambda file_path, arcname: archive.add(file_path, arcname) - else: - raise RuntimeError("Unknown format: {}".format(output_format)) - - def archiver(file_path, arcname): - self.LOG.debug("Compressing {} => {}...".format(file_path, arcname)) - add_file(file_path, arcname) - else: - archive = None - archiver = lambda file_path, arcname: self.LOG.info("{} => {}".format(file_path, arcname)) - - self.archive_all_files(archiver) - - if archive is not None: - archive.close() - - def get_exclude_patterns(self, repo_abspath, repo_file_paths): - """ - Returns exclude patterns for a given repo. It looks for .gitattributes files in repo_file_paths. - - Resulting dictionary will contain exclude patterns per path (relative to the repo_abspath). - E.g. {('.', 'Catalyst', 'Editions', 'Base'), ['Foo*', '*Bar']} - - @type repo_abspath: string - @param repo_abspath: Absolute path to the git repository. - - @type repo_file_paths: list - @param repo_file_paths: List of paths relative to the repo_abspath that are under git control. - - @rtype: dict - @return: Dictionary representing exclude patterns. - Keys are tuples of strings. Values are lists of strings. - Returns None if self.exclude is not set. 
- """ - if not self.exclude: - return None - - def read_attributes(attributes_abspath): - patterns = [] - if path.isfile(attributes_abspath): - attributes = open(attributes_abspath, 'r').readlines() - patterns = [] - for line in attributes: - tokens = line.strip().split() - if "export-ignore" in tokens[1:]: - patterns.append(tokens[0]) - return patterns - - exclude_patterns = {(): []} - - # There may be no gitattributes. - try: - global_attributes_abspath = self.read_shell("git config --get core.attributesfile", repo_abspath).rstrip() - exclude_patterns[()] = read_attributes(global_attributes_abspath) - except: - # And it's valid to not have them. - pass - - for attributes_abspath in [path.join(repo_abspath, f) for f in repo_file_paths if f.endswith(".gitattributes")]: - # Each .gitattributes affects only files within its directory. - key = tuple(self.get_path_components(repo_abspath, path.dirname(attributes_abspath))) - exclude_patterns[key] = read_attributes(attributes_abspath) - - local_attributes_abspath = path.join(repo_abspath, ".git", "info", "attributes") - key = tuple(self.get_path_components(repo_abspath, repo_abspath)) - - if key in exclude_patterns: - exclude_patterns[key].extend(read_attributes(local_attributes_abspath)) - else: - exclude_patterns[key] = read_attributes(local_attributes_abspath) - - return exclude_patterns - - def is_file_excluded(self, repo_abspath, repo_file_path, exclude_patterns): - """ - Checks whether file at a given path is excluded. - - @type repo_abspath: string - @param repo_abspath: Absolute path to the git repository. - - @type repo_file_path: string - @param repo_file_path: Path to a file within repo_abspath. - - @type exclude_patterns: dict - @param exclude_patterns: Exclude patterns with format specified for get_exclude_patterns. - - @rtype: bool - @return: True if file should be excluded. Otherwise False. - """ - if exclude_patterns is None or not len(exclude_patterns): - return False - - from fnmatch import fnmatch - - file_name = path.basename(repo_file_path) - components = self.get_path_components(repo_abspath, path.join(repo_abspath, path.dirname(repo_file_path))) - - is_excluded = False - # We should check all patterns specified in intermediate directories to the given file. - # At the end we should also check for the global patterns (key '()' or empty tuple). - while not is_excluded: - key = tuple(components) - if key in exclude_patterns: - patterns = exclude_patterns[key] - for p in patterns: - if fnmatch(file_name, p) or fnmatch(repo_file_path, p): - self.LOG.debug("Exclude pattern matched {}: {}".format(p, repo_file_path)) - is_excluded = True - - if not len(components): - break - - components.pop() - - return is_excluded - - def archive_all_files(self, archiver): - """ - Archive all files using archiver. - - @param archiver: Function that accepts 2 arguments: abspath to file on the system and relative path within archive. - """ - for file_path in self.extra: - archiver(path.abspath(file_path), path.join(self.prefix, file_path)) - - for file_path in self.walk_git_files(): - archiver(path.join(self.main_repo_abspath, file_path), path.join(self.prefix, file_path)) - - def walk_git_files(self, repo_path=''): - """ - An iterator method that yields a file path relative to main_repo_abspath - for each file that should be included in the archive. - Skips those that match the exclusion patterns found in - any discovered .gitattributes files along the way. - - Recurs into submodules as well. 
- - @type repo_path: string - @param repo_path: Path to the git submodule repository relative to main_repo_abspath. - - @rtype: iterator - @return: Iterator to traverse files under git control relative to main_repo_abspath. - """ - repo_abspath = path.join(self.main_repo_abspath, repo_path) - repo_file_paths = self.read_git_shell("git ls-files --cached --full-name --no-empty-directory", repo_abspath).splitlines() - exclude_patterns = self.get_exclude_patterns(repo_abspath, repo_file_paths) - - for repo_file_path in repo_file_paths: - # Git puts path in quotes if file path has unicode characters. - repo_file_path = repo_file_path.strip('"') # file path relative to current repo - file_name = path.basename(repo_file_path) - main_repo_file_path = path.join(repo_path, repo_file_path) # file path relative to the main repo - - # Only list symlinks and files that don't start with git. - if file_name.startswith(".git") or (not path.islink(main_repo_file_path) and path.isdir(main_repo_file_path)): - continue - - if self.is_file_excluded(repo_abspath, repo_file_path, exclude_patterns): - continue - - yield main_repo_file_path - - if self.force_sub: - self.run_shell("git submodule init", repo_abspath) - self.run_shell("git submodule update", repo_abspath) - - for submodule_path in self.read_shell("git submodule --quiet foreach 'pwd -P'", repo_abspath).splitlines(): - # Shell command returns absolute paths to submodules. - submodule_path = path.relpath(submodule_path, self.main_repo_abspath) - for file_path in self.walk_git_files(submodule_path): - yield file_path - - @staticmethod - def get_path_components(repo_abspath, abspath): - """ - Split given abspath into components relative to repo_abspath. - These components are primarily used as unique keys of files and folders within a repository. - - E.g. if repo_abspath is '/Documents/Hobby/ParaView/' and abspath is - '/Documents/Hobby/ParaView/Catalyst/Editions/Base/', function will return: - ['.', 'Catalyst', 'Editions', 'Base'] - - First element is always '.' (concrete symbol depends on OS). - - @param repo_abspath: Absolute path to the git repository. Normalized via os.path.normpath. - @type repo_abspath: string - - @param abspath: Absolute path to a file within repo_abspath. Normalized via os.path.normpath. - @type abspath: string - - @return: List of path components. - @rtype: list - """ - repo_abspath = path.normpath(repo_abspath) - abspath = path.normpath(abspath) - - if not path.isabs(repo_abspath): - raise ValueError("repo_abspath MUST be absolute path.") - - if not path.isabs(abspath): - raise ValueError("abspath MUST be absoulte path.") - - if not path.commonprefix([repo_abspath, abspath]): - raise ValueError("abspath (\"{}\") MUST have common prefix with repo_abspath (\"{}\")".format(abspath, repo_abspath)) - - components = [] - - while not abspath == repo_abspath: - abspath, tail = path.split(abspath) - - if tail: - components.insert(0, tail) - - components.insert(0, curdir) - return components - - @staticmethod - def run_shell(cmd, cwd=None): - """ - Runs shell command. - - @type cmd: string - @param cmd: Command to be executed. - - @type cwd: string - @param cwd: Working directory. - - @rtype: int - @return: Return code of the command. - - @raise CalledProcessError: Raises exception if return code of the command is non-zero. 
- """ - p = Popen(cmd, shell=True, cwd=cwd) - p.wait() - - if p.returncode: - raise CalledProcessError(returncode=p.returncode, cmd=cmd) - - return p.returncode - - @staticmethod - def read_shell(cmd, cwd=None, encoding='utf-8'): - """ - Runs shell command and reads output. - - @type cmd: string - @param cmd: Command to be executed. - - @type cwd: string - @param cwd: Working directory. - - @type encoding: string - @param encoding: Encoding used to decode bytes returned by Popen into string. - - @rtype: string - @return: Output of the command. - - @raise CalledProcessError: Raises exception if return code of the command is non-zero. - """ - p = Popen(cmd, shell=True, stdout=PIPE, cwd=cwd) - output, _ = p.communicate() - output = output.decode(encoding) - - if p.returncode: - if sys.version_info > (2,6): - raise CalledProcessError(returncode=p.returncode, cmd=cmd, output=output) - else: - raise CalledProcessError(returncode=p.returncode, cmd=cmd) - - return output - - @staticmethod - def read_git_shell(cmd, cwd=None): - """ - Runs git shell command, reads output and decodes it into unicode string - - @type cmd: string - @param cmd: Command to be executed. - - @type cwd: string - @param cwd: Working directory. - - @rtype: string - @return: Output of the command. - - @raise CalledProcessError: Raises exception if return code of the command is non-zero. - """ - p = Popen(cmd, shell=True, stdout=PIPE, cwd=cwd) - output, _ = p.communicate() - output = output.decode('unicode_escape').encode('raw_unicode_escape').decode('utf-8') - - if p.returncode: - if sys.version_info > (2,6): - raise CalledProcessError(returncode=p.returncode, cmd=cmd, output=output) - else: - raise CalledProcessError(returncode=p.returncode, cmd=cmd) - - return output - - -if __name__ == '__main__': - from optparse import OptionParser - - parser = OptionParser(usage="usage: %prog [-v] [--prefix PREFIX] [--no-exclude] [--force-submodules] [--extra EXTRA1 [EXTRA2]] [--dry-run] OUTPUT_FILE", - version="%prog {}".format(__version__)) - - parser.add_option('--prefix', - type='string', - dest='prefix', - default=None, - help="prepend PREFIX to each filename in the archive. OUTPUT_FILE name is used by default to avoid tarbomb. 
You can set it to '' in order to explicitly request tarbomb") - - parser.add_option('-v', '--verbose', - action='store_true', - dest='verbose', - help='enable verbose mode') - - parser.add_option('--no-exclude', - action='store_false', - dest='exclude', - default=True, - help="don't read .gitattributes files for patterns containing export-ignore attrib") - - parser.add_option('--force-submodules', - action='store_true', - dest='force_sub', - help="force a git submodule init && git submodule update at each level before iterating submodules") - - parser.add_option('--extra', - action='append', - dest='extra', - default=[], - help="any additional files to include in the archive") - - parser.add_option('--dry-run', - action='store_true', - dest='dry_run', - help="don't actually archive anything, just show what would be done") - - options, args = parser.parse_args() - - if len(args) != 1: - parser.error("You must specify exactly one output file") - - output_file_path = args[0] - - if path.isdir(output_file_path): - parser.error("You cannot use directory as output") - - # avoid tarbomb - if options.prefix is not None: - options.prefix = path.join(options.prefix, '') - else: - import re - - output_name = path.basename(output_file_path) - output_name = re.sub('(\.zip|\.tar|\.tgz|\.txz|\.gz|\.bz2|\.xz|\.tar\.gz|\.tar\.bz2|\.tar\.xz)$', '', output_name) or "Archive" - options.prefix = path.join(output_name, '') - - try: - handler = logging.StreamHandler(sys.stdout) - handler.setFormatter(logging.Formatter('%(message)s')) - GitArchiver.LOG.addHandler(handler) - GitArchiver.LOG.setLevel(logging.DEBUG if options.verbose else logging.INFO) - archiver = GitArchiver(options.prefix, - options.exclude, - options.force_sub, - options.extra) - archiver.create(output_file_path, options.dry_run) - except Exception as e: - parser.exit(2, "{}\n".format(e)) - - sys.exit(0) diff --git a/scylla/scripts/scylla_current_repo b/scylla/scripts/scylla_current_repo deleted file mode 100755 index 6b84b1c..0000000 --- a/scylla/scripts/scylla_current_repo +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -VERSION=$(./SCYLLA-VERSION-GEN) -SCYLLA_VERSION=$(cat build/SCYLLA-VERSION-FILE) -SCYLLA_RELEASE=$(cat build/SCYLLA-RELEASE-FILE) - -. /etc/os-release - -if [ "$SCYLLA_VERSION" = "666.development" ]; then - if [ "$ID" = "ubuntu" ]; then - CODENAME=`lsb_release -c|awk '{print $2}'` - if [ "$CODENAME" = "trusty" ]; then - CODENAME=ubuntu - fi - echo http://downloads.scylladb.com/deb/unstable/$CODENAME/master/latest/scylla.list - elif [ "$ID" = "centos" ]; then - echo http://downloads.scylladb.com/rpm/unstable/centos/master/latest/scylla.repo - elif [ "$ID" = "fedora" ]; then - echo http://downloads.scylladb.com/rpm/unstable/fedora/master/latest/scylla.repo - else - echo "Unsupported distribution." - exit 1 - fi -else - REPO_VERSION=$(echo $SCYLLA_VERSION |sed -e "s/^\([0-9]*\.[0-9]*\).*/\1/") - if [ "$ID" = "ubuntu" ]; then - CODENAME=`lsb_release -c|awk '{print $2}'` - echo http://downloads.scylladb.com/deb/ubuntu/scylla-$REPO_VERSION-$CODENAME.list - elif [ "$ID" = "centos" ]; then - echo http://downloads.scylladb.com/rpm/centos/scylla-$REPO_VERSION.repo - elif [ "$ID" = "fedora" ]; then - echo http://downloads.scylladb.com/rpm/fedora/scylla-$REPO_VERSION.repo - else - echo "Unsupported distribution." 
- exit 1 - fi -fi diff --git a/scylla/scripts/scylla_install_pkg b/scylla/scripts/scylla_install_pkg deleted file mode 100755 index e3a8fce..0000000 --- a/scylla/scripts/scylla_install_pkg +++ /dev/null @@ -1,106 +0,0 @@ -#!/bin/bash -e -# -# Copyright (C) 2015 ScyllaDB - -if [ "`id -u`" -ne 0 ]; then - echo "Requires root permission." - exit 1 -fi - -print_usage() { - echo "scylla_install_pkg --local-pkg /home/scylla/rpms --repo [URL]" - echo " --local-pkg install locally built .rpm/.deb on specified directory" - echo " --repo repository for both install and update, specify .repo/.list file URL" - echo " --repo-for-install repository for install, specify .repo/.list file URL" - echo " --repo-for-update repository for update, specify .repo/.list file URL" - exit 1 -} - -LOCAL_PKG= -UNSTABLE=0 -REPO_FOR_INSTALL= -REPO_FOR_UPDATE= -while [ $# -gt 0 ]; do - case "$1" in - "--local-pkg") - LOCAL_PKG=$2 - shift 2 - ;; - "--repo") - REPO_FOR_INSTALL=$2 - REPO_FOR_UPDATE=$2 - shift 2 - ;; - "--repo-for-install") - REPO_FOR_INSTALL=$2 - shift 2 - ;; - "--repo-for-update") - REPO_FOR_UPDATE=$2 - shift 2 - ;; - *) - print_usage - shift 1 - ;; - esac -done - -. /etc/os-release - -if [ -f /etc/debian_version ]; then - echo "#!/bin/sh" >> /usr/sbin/policy-rc.d - echo "exit 101" >> /usr/sbin/policy-rc.d - chmod +x /usr/sbin/policy-rc.d - cp /etc/hosts /etc/hosts.orig - echo 127.0.0.1 `hostname` >> /etc/hosts - if [ "$REPO_FOR_INSTALL" != "" ]; then - curl -o /etc/apt/sources.list.d/scylla_install.list $REPO_FOR_INSTALL - fi - apt-get update - if [ "$LOCAL_PKG" = "" ]; then - apt-get install -y --force-yes scylla - else - if [ ! -f /usr/bin/gdebi ]; then - apt-get install -y --force-yes gdebi-core - fi - echo Y | gdebi $LOCAL_PKG/scylla-kernel-conf*.deb - echo Y | gdebi $LOCAL_PKG/scylla-conf*.deb - echo Y | gdebi $LOCAL_PKG/scylla-server_*.deb - echo Y | gdebi $LOCAL_PKG/scylla-server-dbg*.deb - echo Y | gdebi $LOCAL_PKG/scylla-jmx*.deb - echo Y | gdebi $LOCAL_PKG/scylla-tools*.deb - echo Y | gdebi $LOCAL_PKG/scylla_*.deb - fi - mv /etc/hosts.orig /etc/hosts - rm /usr/sbin/policy-rc.d - rm /etc/apt/sources.list.d/scylla_install.list - if [ "$REPO_FOR_UPDATE" != "" ]; then - curl -o /etc/apt/sources.list.d/scylla.list $REPO_FOR_UPDATE - fi - apt-get update -else - if [ "$REPO_FOR_INSTALL" != "" ]; then - curl -o /etc/yum.repos.d/scylla_install.repo $REPO_FOR_INSTALL - fi - - if [ "$ID" = "centos" ]; then - yum install -y epel-release - elif [ "$ID" = "rhel" ]; then - rpm -ivh http://download.fedoraproject.org/pub/epel/7/x86_64/e/epel-release-7-7.noarch.rpm - else - echo "Unsupported distribution" - exit 1 - fi - - if [ "$LOCAL_PKG" = "" ]; then - yum install -y scylla - else - yum install -y $LOCAL_PKG/scylla*.*.rpm - fi - - rm /etc/yum.repos.d/scylla_install.repo - if [ "$REPO_FOR_UPDATE" != "" ]; then - curl -o /etc/yum.repos.d/scylla.repo $REPO_FOR_UPDATE - fi -fi diff --git a/scylla/scripts/update-version b/scylla/scripts/update-version deleted file mode 100755 index 7e6b1d5..0000000 --- a/scylla/scripts/update-version +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh - -if [ $# -eq 0 ]; then - echo "usage: $0 version" - exit 1 -fi - -version=$1 - -sed -i -e "s/^VERSION.*/VERSION=$version/g" SCYLLA-VERSION-GEN -git commit -m "release: prepare for $version" SCYLLA-VERSION-GEN diff --git a/scylla/scylla-blocktune b/scylla/scylla-blocktune deleted file mode 100755 index f6c9516..0000000 --- a/scylla/scylla-blocktune +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/bin/python3 - -# -# Copyright (C) 2016 ScyllaDB -# - -# 
-# This file is part of Scylla.
-#
-# Scylla is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# Scylla is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
-#
-
-import argparse, os, os.path
-
-# try to write data to a sysfs path, expect problems
-def try_write(path, data):
-    try:
-        open(path, 'w').write(data)
-    except Exception:
-        print("warning: unable to tune {} to {}".format(path, data))
-
-# update a sysfs path if it does not satisfy a check
-# function (default = check that the data is already there)
-def tune_path(path, data, check=None):
-    def default_check(current):
-        return current == data
-    if check is None:
-        check = default_check
-    if not os.path.exists(path):
-        return
-    if check(open(path).read().strip()):
-        print('already tuned: {}'.format(path))
-        return
-    print('tuning: {} {}'.format(path, data))
-    try_write(path, data + '\n')
-
-tuned_blockdevs = set()
-
-# tune a blockdevice (sysfs node); updates the I/O scheduler
-# and merge behavior. Tunes dependent devices as well.
-def tune_blockdev(path):
-    from os.path import join, exists, dirname, realpath
-    path = realpath(path)
-    print('tuning {}'.format(path))
-    if path in tuned_blockdevs:
-        return
-    tuned_blockdevs.add(path)
-    def check_sched(current):
-        return current == 'none' or '[noop]' in current
-    tune_path(join(path, 'queue', 'scheduler'), 'noop', check_sched)
-    tune_path(join(path, 'queue', 'nomerges'), '2')
-    slaves = join(path, 'slaves')
-    if exists(slaves):
-        for slave in os.listdir(slaves):
-            tune_blockdev(join(slaves, slave))
-    if exists(join(path, 'partition')):
-        tune_blockdev(dirname(path))
-
-# tunes a /dev/foo blockdev
-def tune_dev(path):
-    dev = os.stat(path).st_rdev
-    devfile = '/sys/dev/block/{}:{}'.format(dev // 256, dev % 256)
-    tune_blockdev(devfile)
-
-# tunes a filesystem
-# FIXME: btrfs
-def tune_fs(path):
-    dev = os.stat(path).st_dev
-    devfile = '/sys/dev/block/{}:{}'.format(dev // 256, dev % 256)
-    tune_blockdev(devfile)
-
-# tunes all filesystems referenced from a scylla.yaml
-def tune_yaml(path):
-    import yaml
-    y = yaml.load(open(path))
-    for fs in y['data_file_directories']:
-        tune_fs(fs)
-    tune_fs(y['commitlog_directory'])
-
-ap = argparse.ArgumentParser('Tune filesystems for ScyllaDB')
-ap.add_argument('--filesystem', metavar='PATH', action='append', dest='fs', default=[],
-                help='Tune filesystem containing PATH')
-ap.add_argument('--dev', metavar='PATH', action='append', dest='dev', default=[],
-                help='Tune device node PATH')
-ap.add_argument('--config', metavar='YAML', action='append', dest='yaml', default=[],
-                help='Process given scylla.yaml')
-
-args = ap.parse_args()
-
-if not args.yaml and not args.fs and not args.dev:
-    tune_yaml('/etc/scylla/scylla.yaml')
-else:
-    for yaml in args.yaml:
-        tune_yaml(yaml)
-    for fs in args.fs:
-        tune_fs(fs)
-    for dev in args.dev:
-        tune_dev(dev)
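tune_path() above is deliberately idempotent: it reads the sysfs node first and only writes when the current value fails the check, so re-running the script is harmless. A minimal standalone sketch of that read-check-write pattern, exercised against an ordinary temp file rather than a real sysfs node (the path and values are illustrative):

    import os, tempfile

    def tune(path, data, check=lambda cur, want: cur == want):
        if not os.path.exists(path):
            return
        current = open(path).read().strip()
        if check(current, data):
            print('already tuned: {}'.format(path))
            return
        print('tuning: {} {}'.format(path, data))
        open(path, 'w').write(data + '\n')

    node = os.path.join(tempfile.mkdtemp(), 'scheduler')
    open(node, 'w').write('cfq\n')
    tune(node, 'noop')   # first run writes the new value
    tune(node, 'noop')   # second run reports: already tuned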
diff --git a/scylla/scylla-housekeeping b/scylla/scylla-housekeeping
deleted file mode 100755
index def59f6..0000000
--- a/scylla/scylla-housekeeping
+++ /dev/null
@@ -1,164 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright (C) 2016 ScyllaDB
-#
-
-#
-# This file is part of Scylla.
-#
-# Scylla is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# Scylla is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
-#
-from __future__ import print_function
-
-import argparse
-import json
-import urllib
-import urllib2
-import requests
-import ConfigParser
-import os
-import sys
-import subprocess
-import uuid
-import re
-import glob
-from pkg_resources import parse_version
-
-VERSION = "1.0"
-quiet = False
-# Temporary url for the review
-version_url = "https://i6a5h9l1kl.execute-api.us-east-1.amazonaws.com/prod/check_version"
-
-def trace(*vals):
-    print(''.join(vals))
-
-def traceln(*vals):
-    trace(*(vals + ('\n',)))
-
-def help(args):
-    parser.print_help()
-
-def sh_command(*args):
-    p = subprocess.Popen(args, stdout=subprocess.PIPE,
-                         stderr=subprocess.PIPE)
-    out, err = p.communicate()
-    if err:
-        raise Exception(err)
-    return out
-
-def get_json_from_url(path):
-    data = sh_command("curl", "-s", "-X", "GET", path)
-    return json.loads(data)
-
-def get_api(path):
-    return get_json_from_url("http://localhost:10000" + path)
-
-def version_compare(a, b):
-    return parse_version(a) < parse_version(b)
-
-def create_uuid_file(fl):
-    with open(fl, 'w') as myfile:
-        myfile.write(str(uuid.uuid1()) + "\n")
-
-def sanitize_version(version):
-    """
-    Newer setuptools don't like dashed version strings; trim them to avoid
-    false negative version_compare() checks.
-    """
-    if version and '-' in version:
-        return version.split('-', 1)[0]
-    else:
-        return version
-
-def get_repo_file(dir):
-    files = glob.glob(dir)
-    files.sort(key=os.path.getmtime, reverse=True)
-    for name in files:
-        with open(name, 'r') as myfile:
-            for line in myfile:
-                match = re.search(".*http.?://.*/scylladb/([^/\s]+)/deb/([^/\s]+)\s.*", line)
-                if match:
-                    return match.group(2), match.group(1)
-                match = re.search(".*http.?://.*/scylladb/([^/]+)/rpm/[^/]+/([^/\s]+)/.*", line)
-                if match:
-                    return match.group(2), match.group(1)
-    return None, None
-
-def check_version(ar):
-    if config and (not config.has_option("housekeeping", "check-version") or not config.getboolean("housekeeping", "check-version")):
-        return
-    if ar.version and ar.version != '':
-        current_version = sanitize_version(ar.version)
-    else:
-        current_version = sanitize_version(get_api('/storage_service/scylla_release_version'))
-    if current_version == "":
-        # API is down, nothing to do
-        return
-    try:
-        params = "?version=" + current_version
-        if ar.mode:
-            # mode would accept any string.
-            # use i for install, c (default) for running from the command line
-            params = params + "&sts=" + ar.mode
-        if uid:
-            params = params + "&uu=" + uid
-        if repo_id:
-            params = params + "&rid=" + repo_id
-        if repo_type:
-            params = params + "&rtype=" + repo_type
-        latest_version = get_json_from_url(version_url + params)["version"]
-    except Exception:
-        traceln("Unable to retrieve version information")
-        return
-    if version_compare(current_version, latest_version):
-        traceln("A new version was found, current version=", current_version, " latest version=", latest_version)
-
-parser = argparse.ArgumentParser(description='ScyllaDB help report tool', conflict_handler="resolve")
-parser.add_argument('-q', '--quiet', action='store_true', default=False, help='Quiet mode')
-parser.add_argument('-c', '--config', default="", help='An optional config file. Specifying a missing file will terminate the script')
-parser.add_argument('--uuid', default="", help='A uuid for the requests')
-parser.add_argument('--uuid-file', default="", help='A uuid file for the requests')
-parser.add_argument('--repo-files', default="", help='The repository files that are being used for private repositories')
-
-subparsers = parser.add_subparsers(help='Available commands')
-parser_help = subparsers.add_parser('help', help='Display help information')
-parser_help.set_defaults(func=help)
-parser_system = subparsers.add_parser('version', help='Check if the current running version is the latest one')
-parser_system.add_argument('--mode', default="c", help='Which mode the version check runs in')
-parser_system.add_argument('--version', default="", help='Use a given version to compare to')
-parser_system.set_defaults(func=check_version)
-
-args = parser.parse_args()
-quiet = args.quiet
-config = None
-repo_id = None
-repo_type = None
-
-if args.config != "":
-    if not os.path.isfile(args.config):
-        traceln("Config file ", args.config, " is missing, terminating")
-        sys.exit(0)
-    config = ConfigParser.SafeConfigParser()
-    config.read(args.config)
-uid = None
-if args.uuid != "":
-    uid = args.uuid
-if args.uuid_file != "":
-    if not os.path.exists(args.uuid_file):
-        create_uuid_file(args.uuid_file)
-    with open(args.uuid_file, 'r') as myfile:
-        uid = myfile.read().replace('\n', '')
-if args.repo_files != "":
-    repo_type, repo_id = get_repo_file(args.repo_files)
-args.func(args)
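The version check above hinges on two small details: dashed release strings such as "1.7.0-rc1" must be trimmed before comparison, and versions are compared with setuptools' parse_version rather than plain string order (which would rank "1.10" below "1.9"). A minimal standalone sketch of that interplay, with illustrative version values:

    from pkg_resources import parse_version

    def sanitize(v):
        # '1.6.4-20170101' -> '1.6.4'; the dashed form trips up newer setuptools
        return v.split('-', 1)[0] if v and '-' in v else v

    def is_older(current, latest):
        # semantic comparison, not lexicographic string order
        return parse_version(current) < parse_version(latest)

    print(is_older(sanitize('1.6.4-20170101'), '1.7.0'))  # True
    print(is_older(sanitize('1.7.0'), '1.7.0'))           # False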
diff --git a/scylla/seastarx.hh b/scylla/seastarx.hh
deleted file mode 100644
index 8c42481..0000000
--- a/scylla/seastarx.hh
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2017 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include <sys/socket.h> // avoid conflict between ::socket and seastar::socket
-
-namespace seastar {
-
-template <typename T>
-class shared_ptr;
-
-template <typename T>
-shared_ptr<T> make_shared(T&&);
-
-template <typename T, typename... A>
-shared_ptr<T> make_shared(A&&... a);
-
-}
-
-
-using namespace seastar;
-using seastar::shared_ptr;
-using seastar::make_shared;
diff --git a/scylla/serialization_visitors.hh b/scylla/serialization_visitors.hh
deleted file mode 100644
index 4c59aa4..0000000
--- a/scylla/serialization_visitors.hh
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright 2016 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
- */
-#pragma once
-
-#include "bytes_ostream.hh"
-#include "serializer.hh"
-
-namespace ser {
-
-// frame represents a place holder for object size which will be known later
-
-template<typename Output>
-struct place_holder { };
-
-template<typename Output>
-struct frame { };
-
-template<>
-struct place_holder<bytes_ostream> {
-    bytes_ostream::place_holder<size_type> ph;
-
-    place_holder(bytes_ostream::place_holder<size_type> ph) : ph(ph) { }
-
-    void set(bytes_ostream& out, size_type v) {
-        auto stream = ph.get_stream();
-        serialize(stream, v);
-    }
-};
-
-template<>
-struct frame<bytes_ostream> : public place_holder<bytes_ostream> {
-    bytes_ostream::size_type offset;
-
-    frame(bytes_ostream::place_holder<size_type> ph, bytes_ostream::size_type offset)
-        : place_holder(ph), offset(offset) { }
-
-    void end(bytes_ostream& out) {
-        set(out, out.size() - offset);
-    }
-};
-
-struct vector_position {
-    bytes_ostream::position pos;
-    size_type count;
-};
-
-// empty frame: behaves like a place holder, but is used when no place holder is needed
-template<typename Output>
-struct empty_frame {
-    void end(Output&) {}
-    empty_frame() = default;
-    empty_frame(const frame<Output>&){}
-};
-
-inline place_holder<bytes_ostream> start_place_holder(bytes_ostream& out) {
-    auto size_ph = out.write_place_holder<size_type>();
-    return { size_ph };
-}
-
-inline frame<bytes_ostream> start_frame(bytes_ostream& out) {
-    auto offset = out.size();
-    auto size_ph = out.write_place_holder<size_type>();
-    {
-        auto out = size_ph.get_stream();
-        serialize(out, (size_type)0);
-    }
-    return frame<bytes_ostream> { size_ph, offset };
-}
-
-template<typename Input>
-size_type read_frame_size(Input& in) {
-    auto sz = deserialize(in, boost::type<size_type>());
-    if (sz < sizeof(size_type)) {
-        throw std::runtime_error("Truncated frame");
-    }
-    return sz - sizeof(size_type);
-}
-
-
-template<>
-struct place_holder<seastar::measuring_output_stream> {
-    void set(seastar::measuring_output_stream&, size_type) { }
-};
-
-template<>
-struct frame<seastar::measuring_output_stream> : public place_holder<seastar::measuring_output_stream> {
-    void end(seastar::measuring_output_stream& out) { }
-};
-
-inline place_holder<seastar::measuring_output_stream> start_place_holder(seastar::measuring_output_stream& out) {
-    serialize(out, size_type());
-    return { };
-}
-
-inline frame<seastar::measuring_output_stream> start_frame(seastar::measuring_output_stream& out) {
-    serialize(out, size_type());
-    return { };
-}
-
-template<>
-class place_holder<seastar::simple_output_stream> {
-    seastar::simple_output_stream _substream;
-public:
-    place_holder(seastar::simple_output_stream substream)
-        : _substream(substream) { }
-
-    void set(seastar::simple_output_stream& out, size_type v) {
-        serialize(_substream, v);
-    }
-};
-
-template<>
-class frame<seastar::simple_output_stream> : public place_holder<seastar::simple_output_stream> {
-    char* _start;
-public:
-    frame(seastar::simple_output_stream ph, char* start)
-        : place_holder(ph), _start(start) { }
-
-    void
end(seastar::simple_output_stream& out) { - set(out, out.begin() - _start); - } -}; - -inline place_holder start_place_holder(seastar::simple_output_stream& out) { - return { out.write_substream(sizeof(size_type)) }; -} - -inline frame start_frame(seastar::simple_output_stream& out) { - auto start = out.begin(); - auto substream = out.write_substream(sizeof(size_type)); - { - auto sstr = substream; - serialize(sstr, size_type(0)); - } - return frame(substream, start); -} - -} diff --git a/scylla/serializer.hh b/scylla/serializer.hh deleted file mode 100644 index 0af62e3..0000000 --- a/scylla/serializer.hh +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ -#pragma once - -#include -#include -#include "core/sstring.hh" -#include -#include -#include "enum_set.hh" -#include "utils/managed_bytes.hh" -#include "bytes_ostream.hh" -#include "core/simple-stream.hh" -#include "boost/variant/variant.hpp" -#include "bytes_ostream.hh" -#include "utils/input_stream.hh" - -namespace ser { -using size_type = uint32_t; - -template -inline T deserialize_integral(Input& input) { - static_assert(std::is_integral::value, "T should be integral"); - T data; - input.read(reinterpret_cast(&data), sizeof(T)); - return le_to_cpu(data); -} - -template -inline void serialize_integral(Output& output, T data) { - static_assert(std::is_integral::value, "T should be integral"); - data = cpu_to_le(data); - output.write(reinterpret_cast(&data), sizeof(T)); -} - -template -struct serializer; - -template -struct integral_serializer { - template - static T read(Input& v) { - return deserialize_integral(v); - } - template - static void write(Output& out, T v) { - serialize_integral(out, v); - } - template - static void skip(Input& v) { - read(v); - } -}; - -template<> struct serializer : public integral_serializer {}; -template<> struct serializer : public integral_serializer {}; -template<> struct serializer : public integral_serializer {}; -template<> struct serializer : public integral_serializer {}; -template<> struct serializer : public integral_serializer {}; -template<> struct serializer : public integral_serializer {}; -template<> struct serializer : public integral_serializer {}; -template<> struct serializer : public integral_serializer {}; -template<> struct serializer : public integral_serializer {}; - -template -void safe_serialize_as_uint32(Output& output, uint64_t data); - -template -inline void serialize(Output& out, const T& v) { - serializer::write(out, v); -}; - -template -inline auto deserialize(Input& in, boost::type t) { - return serializer::read(in); -}; - -template -inline void skip(Input& v, boost::type) { - return serializer::skip(v); -} - -template -size_type get_sizeof(const T& obj); - -template -void set_size(seastar::measuring_output_stream& os, const T& obj); - -template -void set_size(Stream& os, const T& obj); - -template 
-Buffer serialize_to_buffer(const T& v, size_t head_space = 0); - -template -T deserialize_from_buffer(const Buffer&, boost::type, size_t head_space = 0); - -template -void serialize(Output& out, const boost::variant& v); - -template -boost::variant deserialize(Input& in, boost::type>); - -struct unknown_variant_type { - size_type index; - sstring data; -}; - -template -void serialize(Output& out, const unknown_variant_type& v); - -template -unknown_variant_type deserialize(Input& in, boost::type); - -template -struct normalize { - using type = T; -}; - -template <> -struct normalize { - using type = bytes; -}; - -template <> -struct normalize { - using type = bytes; -}; - -template <> -struct normalize { - using type = bytes; -}; - -template -struct is_equivalent : std::is_same>>::type, typename normalize>>::type> { -}; -} - -/* - * Import the auto generated forward decleration code - */ diff --git a/scylla/serializer_impl.hh b/scylla/serializer_impl.hh deleted file mode 100644 index 7530953..0000000 --- a/scylla/serializer_impl.hh +++ /dev/null @@ -1,506 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "serializer.hh" -#include - -namespace ser { - -template -void set_size(seastar::measuring_output_stream& os, const T& obj) { - serialize(os, uint32_t(0)); -} - -template -void set_size(Stream& os, const T& obj) { - serialize(os, get_sizeof(obj)); -} - - -template -void safe_serialize_as_uint32(Output& out, uint64_t data) { - if (data > std::numeric_limits::max()) { - throw std::runtime_error("Size is too big for serialization"); - } - serialize(out, uint32_t(data)); -} - -template -constexpr bool can_serialize_fast() { - return !std::is_same::value && std::is_integral::value && (sizeof(T) == 1 || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__); -} - -template -struct serialize_array_helper; - -template -struct serialize_array_helper { - template - static void doit(Output& out, const Container& v) { - out.write(reinterpret_cast(v.data()), v.size() * sizeof(T)); - } -}; - -template -struct serialize_array_helper { - template - static void doit(Output& out, const Container& v) { - for (auto&& e : v) { - serialize(out, e); - } - } -}; - -template -static inline void serialize_array(Output& out, const Container& v) { - serialize_array_helper(), T>::doit(out, v); -} - -template -struct container_traits; - -template -struct container_traits> { - struct back_emplacer { - std::vector& c; - back_emplacer(std::vector& c_) : c(c_) {} - void operator()(T&& v) { - c.emplace_back(std::move(v)); - } - }; - void resize(std::vector& c, size_t size) { - c.resize(size); - } -}; - -template -struct container_traits> { - struct back_emplacer { - std::array& c; - size_t idx = 0; - back_emplacer(std::array& c_) : c(c_) {} - void operator()(T&& v) { - c[idx++] = std::move(v); - } - }; - void resize(std::array& c, size_t size) {} -}; - -template 
-struct deserialize_array_helper; - -template -struct deserialize_array_helper { - template - static void doit(Input& in, Container& v, size_t sz) { - container_traits t; - t.resize(v, sz); - in.read(reinterpret_cast(v.data()), v.size() * sizeof(T)); - } - template - static void skip(Input& in, size_t sz) { - in.skip(sz * sizeof(T)); - } -}; - -template -struct deserialize_array_helper { - template - static void doit(Input& in, Container& v, size_t sz) { - typename container_traits::back_emplacer be(v); - while (sz--) { - be(deserialize(in, boost::type())); - } - } - template - static void skip(Input& in, size_t sz) { - while (sz--) { - serializer::skip(in); - } - } -}; - -template -static inline void deserialize_array(Input& in, Container& v, size_t sz) { - deserialize_array_helper(), T>::doit(in, v, sz); -} - -template -static inline void skip_array(Input& in, size_t sz) { - deserialize_array_helper(), T>::skip(in, sz); -} - -template -struct serializer> { - template - static std::vector read(Input& in) { - auto sz = deserialize(in, boost::type()); - std::vector v; - v.reserve(sz); - deserialize_array(in, v, sz); - return v; - } - template - static void write(Output& out, const std::vector& v) { - safe_serialize_as_uint32(out, v.size()); - serialize_array(out, v); - } - template - static void skip(Input& in) { - auto sz = deserialize(in, boost::type()); - skip_array(in, sz); - } -}; - -template -struct serializer> { - template - static std::chrono::duration read(Input& in) { - return std::chrono::duration(deserialize(in, boost::type())); - } - template - static void write(Output& out, const std::chrono::duration& d) { - serialize(out, d.count()); - } - template - static void skip(Input& in) { - read(in); - } -}; - -template -struct serializer> { - using value_type = std::chrono::time_point; - - template - static value_type read(Input& in) { - return typename Clock::time_point(Duration(deserialize(in, boost::type()))); - } - template - static void write(Output& out, const value_type& v) { - serialize(out, uint64_t(v.time_since_epoch().count())); - } - template - static void skip(Input& in) { - read(in); - } -}; - -template -struct serializer> { - template - static std::array read(Input& in) { - std::array v; - deserialize_array(in, v, N); - return v; - } - template - static void write(Output& out, const std::array& v) { - serialize_array(out, v); - } - template - static void skip(Input& in) { - skip_array(in, N); - } -}; - -template -struct serializer> { - template - static std::map read(Input& in) { - auto sz = deserialize(in, boost::type()); - std::map m; - while (sz--) { - K k = deserialize(in, boost::type()); - V v = deserialize(in, boost::type()); - m[k] = v; - } - return m; - } - template - static void write(Output& out, const std::map& v) { - safe_serialize_as_uint32(out, v.size()); - for (auto&& e : v) { - serialize(out, e.first); - serialize(out, e.second); - } - } - template - static void skip(Input& in) { - auto sz = deserialize(in, boost::type()); - while (sz--) { - serializer::skip(in); - serializer::skip(in); - } - } -}; - -template -struct serializer> { - template - static bool_class read(Input& in) { - return bool_class(deserialize(in, boost::type())); - } - - template - static void write(Output& out, bool_class v) { - serialize(out, bool(v)); - } - - template - static void skip(Input& in) { - read(in); - } -}; - -template -class deserialized_bytes_proxy { - seastar::memory_input_stream _stream; -public: - explicit deserialized_bytes_proxy(seastar::memory_input_stream 
stream) - : _stream(std::move(stream)) { } - - [[gnu::always_inline]] - operator bytes() && { - bytes v(bytes::initialized_later(), _stream.size()); - _stream.read(reinterpret_cast(v.begin()), _stream.size()); - return v; - } - - [[gnu::always_inline]] - operator managed_bytes() && { - managed_bytes v(managed_bytes::initialized_later(), _stream.size()); - _stream.read(reinterpret_cast(v.begin()), _stream.size()); - return v; - } - - [[gnu::always_inline]] - operator bytes_ostream() && { - bytes_ostream v; - _stream.copy_to(v); - return v; - } -}; - -template<> -struct serializer { - template - static deserialized_bytes_proxy read(Input& in) { - auto sz = deserialize(in, boost::type()); - return deserialized_bytes_proxy(in.read_substream(sz)); - } - template - static void write(Output& out, bytes_view v) { - safe_serialize_as_uint32(out, uint32_t(v.size())); - out.write(reinterpret_cast(v.begin()), v.size()); - } - template - static void write(Output& out, const bytes& v) { - write(out, static_cast(v)); - } - template - static void write(Output& out, const managed_bytes& v) { - write(out, static_cast(v)); - } - template - static void write(Output& out, const bytes_ostream& v) { - safe_serialize_as_uint32(out, uint32_t(v.size())); - for (bytes_view frag : v.fragments()) { - out.write(reinterpret_cast(frag.begin()), frag.size()); - } - } - template - static void skip(Input& in) { - auto sz = deserialize(in, boost::type()); - in.skip(sz); - } -}; - -template -void serialize(Output& out, const bytes_view& v) { - serializer::write(out, v); -} -template -void serialize(Output& out, const managed_bytes& v) { - serializer::write(out, v); -} -template -void serialize(Output& out, const bytes_ostream& v) { - serializer::write(out, v); -} - -template -struct serializer> { - template - static std::experimental::optional read(Input& in) { - std::experimental::optional v; - auto b = deserialize(in, boost::type()); - if (b) { - v = deserialize(in, boost::type()); - } - return v; - } - template - static void write(Output& out, const std::experimental::optional& v) { - serialize(out, bool(v)); - if (v) { - serialize(out, v.value()); - } - } - template - static void skip(Input& in) { - auto present = deserialize(in, boost::type()); - if (present) { - serializer::skip(in); - } - } -}; - -template<> -struct serializer { - template - static sstring read(Input& in) { - auto sz = deserialize(in, boost::type()); - sstring v(sstring::initialized_later(), sz); - in.read(v.begin(), sz); - return v; - } - template - static void write(Output& out, const sstring& v) { - safe_serialize_as_uint32(out, uint32_t(v.size())); - out.write(v.begin(), v.size()); - } - template - static void skip(Input& in) { - in.skip(deserialize(in, boost::type())); - } -}; - -template -struct serializer> { - template - static std::unique_ptr read(Input& in) { - std::unique_ptr v; - auto b = deserialize(in, boost::type()); - if (b) { - v = std::make_unique(deserialize(in, boost::type())); - } - return v; - } - template - static void write(Output& out, const std::unique_ptr& v) { - serialize(out, bool(v)); - if (v) { - serialize(out, *v); - } - } - template - static void skip(Input& in) { - auto present = deserialize(in, boost::type()); - if (present) { - serializer::skip(in); - } - } -}; - -template -struct serializer> { - template - static enum_set read(Input& in) { - return enum_set::from_mask(deserialize(in, boost::type())); - } - template - static void write(Output& out, enum_set v) { - serialize(out, uint64_t(v.mask())); - } - template - 
static void skip(Input& in) { - read(in); - } -}; - -template -size_type get_sizeof(const T& obj) { - seastar::measuring_output_stream ms; - serialize(ms, obj); - auto size = ms.size(); - if (size > std::numeric_limits::max()) { - throw std::runtime_error("Object is too big for get_sizeof"); - } - return size; -} - -template -Buffer serialize_to_buffer(const T& v, size_t head_space) { - seastar::measuring_output_stream measure; - ser::serialize(measure, v); - Buffer ret(typename Buffer::initialized_later(), measure.size() + head_space); - seastar::simple_output_stream out(reinterpret_cast(ret.begin()), ret.size(), head_space); - ser::serialize(out, v); - return ret; -} - -template -T deserialize_from_buffer(const Buffer& buf, boost::type type, size_t head_space) { - seastar::simple_input_stream in(reinterpret_cast(buf.begin() + head_space), buf.size() - head_space); - return deserialize(in, std::move(type)); -} - -inline -utils::input_stream as_input_stream(bytes_view b) { - return utils::input_stream::simple(reinterpret_cast(b.begin()), b.size()); -} - -inline -utils::input_stream as_input_stream(const bytes_ostream& b) { - if (b.is_linearized()) { - return as_input_stream(b.view()); - } - return utils::input_stream::fragmented(b.fragments().begin(), b.size()); -} - -template -void serialize(Output& out, const boost::variant& v) {} - -template -boost::variant deserialize(Input& in, boost::type>) { - return boost::variant(); -} - - -template -void serialize(Output& out, const unknown_variant_type& v) { - out.write(v.data.begin(), v.data.size()); -} -template -unknown_variant_type deserialize(Input& in, boost::type) { - return seastar::with_serialized_stream(in, [] (auto& in) { - auto size = deserialize(in, boost::type()); - auto index = deserialize(in, boost::type()); - auto sz = size - sizeof(size_type) * 2; - sstring v(sstring::initialized_later(), sz); - in.read(v.begin(), sz); - return unknown_variant_type{ index, std::move(v) }; - }); -} -} diff --git a/scylla/service/cache_hitrate_calculator.hh b/scylla/service/cache_hitrate_calculator.hh deleted file mode 100644 index 13f45eb..0000000 --- a/scylla/service/cache_hitrate_calculator.hh +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "database.hh" -#include "core/timer.hh" -#include "core/sharded.hh" - -namespace service { - -class cache_hitrate_calculator : public seastar::async_sharded_service { - seastar::sharded& _db; - seastar::sharded& _me; - timer _timer; - bool _stopped = false; - float _diff = 0; - - future recalculate_hitrates(); - void recalculate_timer(); -public: - cache_hitrate_calculator(seastar::sharded& db, seastar::sharded& me); - void run_on(size_t master, lowres_clock::duration d = std::chrono::milliseconds(2000)); - - future<> stop(); -}; - -} diff --git a/scylla/service/client_state.cc b/scylla/service/client_state.cc deleted file mode 100644 index 65b8e88..0000000 --- a/scylla/service/client_state.cc +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "client_state.hh" -#include "auth/auth.hh" -#include "auth/authorizer.hh" -#include "auth/authenticator.hh" -#include "exceptions/exceptions.hh" -#include "validation.hh" -#include "db/system_keyspace.hh" -#include "db/schema_tables.hh" -#include "tracing/trace_keyspace_helper.hh" - -void service::client_state::set_login(::shared_ptr user) { - if (user == nullptr) { - throw std::invalid_argument("Must provide user"); - } - _user = std::move(user); -} - -future<> service::client_state::check_user_exists() { - if (_user->is_anonymous()) { - return make_ready_future(); - } - - return auth::auth::is_existing_user(_user->name()).then([user = _user](bool exists) mutable { - if (!exists) { - throw exceptions::authentication_exception( - sprint("User %s doesn't exist - create it with CREATE USER query first", - user->name())); - } - return make_ready_future(); - }); -} - -void service::client_state::validate_login() const { - if (!_user) { - throw exceptions::unauthorized_exception("You have not logged in"); - } -} - -void service::client_state::ensure_not_anonymous() const { - validate_login(); - if (_user->is_anonymous()) { - throw exceptions::unauthorized_exception("You have to be logged in and not anonymous to perform this request"); - } -} - -void service::client_state::merge(const client_state& other) { - if (other._dirty) { - _keyspace = other._keyspace; - } - if (_user == nullptr) { - _user = other._user; - } - _last_timestamp_micros = std::max(_last_timestamp_micros, other._last_timestamp_micros); -} - -future<> service::client_state::has_all_keyspaces_access( - auth::permission p) const { - if (_is_internal) { - return make_ready_future(); - } - validate_login(); - return ensure_has_permission(p, auth::data_resource()); -} - -future<> service::client_state::has_keyspace_access(const sstring& ks, - auth::permission p) const { - return has_access(ks, p, auth::data_resource(ks)); -} - -future<> service::client_state::has_column_family_access(const sstring& ks, - const sstring& cf, auth::permission p) const { - validation::validate_column_family(ks, cf); - return has_access(ks, p, auth::data_resource(ks, cf)); -} - -future<> service::client_state::has_schema_access(const schema& s, auth::permission p) const { - return has_access(s.ks_name(), p, auth::data_resource(s.ks_name(), s.cf_name())); -} - -future<> service::client_state::has_access(const sstring& ks, auth::permission p, auth::data_resource resource) const { - if (ks.empty()) { - throw exceptions::invalid_request_exception("You have not set a keyspace for this session"); - } - if (_is_internal) { - return make_ready_future(); - } - - validate_login(); - - // we only care about schema modification. - if (auth::permissions::ALTERATIONS.contains(p)) { - // prevent system keyspace modification - auto name = ks; - std::transform(name.begin(), name.end(), name.begin(), ::tolower); - if (is_system_keyspace(name)) { - throw exceptions::unauthorized_exception(ks + " keyspace is not user-modifiable."); - } - - // we want to allow altering AUTH_KS and TRACING_KS. 
- for (auto& n : { auth::auth::AUTH_KS, tracing::trace_keyspace_helper::KEYSPACE_NAME }) { - if (name == n && p == auth::permission::DROP) { - throw exceptions::unauthorized_exception(sprint("Cannot %s %s", auth::permissions::to_string(p), resource)); - } - } - } - - static thread_local std::set readable_system_resources = [] { - std::set tmp; - for (auto cf : { db::system_keyspace::LOCAL, db::system_keyspace::PEERS }) { - tmp.emplace(db::system_keyspace::NAME, cf); - } - for (auto cf : db::schema_tables::ALL) { - tmp.emplace(db::schema_tables::NAME, cf); - } - return tmp; - }(); - - if (p == auth::permission::SELECT && readable_system_resources.count(resource) != 0) { - return make_ready_future(); - } - if (auth::permissions::ALTERATIONS.contains(p)) { - for (auto& s : { auth::authorizer::get().protected_resources(), - auth::authenticator::get().protected_resources() }) { - if (s.count(resource)) { - throw exceptions::unauthorized_exception( - sprint("%s schema is protected", - resource)); - } - } - } - - return ensure_has_permission(p, std::move(resource)); -} - -future service::client_state::check_has_permission(auth::permission p, auth::data_resource resource) const { - std::experimental::optional parent; - if (resource.has_parent()) { - parent = resource.get_parent(); - } - - return auth::auth::get_permissions(_user, resource).then([this, p, parent = std::move(parent)](auth::permission_set set) { - if (set.contains(p)) { - return make_ready_future(true); - } - if (parent) { - return check_has_permission(p, std::move(*parent)); - } - return make_ready_future(false); - }); -} - -future<> service::client_state::ensure_has_permission(auth::permission p, auth::data_resource resource) const { - return check_has_permission(p, resource).then([this, p, resource](bool ok) { - if (!ok) { - throw exceptions::unauthorized_exception(sprint("User %s has no %s permission on %s or any of its parents", - _user->name(), - auth::permissions::to_string(p), - resource)); - } - }); -} - diff --git a/scylla/service/client_state.hh b/scylla/service/client_state.hh deleted file mode 100644 index e1a11b9..0000000 --- a/scylla/service/client_state.hh +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "exceptions/exceptions.hh" -#include "unimplemented.hh" -#include "timestamp.hh" -#include "db_clock.hh" -#include "database.hh" -#include "auth/authenticated_user.hh" -#include "auth/authenticator.hh" -#include "auth/permission.hh" -#include "tracing/tracing.hh" -#include "tracing/trace_state.hh" - -namespace service { - -/** - * State related to a client connection. - */ -class client_state { -private: - sstring _keyspace; - tracing::trace_state_ptr _trace_state_ptr; - lw_shared_ptr _tracing_session_id; -#if 0 - private static final Logger logger = LoggerFactory.getLogger(ClientState.class); - public static final SemanticVersion DEFAULT_CQL_VERSION = org.apache.cassandra.cql3.QueryProcessor.CQL_VERSION; - - private static final Set READABLE_SYSTEM_RESOURCES = new HashSet<>(); - private static final Set PROTECTED_AUTH_RESOURCES = new HashSet<>(); - - static - { - // We want these system cfs to be always readable to authenticated users since many tools rely on them - // (nodetool, cqlsh, bulkloader, etc.) - for (String cf : Iterables.concat(Arrays.asList(SystemKeyspace.LOCAL, SystemKeyspace.PEERS), LegacySchemaTables.ALL)) - READABLE_SYSTEM_RESOURCES.add(DataResource.columnFamily(SystemKeyspace.NAME, cf)); - - PROTECTED_AUTH_RESOURCES.addAll(DatabaseDescriptor.getAuthenticator().protectedResources()); - PROTECTED_AUTH_RESOURCES.addAll(DatabaseDescriptor.getAuthorizer().protectedResources()); - } - - // Current user for the session - private volatile AuthenticatedUser user; - private volatile String keyspace; -#endif - ::shared_ptr _user; - - // isInternal is used to mark ClientState as used by some internal component - // that should have an ability to modify system keyspace. - bool _is_internal; - bool _is_thrift; - - // The biggest timestamp that was returned by getTimestamp/assigned to a query - api::timestamp_type _last_timestamp_micros = 0; - - bool _dirty = false; - - // Address of a client - socket_address _remote_address; - -public: - struct internal_tag {}; - struct external_tag {}; - - void create_tracing_session(tracing::trace_type type, tracing::trace_state_props_set props) { - _trace_state_ptr = tracing::tracing::get_local_tracing_instance().create_session(type, props); - // store a session ID separately because its lifetime is not always - // coupled with the trace_state because the trace_state may already be - // destroyed when we need a session ID for a response to a client (e.g. - // in case of errors). 
-        if (_trace_state_ptr) {
-            _tracing_session_id = make_lw_shared(_trace_state_ptr->session_id());
-        }
-    }
-
-    tracing::trace_state_ptr& get_trace_state() {
-        return _trace_state_ptr;
-    }
-
-    const tracing::trace_state_ptr& get_trace_state() const {
-        return _trace_state_ptr;
-    }
-
-    client_state(external_tag, const socket_address& remote_address = socket_address(), bool thrift = false)
-            : _is_internal(false)
-            , _is_thrift(thrift)
-            , _remote_address(remote_address) {
-        if (!auth::authenticator::get().require_authentication()) {
-            _user = ::make_shared<auth::authenticated_user>();
-        }
-    }
-
-    gms::inet_address get_client_address() const {
-        return gms::inet_address(_remote_address);
-    }
-
-    client_state(internal_tag) : _keyspace("system"), _is_internal(true), _is_thrift(false) {}
-
-    void merge(const client_state& other);
-
-    bool is_thrift() const {
-        return _is_thrift;
-    }
-
-    bool is_internal() const {
-        return _is_internal;
-    }
-
-    /**
-     * @return a ClientState object for internal C* calls (not limited by any kind of auth).
-     */
-    static client_state for_internal_calls() {
-        return client_state(internal_tag());
-    }
-
-    /**
-     * @return a ClientState object for external clients (thrift/native protocol users).
-     */
-    static client_state for_external_calls() {
-        return client_state(external_tag());
-    }
-
-    static client_state for_external_thrift_calls() {
-        return client_state(external_tag(), socket_address(), true);
-    }
-
-    /**
-     * This clock guarantees that updates for the same ClientState will be ordered
-     * in the sequence seen, even if multiple updates happen in the same millisecond.
-     */
-    api::timestamp_type get_timestamp() {
-        auto current = api::new_timestamp();
-        auto last = _last_timestamp_micros;
-        auto result = last >= current ? last + 1 : current;
-        _last_timestamp_micros = result;
-        return result;
-    }
-
-#if 0
-    /**
-     * Can be used when a timestamp has been assigned by a query, but that timestamp is
-     * not directly one returned by getTimestamp() (see SP.beginAndRepairPaxos()).
-     * This ensures following calls to getTimestamp() will return a timestamp strictly
-     * greater than the one provided to this method.
-     */
-    public void updateLastTimestamp(long tstampMicros)
-    {
-        while (true)
-        {
-            long last = lastTimestampMicros.get();
-            if (tstampMicros <= last || lastTimestampMicros.compareAndSet(last, tstampMicros))
-                return;
-        }
-    }
-
-    public SocketAddress getRemoteAddress()
-    {
-        return remoteAddress;
-    }
-#endif
-
-    const sstring& get_raw_keyspace() const {
-        return _keyspace;
-    }
-
-public:
-    void set_keyspace(seastar::sharded<database>& db, sstring keyspace) {
-        // Skip keyspace validation for non-authenticated users. Apparently, some client libraries
-        // call set_keyspace() before calling login(), and we have to handle that.
-        if (_user && !db.local().has_keyspace(keyspace)) {
-            throw exceptions::invalid_request_exception(sprint("Keyspace '%s' does not exist", keyspace));
-        }
-        _keyspace = keyspace;
-        _dirty = true;
-    }
-
-    const sstring& get_keyspace() const {
-        if (_keyspace.empty()) {
-            throw exceptions::invalid_request_exception("No keyspace has been specified. USE a keyspace, or explicitly specify keyspace.tablename");
-        }
-        return _keyspace;
-    }
-
-    /**
-     * Sets the active user. Does _not_ validate anything.
-     */
-    void set_login(::shared_ptr<auth::authenticated_user>);
-
-    /**
-     * Attempts to validate login for the set user.
- */ - future<> check_user_exists(); - - future<> has_all_keyspaces_access(auth::permission) const; - future<> has_keyspace_access(const sstring&, auth::permission) const; - future<> has_column_family_access(const sstring&, const sstring&, auth::permission) const; - future<> has_schema_access(const schema& s, auth::permission p) const; - -private: - future<> has_access(const sstring&, auth::permission, auth::data_resource) const; - future check_has_permission(auth::permission, auth::data_resource) const; -public: - future<> ensure_has_permission(auth::permission, auth::data_resource) const; - - void validate_login() const; - void ensure_not_anonymous() const; // unauthorized_exception on error - -#if 0 - public void ensureIsSuper(String message) throws UnauthorizedException - { - if (DatabaseDescriptor.getAuthenticator().requireAuthentication() && (user == null || !user.isSuper())) - throw new UnauthorizedException(message); - } - - private static void validateKeyspace(String keyspace) throws InvalidRequestException - { - if (keyspace == null) - throw new InvalidRequestException("You have not set a keyspace for this session"); - } -#endif - - ::shared_ptr user() const { - return _user; - } - -#if 0 - public static SemanticVersion[] getCQLSupportedVersion() - { - return new SemanticVersion[]{ QueryProcessor.CQL_VERSION }; - } - - private Set authorize(IResource resource) - { - // AllowAllAuthorizer or manually disabled caching. - if (Auth.permissionsCache == null) - return DatabaseDescriptor.getAuthorizer().authorize(user, resource); - - try - { - return Auth.permissionsCache.get(Pair.create(user, resource)); - } - catch (ExecutionException e) - { - throw new RuntimeException(e); - } - } -#endif -}; - -} diff --git a/scylla/service/endpoint_lifecycle_subscriber.hh b/scylla/service/endpoint_lifecycle_subscriber.hh deleted file mode 100644 index 1bf428b..0000000 --- a/scylla/service/endpoint_lifecycle_subscriber.hh +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */
-
-#pragma once
-
-#include "gms/inet_address.hh"
-
-namespace service {
-
-/**
- * Interface on which interested parties can be notified of high level endpoint
- * state changes.
- *
- * Note that while IEndpointStateChangeSubscriber notifies about gossip related
- * changes (IEndpointStateChangeSubscriber.onJoin() is called when a node joins
- * gossip), this interface allows interested parties to be notified about higher level events.
- */
-class endpoint_lifecycle_subscriber {
-public:
-    virtual ~endpoint_lifecycle_subscriber()
-    { }
-
-    /**
-     * Called when a new node joins the cluster, i.e. either has just been
-     * bootstrapped or "instajoins".
-     *
-     * @param endpoint the newly added endpoint.
-     */
-    virtual void on_join_cluster(const gms::inet_address& endpoint) = 0;
-
-    /**
-     * Called when a node leaves the cluster (decommission or removeToken).
-     *
-     * @param endpoint the endpoint that is leaving.
-     */
-    virtual void on_leave_cluster(const gms::inet_address& endpoint) = 0;
-
-    /**
-     * Called when a node is marked UP.
-     *
-     * @param endpoint the endpoint marked UP.
-     */
-    virtual void on_up(const gms::inet_address& endpoint) = 0;
-
-    /**
-     * Called when a node is marked DOWN.
-     *
-     * @param endpoint the endpoint marked DOWN.
-     */
-    virtual void on_down(const gms::inet_address& endpoint) = 0;
-
-    /**
-     * Called when a node has moved (to a new token).
-     *
-     * @param endpoint the endpoint that has moved.
-     */
-    virtual void on_move(const gms::inet_address& endpoint) = 0;
-};
-
-}
diff --git a/scylla/service/load_broadcaster.hh b/scylla/service/load_broadcaster.hh
deleted file mode 100644
index 7c3c513..0000000
--- a/scylla/service/load_broadcaster.hh
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Modified by ScyllaDB
- * Copyright 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "database.hh"
-#include "gms/i_endpoint_state_change_subscriber.hh"
-#include "gms/gossiper.hh"
-
-namespace service {
-class load_broadcaster : public gms::i_endpoint_state_change_subscriber, public enable_shared_from_this<load_broadcaster>
-{
-public:
-    static constexpr std::chrono::milliseconds BROADCAST_INTERVAL{60 * 1000};
-
-private:
-    distributed<database>& _db;
-    gms::gossiper& _gossiper;
-    std::unordered_map<gms::inet_address, double> _load_info;
-    timer<> _timer;
-    future<> _done = make_ready_future<>();
-
-public:
-    load_broadcaster(distributed<database>& db, gms::gossiper& g) : _db(db), _gossiper(g) {
-        _gossiper.register_(shared_from_this());
-    }
-    ~load_broadcaster() {
-        _gossiper.unregister_(shared_from_this());
-    }
-
-    void on_change(gms::inet_address endpoint, gms::application_state state, const gms::versioned_value& value) {
-        if (state == gms::application_state::LOAD) {
-            _load_info[endpoint] = std::stod(value.value);
-        }
-    }
-
-    void on_join(gms::inet_address endpoint, gms::endpoint_state ep_state) override {
-        auto local_value = ep_state.get_application_state(gms::application_state::LOAD);
-        if (local_value) {
-            on_change(endpoint, gms::application_state::LOAD, local_value.value());
-        }
-    }
-
-    void before_change(gms::inet_address endpoint, gms::endpoint_state current_state, gms::application_state new_state_key, const gms::versioned_value& newValue) {}
-
-    void on_alive(gms::inet_address endpoint, gms::endpoint_state) override {}
-
-    void on_dead(gms::inet_address endpoint, gms::endpoint_state) override {}
-
-    void on_restart(gms::inet_address endpoint, gms::endpoint_state) override {}
-
-    void on_remove(gms::inet_address endpoint) {
-        _load_info.erase(endpoint);
-    }
-
-    const std::unordered_map<gms::inet_address, double> get_load_info() const {
-        return _load_info;
-    }
-
-    void start_broadcasting();
-    future<> stop_broadcasting();
-};
-}
diff --git a/scylla/service/migration_listener.hh b/scylla/service/migration_listener.hh
deleted file mode 100644
index 1d201ba..0000000
--- a/scylla/service/migration_listener.hh
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include 
-
-namespace service {
-
-class migration_listener {
-public:
-    virtual ~migration_listener()
-    { }
-
-    // The callback runs inside a seastar thread
-    virtual void on_create_keyspace(const sstring& ks_name) = 0;
-    virtual void on_create_column_family(const sstring& ks_name, const sstring& cf_name) = 0;
-    virtual void on_create_user_type(const sstring& ks_name, const sstring& type_name) = 0;
-    virtual void on_create_function(const sstring& ks_name, const sstring& function_name) = 0;
-    virtual void on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) = 0;
-    virtual void on_create_view(const sstring& ks_name, const sstring& view_name) = 0;
-
-    // The callback runs inside a seastar thread
-    virtual void on_update_keyspace(const sstring& ks_name) = 0;
-    virtual void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool columns_changed) = 0;
-    virtual void on_update_user_type(const sstring& ks_name, const sstring& type_name) = 0;
-    virtual void on_update_function(const sstring& ks_name, const sstring& function_name) = 0;
-    virtual void on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) = 0;
-    virtual void on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) = 0;
-
-    // The callback runs inside a seastar thread
-    virtual void on_drop_keyspace(const sstring& ks_name) = 0;
-    virtual void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) = 0;
-    virtual void on_drop_user_type(const sstring& ks_name, const sstring& type_name) = 0;
-    virtual void on_drop_function(const sstring& ks_name, const sstring& function_name) = 0;
-    virtual void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) = 0;
-    virtual void on_drop_view(const sstring& ks_name, const sstring& view_name) = 0;
-};
-
-}
diff --git a/scylla/service/migration_manager.cc b/scylla/service/migration_manager.cc
deleted file mode 100644
index 5700090..0000000
--- a/scylla/service/migration_manager.cc
+++ /dev/null
@@ -1,925 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "schema_registry.hh"
-#include "service/migration_manager.hh"
-
-#include "service/migration_listener.hh"
-#include "message/messaging_service.hh"
-#include "service/storage_service.hh"
-#include "service/migration_task.hh"
-#include "utils/runtime.hh"
-#include "gms/gossiper.hh"
-
-namespace service {
-
-static logging::logger mlogger("migration_manager");
-
-distributed<migration_manager> _the_migration_manager;
-
-using namespace std::chrono_literals;
-
-const std::chrono::milliseconds migration_manager::migration_delay = 60000ms;
-
-migration_manager::migration_manager()
-    : _listeners{}
-{
-}
-
-future<> migration_manager::stop()
-{
-    uninit_messaging_service();
-    return make_ready_future<>();
-}
-
-void migration_manager::init_messaging_service()
-{
-    auto& ms = netw::get_local_messaging_service();
-    ms.register_definitions_update([this] (const rpc::client_info& cinfo, std::vector<frozen_mutation> m) {
-        auto src = netw::messaging_service::get_source(cinfo);
-        do_with(std::move(m), get_local_shared_storage_proxy(), [src] (const std::vector<frozen_mutation>& mutations, shared_ptr<storage_proxy>& p) {
-            return service::get_local_migration_manager().merge_schema_from(src, mutations);
-        }).then_wrapped([src] (auto&& f) {
-            if (f.failed()) {
-                mlogger.error("Failed to update definitions from {}: {}", src, f.get_exception());
-            } else {
-                mlogger.debug("Applied definitions update from {}.", src);
-            }
-        });
-        return netw::messaging_service::no_wait();
-    });
-    ms.register_migration_request([this] (const rpc::client_info& cinfo) {
-        auto src = netw::messaging_service::get_source(cinfo);
-        if (!has_compatible_schema_tables_version(src.addr)) {
-            mlogger.debug("Ignoring schema request from incompatible node: {}", src);
-            return make_ready_future<std::vector<frozen_mutation>>(std::vector<frozen_mutation>());
-        }
-        return db::schema_tables::convert_schema_to_mutations(get_storage_proxy()).finally([p = get_local_shared_storage_proxy()] {
-            // keep local proxy alive
-        });
-    });
-    ms.register_schema_check([] {
-        return make_ready_future<utils::UUID>(service::get_local_storage_service().db().local().get_version());
-    });
-}
-
-void migration_manager::uninit_messaging_service()
-{
-    auto& ms = netw::get_local_messaging_service();
-    ms.unregister_migration_request();
-    ms.unregister_definitions_update();
-    ms.unregister_schema_check();
-}
-
-void migration_manager::register_listener(migration_listener* listener)
-{
-    _listeners.emplace_back(listener);
-}
-
-void migration_manager::unregister_listener(migration_listener* listener)
-{
-    _listeners.erase(std::remove(_listeners.begin(), _listeners.end(), listener), _listeners.end());
-}
-
-future<> migration_manager::schedule_schema_pull(const gms::inet_address& endpoint, const gms::endpoint_state& state)
-{
-    const auto& value = state.get_application_state(gms::application_state::SCHEMA);
-
-    if (endpoint != utils::fb_utilities::get_broadcast_address() && value) {
-        return maybe_schedule_schema_pull(utils::UUID{value->value}, endpoint);
-    }
-    return make_ready_future<>();
-}
-
-bool migration_manager::is_ready_for_bootstrap() {
-    auto our_version = get_local_storage_proxy().get_db().local().get_version();
-    bool match = false;
-    for (auto& x : gms::get_local_gossiper().endpoint_state_map) {
-        auto& endpoint = x.first;
-        auto& eps = x.second;
-        if (endpoint == utils::fb_utilities::get_broadcast_address() || !eps.is_alive()) {
-            continue;
-        }
-        mlogger.debug("Checking schema state for {}.", endpoint);
-        auto schema = eps.get_application_state(gms::application_state::SCHEMA);
-        if (!schema) {
-            mlogger.debug("Schema state not yet available for {}.", endpoint);
-            return false;
-        }
-        utils::UUID remote_version{schema->value};
-        if (our_version != remote_version) {
-            mlogger.debug("Schema mismatch for {} ({} != {}).", endpoint, our_version, remote_version);
-            return false;
-        } else {
-            match = true;
-        }
-    }
-    return match;
-}
-
-/**
- * If versions differ this node sends request with local migration list to the endpoint
- * and expects to receive a list of migrations to apply locally.
- */
-future<> migration_manager::maybe_schedule_schema_pull(const utils::UUID& their_version, const gms::inet_address& endpoint)
-{
-    auto& proxy = get_local_storage_proxy();
-    auto& db = proxy.get_db().local();
-    if (db.get_version() == their_version || !should_pull_schema_from(endpoint)) {
-        mlogger.debug("Not pulling schema because versions match or shouldPullSchemaFrom returned false");
-        return make_ready_future<>();
-    }
-
-    if (db.get_version() == database::empty_version || runtime::get_uptime() < migration_delay) {
-        // If we think we may be bootstrapping or have recently started, submit MigrationTask immediately
-        mlogger.debug("Submitting migration task for {}", endpoint);
-        return submit_migration_task(endpoint);
-    } else {
-        // Include a delay to make sure we have a chance to apply any changes being
-        // pushed out simultaneously. See CASSANDRA-5025
-        return sleep(migration_delay).then([this, &proxy, endpoint] {
-            // grab the latest version of the schema since it may have changed again since the initial scheduling
-            auto& gossiper = gms::get_local_gossiper();
-            auto ep_state = gossiper.get_endpoint_state_for_endpoint(endpoint);
-            if (!ep_state) {
-                mlogger.debug("epState vanished for {}, not submitting migration task", endpoint);
-                return make_ready_future<>();
-            }
-            const auto& value = ep_state->get_application_state(gms::application_state::SCHEMA);
-            utils::UUID current_version{value->value};
-            auto& db = proxy.get_db().local();
-            if (db.get_version() == current_version) {
-                mlogger.debug("not submitting migration task for {} because our versions match", endpoint);
-                return make_ready_future<>();
-            }
-            mlogger.debug("submitting migration task for {}", endpoint);
-            return submit_migration_task(endpoint);
-        });
-    }
-}
-
-future<> migration_manager::submit_migration_task(const gms::inet_address& endpoint)
-{
-    return service::migration_task::run_may_throw(get_storage_proxy(), endpoint);
-}
-
-future<> migration_manager::merge_schema_from(netw::messaging_service::msg_addr id)
-{
-    auto& ms = netw::get_local_messaging_service();
-    return ms.send_migration_request(std::move(id)).then([this, id] (std::vector<frozen_mutation> mutations) {
-        return do_with(std::move(mutations), [this, id] (auto&& mutations) {
-            return this->merge_schema_from(id, mutations);
-        });
-    });
-}
-
-future<> migration_manager::merge_schema_from(netw::messaging_service::msg_addr src, const std::vector<frozen_mutation>& mutations)
-{
-    mlogger.debug("Applying schema mutations from {}", src);
-    return map_reduce(mutations, [src](const frozen_mutation& fm) {
-        // schema table's schema is not syncable so just use get_schema_definition()
-        return get_schema_definition(fm.schema_version(), src).then([&fm](schema_ptr s) {
-            s->registry_entry()->mark_synced();
-            return fm.unfreeze(std::move(s));
-        });
-    }, std::vector<mutation>(), [](std::vector<mutation>&& all, mutation&& m) {
-        all.emplace_back(std::move(m));
-        return std::move(all);
-    }).then([](std::vector<mutation> schema) {
-        return db::schema_tables::merge_schema(get_storage_proxy(), std::move(schema));
-    });
-}
-
-bool migration_manager::has_compatible_schema_tables_version(const gms::inet_address& endpoint) {
-    auto& gossiper = gms::get_local_gossiper();
-    auto ep_state = gossiper.get_endpoint_state_for_endpoint(endpoint);
-    if (!ep_state) {
-        return false;
-    }
-    auto&& version_opt = ep_state->get_application_state(gms::application_state::SCHEMA_TABLES_VERSION);
-    return version_opt && version_opt->value == db::schema_tables::version;
-}
-
-bool migration_manager::should_pull_schema_from(const gms::inet_address& endpoint) {
-    return has_compatible_schema_tables_version(endpoint)
-        && !gms::get_local_gossiper().is_gossip_only_member(endpoint);
-}
-
-future<> migration_manager::notify_create_keyspace(const lw_shared_ptr<keyspace_metadata>& ksm) {
-    return seastar::async([this, ksm] {
-        auto&& name = ksm->name();
-        for (auto&& listener : _listeners) {
-            try {
-                listener->on_create_keyspace(name);
-            } catch (...) {
-                mlogger.warn("Create keyspace notification failed {}: {}", name, std::current_exception());
-            }
-        }
-    });
-}
-
-future<> migration_manager::notify_create_column_family(const schema_ptr& cfm) {
-    return seastar::async([this, cfm] {
-        auto&& ks_name = cfm->ks_name();
-        auto&& cf_name = cfm->cf_name();
-        for (auto&& listener : _listeners) {
-            try {
-                listener->on_create_column_family(ks_name, cf_name);
-            } catch (...) {
-                mlogger.warn("Create column family notification failed {}.{}: {}", ks_name, cf_name, std::current_exception());
-            }
-        }
-    });
-}
-
-future<> migration_manager::notify_create_user_type(const user_type& type) {
-    return seastar::async([this, type] {
-        auto&& ks_name = type->_keyspace;
-        auto&& type_name = type->get_name_as_string();
-        for (auto&& listener : _listeners) {
-            try {
-                listener->on_create_user_type(ks_name, type_name);
-            } catch (...) {
-                mlogger.warn("Create user type notification failed {}.{}: {}", ks_name, type_name, std::current_exception());
-            }
-        }
-    });
-}
-
-future<> migration_manager::notify_create_view(const view_ptr& view) {
-    return seastar::async([this, view] {
-        auto&& ks_name = view->ks_name();
-        auto&& view_name = view->cf_name();
-        for (auto&& listener : _listeners) {
-            try {
-                listener->on_create_view(ks_name, view_name);
-            } catch (...) {
-                mlogger.warn("Create view notification failed {}.{}: {}", ks_name, view_name, std::current_exception());
-            }
-        }
-    });
-}
-
-#if 0
-public void notifyCreateFunction(UDFunction udf)
-{
-    for (IMigrationListener listener : listeners)
-        listener.onCreateFunction(udf.name().keyspace, udf.name().name);
-}
-
-public void notifyCreateAggregate(UDAggregate udf)
-{
-    for (IMigrationListener listener : listeners)
-        listener.onCreateAggregate(udf.name().keyspace, udf.name().name);
-}
-#endif
-
-future<> migration_manager::notify_update_keyspace(const lw_shared_ptr<keyspace_metadata>& ksm) {
-    return seastar::async([this, ksm] {
-        auto&& name = ksm->name();
-        for (auto&& listener : _listeners) {
-            try {
-                listener->on_update_keyspace(name);
-            } catch (...) {
-                mlogger.warn("Update keyspace notification failed {}: {}", name, std::current_exception());
-            }
-        }
-    });
-}
-
-future<> migration_manager::notify_update_column_family(const schema_ptr& cfm, bool columns_changed) {
-    return seastar::async([this, cfm, columns_changed] {
-        auto&& ks_name = cfm->ks_name();
-        auto&& cf_name = cfm->cf_name();
-        for (auto&& listener : _listeners) {
-            try {
-                listener->on_update_column_family(ks_name, cf_name, columns_changed);
-            } catch (...) {
-                mlogger.warn("Update column family notification failed {}.{}: {}", ks_name, cf_name, std::current_exception());
-            }
-        }
-    });
-}
-
-future<> migration_manager::notify_update_user_type(const user_type& type) {
-    return seastar::async([this, type] {
-        auto&& ks_name = type->_keyspace;
-        auto&& type_name = type->get_name_as_string();
-        for (auto&& listener : _listeners) {
-            try {
-                listener->on_update_user_type(ks_name, type_name);
-            } catch (...) {
-                mlogger.warn("Update user type notification failed {}.{}: {}", ks_name, type_name, std::current_exception());
-            }
-        }
-    });
-}
-
-future<> migration_manager::notify_update_view(const view_ptr& view, bool columns_changed) {
-    return seastar::async([this, view, columns_changed] {
-        auto&& ks_name = view->ks_name();
-        auto&& view_name = view->cf_name();
-        for (auto&& listener : _listeners) {
-            try {
-                listener->on_update_view(ks_name, view_name, columns_changed);
-            } catch (...) {
-                mlogger.warn("Update view notification failed {}.{}: {}", ks_name, view_name, std::current_exception());
-            }
-        }
-    });
-}
-
-#if 0
-public void notifyUpdateFunction(UDFunction udf)
-{
-    for (IMigrationListener listener : listeners)
-        listener.onUpdateFunction(udf.name().keyspace, udf.name().name);
-}
-
-public void notifyUpdateAggregate(UDAggregate udf)
-{
-    for (IMigrationListener listener : listeners)
-        listener.onUpdateAggregate(udf.name().keyspace, udf.name().name);
-}
-#endif
-
-future<> migration_manager::notify_drop_keyspace(const sstring& ks_name) {
-    return seastar::async([this, ks_name] {
-        for (auto&& listener : _listeners) {
-            try {
-                listener->on_drop_keyspace(ks_name);
-            } catch (...) {
-                mlogger.warn("Drop keyspace notification failed {}: {}", ks_name, std::current_exception());
-            }
-        }
-    });
-}
-
-future<> migration_manager::notify_drop_column_family(const schema_ptr& cfm) {
-    return seastar::async([this, cfm] {
-        auto&& cf_name = cfm->cf_name();
-        auto&& ks_name = cfm->ks_name();
-        for (auto&& listener : _listeners) {
-            try {
-                listener->on_drop_column_family(ks_name, cf_name);
-            } catch (...) {
-                mlogger.warn("Drop column family notification failed {}.{}: {}", ks_name, cf_name, std::current_exception());
-            }
-        }
-    });
-}
-
-future<> migration_manager::notify_drop_user_type(const user_type& type) {
-    return seastar::async([this, type] {
-        auto&& ks_name = type->_keyspace;
-        auto&& type_name = type->get_name_as_string();
-        for (auto&& listener : _listeners) {
-            try {
-                listener->on_drop_user_type(ks_name, type_name);
-            } catch (...) {
-                mlogger.warn("Drop user type notification failed {}.{}: {}", ks_name, type_name, std::current_exception());
-            }
-        }
-    });
-}
-
-future<> migration_manager::notify_drop_view(const view_ptr& view) {
-    return seastar::async([this, view] {
-        auto&& ks_name = view->ks_name();
-        auto&& view_name = view->cf_name();
-        for (auto&& listener : _listeners) {
-            try {
-                listener->on_drop_view(ks_name, view_name);
-            } catch (...) {
-                mlogger.warn("Drop view notification failed {}.{}: {}", ks_name, view_name, std::current_exception());
-            }
-        }
-    });
-}
-
-#if 0
-public void notifyDropFunction(UDFunction udf)
-{
-    for (IMigrationListener listener : listeners)
-        listener.onDropFunction(udf.name().keyspace, udf.name().name);
-}
-
-public void notifyDropAggregate(UDAggregate udf)
-{
-    for (IMigrationListener listener : listeners)
-        listener.onDropAggregate(udf.name().keyspace, udf.name().name);
-}
-#endif
-
-future<> migration_manager::announce_keyspace_update(lw_shared_ptr<keyspace_metadata> ksm, bool announce_locally) {
-    return announce_keyspace_update(ksm, api::new_timestamp(), announce_locally);
-}
-
-future<> migration_manager::announce_keyspace_update(lw_shared_ptr<keyspace_metadata> ksm, api::timestamp_type timestamp, bool announce_locally) {
-    ksm->validate();
-    auto& proxy = get_local_storage_proxy();
-    if (!proxy.get_db().local().has_keyspace(ksm->name())) {
-        throw exceptions::configuration_exception(sprint("Cannot update non existing keyspace '%s'.", ksm->name()));
-    }
-    mlogger.info("Update Keyspace: {}", ksm);
-    auto mutations = db::schema_tables::make_create_keyspace_mutations(ksm, timestamp);
-    return announce(std::move(mutations), announce_locally);
-}
-
-future<> migration_manager::announce_new_keyspace(lw_shared_ptr<keyspace_metadata> ksm, bool announce_locally)
-{
-    return announce_new_keyspace(ksm, api::new_timestamp(), announce_locally);
-}
-
-future<> migration_manager::announce_new_keyspace(lw_shared_ptr<keyspace_metadata> ksm, api::timestamp_type timestamp, bool announce_locally)
-{
-    ksm->validate();
-    auto& proxy = get_local_storage_proxy();
-    if (proxy.get_db().local().has_keyspace(ksm->name())) {
-        throw exceptions::already_exists_exception{ksm->name()};
-    }
-    mlogger.info("Create new Keyspace: {}", ksm);
-    auto mutations = db::schema_tables::make_create_keyspace_mutations(ksm, timestamp);
-    return announce(std::move(mutations), announce_locally);
-}
-
-future<> migration_manager::announce_new_column_family(schema_ptr cfm, bool announce_locally) {
-#if 0
-    cfm.validate();
-#endif
-    try {
-        auto& db = get_local_storage_proxy().get_db().local();
-        auto&& keyspace = db.find_keyspace(cfm->ks_name());
-        if (db.has_schema(cfm->ks_name(), cfm->cf_name())) {
-            throw exceptions::already_exists_exception(cfm->ks_name(), cfm->cf_name());
-        }
-        mlogger.info("Create new ColumnFamily: {}", cfm);
-        return db::schema_tables::make_create_table_mutations(keyspace.metadata(), cfm, api::new_timestamp())
-            .then([announce_locally, this] (auto&& mutations) {
-                return announce(std::move(mutations), announce_locally);
-            });
-    } catch (const no_such_keyspace& e) {
-        throw exceptions::configuration_exception(sprint("Cannot add table '%s' to non existing keyspace '%s'.", cfm->cf_name(), cfm->ks_name()));
-    }
-}
-
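Between the create path above and the update path below, it may help to see how a caller is expected to drive these announcements. A hedged sketch only — keyspace_metadata::new_keyspace and its exact argument spelling are assumptions about this tree, not verified API:

    // Inside a seastar thread, so .get() may block on the returned future.
    auto ksm = keyspace_metadata::new_keyspace(
            "demo_ks",                                       // hypothetical keyspace name
            "org.apache.cassandra.locator.SimpleStrategy",   // replication strategy
            {{"replication_factor", "1"}},                   // strategy options
            true /* durable_writes */);
    // Validates, builds the schema mutations, merges them locally and
    // pushes them to live peers (announce_locally defaults to false).
    service::get_local_migration_manager().announce_new_keyspace(ksm).get();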
-future<> migration_manager::announce_column_family_update(schema_ptr cfm, bool from_thrift, std::vector<view_ptr>&& view_updates, bool announce_locally) {
-    warn(unimplemented::cause::VALIDATION);
-#if 0
-    cfm.validate();
-#endif
-    try {
-        auto ts = api::new_timestamp();
-        auto& db = get_local_storage_proxy().get_db().local();
-        auto&& old_schema = db.find_column_family(cfm->ks_name(), cfm->cf_name()).schema(); // FIXME: Should we lookup by id?
-#if 0
-        oldCfm.validateCompatility(cfm);
-#endif
-        mlogger.info("Update table '{}.{}' From {} To {}", cfm->ks_name(), cfm->cf_name(), *old_schema, *cfm);
-        auto&& keyspace = db.find_keyspace(cfm->ks_name()).metadata();
-        return db::schema_tables::make_update_table_mutations(keyspace, old_schema, cfm, ts, from_thrift)
-            .then([announce_locally, keyspace, ts, view_updates = std::move(view_updates)] (auto&& mutations) {
-                return map_reduce(view_updates,
-                    [keyspace = std::move(keyspace), ts] (auto&& view) {
-                        auto& old_view = keyspace->cf_meta_data().at(view->cf_name());
-                        mlogger.info("Update view '{}.{}' From {} To {}", view->ks_name(), view->cf_name(), *old_view, *view);
-                        return db::schema_tables::make_update_view_mutations(keyspace, view_ptr(old_view), std::move(view), ts, false);
-                    }, std::move(mutations),
-                    [] (auto&& result, auto&& view_mutations) {
-                        std::move(view_mutations.begin(), view_mutations.end(), std::back_inserter(result));
-                        return std::move(result);
-                    })
-                    .then([announce_locally] (auto&& mutations) {
-                        return announce(std::move(mutations), announce_locally);
-                    });
-            });
-    } catch (const no_such_column_family& e) {
-        throw exceptions::configuration_exception(sprint("Cannot update non existing table '%s' in keyspace '%s'.",
-            cfm->cf_name(), cfm->ks_name()));
-    }
-}
-
-static future<> do_announce_new_type(user_type new_type, bool announce_locally) {
-    auto& db = get_local_storage_proxy().get_db().local();
-    auto&& keyspace = db.find_keyspace(new_type->_keyspace);
-    return db::schema_tables::make_create_type_mutations(keyspace.metadata(), new_type, api::new_timestamp())
-        .then([announce_locally] (auto&& mutations) {
-            return migration_manager::announce(std::move(mutations), announce_locally);
-        });
-}
-
-future<> migration_manager::announce_new_type(user_type new_type, bool announce_locally) {
-    mlogger.info("Create new User Type: {}", new_type->get_name_as_string());
-    return do_announce_new_type(new_type, announce_locally);
-}
-
-future<> migration_manager::announce_type_update(user_type updated_type, bool announce_locally) {
-    mlogger.info("Update User Type: {}", updated_type->get_name_as_string());
-    return do_announce_new_type(updated_type, announce_locally);
-}
-
-#if 0
-public static void announceNewFunction(UDFunction udf, boolean announceLocally)
-{
-    mlogger.info(String.format("Create scalar function '%s'", udf.name()));
-    KSMetaData ksm = Schema.instance.getKSMetaData(udf.name().keyspace);
-    announce(LegacySchemaTables.makeCreateFunctionMutation(ksm, udf, FBUtilities.timestampMicros()), announceLocally);
-}
-
-public static void announceNewAggregate(UDAggregate udf, boolean announceLocally)
-{
-    mlogger.info(String.format("Create aggregate function '%s'", udf.name()));
-    KSMetaData ksm = Schema.instance.getKSMetaData(udf.name().keyspace);
-    announce(LegacySchemaTables.makeCreateAggregateMutation(ksm, udf, FBUtilities.timestampMicros()), announceLocally);
-}
-
-public static void announceKeyspaceUpdate(KSMetaData ksm) throws ConfigurationException
-{
-    announceKeyspaceUpdate(ksm, false);
-}
-
-public static void announceKeyspaceUpdate(KSMetaData ksm, boolean announceLocally) throws ConfigurationException
-{
-    ksm.validate();
-
-    KSMetaData oldKsm = Schema.instance.getKSMetaData(ksm.name);
-    if (oldKsm == null)
-        throw new ConfigurationException(String.format("Cannot update non existing keyspace '%s'.", ksm.name));
-
-    mlogger.info(String.format("Update Keyspace '%s' From %s To %s", ksm.name, oldKsm, ksm));
-    announce(LegacySchemaTables.makeCreateKeyspaceMutation(ksm, FBUtilities.timestampMicros()), announceLocally);
-}
-
-public static void announceColumnFamilyUpdate(CFMetaData cfm, boolean fromThrift) throws ConfigurationException
-{
-    announceColumnFamilyUpdate(cfm, fromThrift, false);
-}
-
-public static void announceColumnFamilyUpdate(CFMetaData cfm, boolean fromThrift, boolean announceLocally) throws ConfigurationException
-{
-    cfm.validate();
-
-    CFMetaData oldCfm = Schema.instance.getCFMetaData(cfm.ksName, cfm.cfName);
-    if (oldCfm == null)
-        throw new ConfigurationException(String.format("Cannot update non existing table '%s' in keyspace '%s'.", cfm.cfName, cfm.ksName));
-    KSMetaData ksm = Schema.instance.getKSMetaData(cfm.ksName);
-
-    oldCfm.validateCompatility(cfm);
-
-    mlogger.info(String.format("Update table '%s/%s' From %s To %s", cfm.ksName, cfm.cfName, oldCfm, cfm));
-    announce(LegacySchemaTables.makeUpdateTableMutation(ksm, oldCfm, cfm, FBUtilities.timestampMicros(), fromThrift), announceLocally);
-}
-#endif
-
-future<> migration_manager::announce_keyspace_drop(const sstring& ks_name, bool announce_locally)
-{
-    auto& db = get_local_storage_proxy().get_db().local();
-    if (!db.has_keyspace(ks_name)) {
-        throw exceptions::configuration_exception(sprint("Cannot drop non existing keyspace '%s'.", ks_name));
-    }
-    auto& keyspace = db.find_keyspace(ks_name);
-    mlogger.info("Drop Keyspace '{}'", ks_name);
-    auto&& mutations = db::schema_tables::make_drop_keyspace_mutations(keyspace.metadata(), api::new_timestamp());
-    return announce(std::move(mutations), announce_locally);
-}
-
-future<> migration_manager::announce_column_family_drop(const sstring& ks_name,
-                                                        const sstring& cf_name,
-                                                        bool announce_locally)
-{
-    try {
-        auto& db = get_local_storage_proxy().get_db().local();
-        auto& old_cfm = db.find_column_family(ks_name, cf_name);
-        auto& schema = old_cfm.schema();
-        if (schema->is_view()) {
-            throw exceptions::invalid_request_exception("Cannot use DROP TABLE on Materialized View");
-        }
-        auto&& views = old_cfm.views();
-        if (!views.empty()) {
-            throw exceptions::invalid_request_exception(sprint(
-                "Cannot drop table when materialized views still depend on it (%s.{%s})",
-                ks_name, ::join(", ", views | boost::adaptors::transformed([](auto&& v) { return v->cf_name(); }))));
-        }
-        mlogger.info("Drop table '{}.{}'", schema->ks_name(), schema->cf_name());
-        return db::schema_tables::make_drop_table_mutations(db.find_keyspace(ks_name).metadata(), schema, api::new_timestamp())
-            .then([announce_locally] (auto&& mutations) {
-                return announce(std::move(mutations), announce_locally);
-            });
-    } catch (const no_such_column_family& e) {
-        throw exceptions::configuration_exception(sprint("Cannot drop non existing table '%s' in keyspace '%s'.", cf_name, ks_name));
-    }
-}
-
-future<> migration_manager::announce_type_drop(user_type dropped_type, bool announce_locally)
-{
-    auto& db = get_local_storage_proxy().get_db().local();
-    auto&& keyspace = db.find_keyspace(dropped_type->_keyspace);
-    mlogger.info("Drop User Type: {}", dropped_type->get_name_as_string());
-    return db::schema_tables::make_drop_type_mutations(keyspace.metadata(), dropped_type, api::new_timestamp())
-        .then([announce_locally] (auto&& mutations) {
-            return announce(std::move(mutations), announce_locally);
-        });
-}
-
-future<> migration_manager::announce_new_view(view_ptr view, bool announce_locally)
-{
-#if 0
-    view.metadata.validate();
-#endif
-    auto& db = get_local_storage_proxy().get_db().local();
-    try {
-        auto&& keyspace = db.find_keyspace(view->ks_name()).metadata();
-        if (keyspace->cf_meta_data().find(view->cf_name()) != keyspace->cf_meta_data().end()) {
-            throw exceptions::already_exists_exception(view->ks_name(), view->cf_name());
-        }
-        mlogger.info("Create new view: {}", view);
-        return db::schema_tables::make_create_view_mutations(keyspace, std::move(view), api::new_timestamp())
-            .then([announce_locally] (auto&& mutations) {
-                return announce(std::move(mutations), announce_locally);
-            });
-    } catch (const no_such_keyspace& e) {
-        throw exceptions::configuration_exception(sprint("Cannot add view '%s' to non existing keyspace '%s'.", view->cf_name(), view->ks_name()));
-    }
-}
-
-future<> migration_manager::announce_view_update(view_ptr view, bool announce_locally)
-{
-#if 0
-    view.metadata.validate();
-#endif
-    auto& db = get_local_storage_proxy().get_db().local();
-    try {
-        auto&& keyspace = db.find_keyspace(view->ks_name()).metadata();
-        auto& old_view = keyspace->cf_meta_data().at(view->cf_name());
-        if (!old_view->is_view()) {
-            throw exceptions::invalid_request_exception("Cannot use ALTER MATERIALIZED VIEW on Table");
-        }
-#if 0
-        oldCfm.validateCompatility(cfm);
-#endif
-        mlogger.info("Update view '{}.{}' From {} To {}", view->ks_name(), view->cf_name(), *old_view, *view);
-        return db::schema_tables::make_update_view_mutations(std::move(keyspace), view_ptr(old_view), std::move(view), api::new_timestamp(), true)
-            .then([announce_locally] (auto&& mutations) {
-                return announce(std::move(mutations), announce_locally);
-            });
-    } catch (const std::out_of_range& e) {
-        throw exceptions::configuration_exception(sprint("Cannot update non existing materialized view '%s' in keyspace '%s'.",
-            view->cf_name(), view->ks_name()));
-    }
-}
-
-future<> migration_manager::announce_view_drop(const sstring& ks_name,
-                                               const sstring& cf_name,
-                                               bool announce_locally)
-{
-    auto& db = get_local_storage_proxy().get_db().local();
-    try {
-        auto& view = db.find_column_family(ks_name, cf_name).schema();
-        if (!view->is_view()) {
-            throw exceptions::invalid_request_exception("Cannot use DROP MATERIALIZED VIEW on Table");
-        }
-        auto keyspace = db.find_keyspace(ks_name).metadata();
-        mlogger.info("Drop view '{}.{}'", view->ks_name(), view->cf_name());
-        return db::schema_tables::make_drop_view_mutations(std::move(keyspace), view_ptr(std::move(view)), api::new_timestamp())
-            .then([announce_locally] (auto&& mutations) {
-                return announce(std::move(mutations), announce_locally);
-            });
-    } catch (const no_such_column_family& e) {
-        throw exceptions::configuration_exception(sprint("Cannot drop non existing materialized view '%s' in keyspace '%s'.",
-            cf_name, ks_name));
-    }
-}
-
-#if 0
-public static void announceFunctionDrop(UDFunction udf, boolean announceLocally)
-{
-    mlogger.info(String.format("Drop scalar function overload '%s' args '%s'", udf.name(), udf.argTypes()));
-    KSMetaData ksm = Schema.instance.getKSMetaData(udf.name().keyspace);
-    announce(LegacySchemaTables.makeDropFunctionMutation(ksm, udf, FBUtilities.timestampMicros()), announceLocally);
-}
-
-public static void announceAggregateDrop(UDAggregate udf, boolean announceLocally)
-{
-    mlogger.info(String.format("Drop aggregate function overload '%s' args '%s'", udf.name(), udf.argTypes()));
-    KSMetaData ksm = Schema.instance.getKSMetaData(udf.name().keyspace);
-    announce(LegacySchemaTables.makeDropAggregateMutation(ksm, udf, FBUtilities.timestampMicros()), announceLocally);
-}
-#endif
-
-/**
- * actively announce a new version to active hosts via rpc
- * @param schema The schema mutation to be applied
- */
-future<> migration_manager::announce(mutation schema, bool announce_locally)
-{
-    std::vector<mutation> mutations;
-    mutations.emplace_back(std::move(schema));
-    return announce(std::move(mutations), announce_locally);
-}
-
-future<> migration_manager::announce(std::vector<mutation> mutations, bool announce_locally)
-{
-    if (announce_locally) {
-        return db::schema_tables::merge_schema(get_storage_proxy(), std::move(mutations), false);
-    } else {
-        return announce(std::move(mutations));
-    }
-}
-
-future<> migration_manager::push_schema_mutation(const gms::inet_address& endpoint, const std::vector<mutation>& schema)
-{
-    netw::messaging_service::msg_addr id{endpoint, 0};
-    auto fm = std::vector<frozen_mutation>(schema.begin(), schema.end());
-    return netw::get_local_messaging_service().send_definitions_update(id, std::move(fm));
-}
-
-// Returns a future on the local application of the schema
-future<> migration_manager::announce(std::vector<mutation> schema) {
-    auto f = db::schema_tables::merge_schema(get_storage_proxy(), schema);
-
-    return do_with(std::move(schema), [live_members = gms::get_local_gossiper().get_live_members()](auto && schema) {
-        return parallel_for_each(live_members.begin(), live_members.end(), [&schema](auto& endpoint) {
-            // only push schema to nodes with known and equal versions
-            if (endpoint != utils::fb_utilities::get_broadcast_address() &&
-                netw::get_local_messaging_service().knows_version(endpoint) &&
-                netw::get_local_messaging_service().get_raw_version(endpoint) ==
-                netw::messaging_service::current_version) {
-                return push_schema_mutation(endpoint, schema);
-            } else {
-                return make_ready_future<>();
-            }
-        });
-    }).then([f = std::move(f)] () mutable { return std::move(f); });
-}
-
-/**
- * Announce my version passively over gossip.
- * Used to notify nodes as they arrive in the cluster.
- *
- * @param version The schema version to announce
- */
-future<> migration_manager::passive_announce(utils::UUID version) {
-    return gms::get_gossiper().invoke_on(0, [version] (auto&& gossiper) {
-        auto& ss = service::get_local_storage_service();
-        mlogger.debug("Gossiping my schema version {}", version);
-        return gossiper.add_local_application_state(gms::application_state::SCHEMA, ss.value_factory.schema(version));
-    });
-}
-
-#if 0
-/**
- * Clear all locally stored schema information and reset schema to initial state.
- * Called by user (via JMX) who wants to get rid of schema disagreement.
- *
- * @throws IOException if schema tables truncation fails
- */
-public static void resetLocalSchema() throws IOException
-{
-    mlogger.info("Starting local schema reset...");
-
-    mlogger.debug("Truncating schema tables...");
-
-    LegacySchemaTables.truncateSchemaTables();
-
-    mlogger.debug("Clearing local schema keyspace definitions...");
-
-    Schema.instance.clear();
-
-    Set<InetAddress> liveEndpoints = Gossiper.instance.getLiveMembers();
-    liveEndpoints.remove(FBUtilities.getBroadcastAddress());
-
-    // force migration if there are nodes around
-    for (InetAddress node : liveEndpoints)
-    {
-        if (shouldPullSchemaFrom(node))
-        {
-            mlogger.debug("Requesting schema from {}", node);
-            FBUtilities.waitOnFuture(submitMigrationTask(node));
-            break;
-        }
-    }
-
-    mlogger.info("Local schema reset is complete.");
-}
-
-public static class MigrationsSerializer implements IVersionedSerializer<Collection<Mutation>>
-{
-    public static MigrationsSerializer instance = new MigrationsSerializer();
-
-    public void serialize(Collection<Mutation> schema, DataOutputPlus out, int version) throws IOException
-    {
-        out.writeInt(schema.size());
-        for (Mutation mutation : schema)
-            Mutation.serializer.serialize(mutation, out, version);
-    }
-
-    public Collection<Mutation> deserialize(DataInput in, int version) throws IOException
-    {
-        int count = in.readInt();
-        Collection<Mutation> schema = new ArrayList<>(count);
-
-        for (int i = 0; i < count; i++)
-            schema.add(Mutation.serializer.deserialize(in, version));
-
-        return schema;
-    }
-
-    public long serializedSize(Collection<Mutation> schema, int version)
-    {
-        int size = TypeSizes.NATIVE.sizeof(schema.size());
-        for (Mutation mutation : schema)
-            size += Mutation.serializer.serializedSize(mutation, version);
-        return size;
-    }
-}
-#endif
-
-
-// Ensure that given schema version 's' was synced with on current node. See schema::is_synced().
-//
-// The endpoint is the node from which 's' originated.
-//
-static future<> maybe_sync(const schema_ptr& s, netw::messaging_service::msg_addr endpoint) {
-    if (s->is_synced()) {
-        return make_ready_future<>();
-    }
-
-    return s->registry_entry()->maybe_sync([s, endpoint] {
-        auto merge = [gs = global_schema_ptr(s), endpoint] {
-            schema_ptr s = gs.get();
-            mlogger.debug("Syncing schema of {}.{} (v={}) with {}", s->ks_name(), s->cf_name(), s->version(), endpoint);
-            return get_local_migration_manager().merge_schema_from(endpoint);
-        };
-
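A note on the branch that follows: funneling every sync through shard 0 gives a single serialization point per schema version, so syncs racing in from different shards collapse into one merge_schema_from() round trip instead of issuing duplicate pulls (an inference from the code below, not a documented guarantee).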
-        // Serialize schema sync by always doing it on shard 0.
-        if (engine().cpu_id() == 0) {
-            return merge();
-        } else {
-            return smp::submit_to(0, [gs = global_schema_ptr(s), endpoint, merge] {
-                schema_ptr s = gs.get();
-                schema_registry_entry& e = *s->registry_entry();
-                return e.maybe_sync(merge);
-            });
-        }
-    });
-}
-
-future<schema_ptr> get_schema_definition(table_schema_version v, netw::messaging_service::msg_addr dst) {
-    return local_schema_registry().get_or_load(v, [dst] (table_schema_version v) {
-        mlogger.debug("Requesting schema {} from {}", v, dst);
-        auto& ms = netw::get_local_messaging_service();
-        return ms.send_get_schema_version(dst, v);
-    });
-}
-
-future<schema_ptr> get_schema_for_read(table_schema_version v, netw::messaging_service::msg_addr dst) {
-    return get_schema_definition(v, dst);
-}
-
-future<schema_ptr> get_schema_for_write(table_schema_version v, netw::messaging_service::msg_addr dst) {
-    return get_schema_definition(v, dst).then([dst] (schema_ptr s) {
-        return maybe_sync(s, dst).then([s] {
-            return s;
-        });
-    });
-}
-
-}
diff --git a/scylla/service/migration_manager.hh b/scylla/service/migration_manager.hh
deleted file mode 100644
index 29aa44a..0000000
--- a/scylla/service/migration_manager.hh
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "service/migration_listener.hh"
-#include "gms/endpoint_state.hh"
-#include "db/schema_tables.hh"
-#include "core/distributed.hh"
-#include "gms/inet_address.hh"
-#include "message/messaging_service_fwd.hh"
-#include "utils/UUID.hh"
-
-#include 
-
-namespace service {
-
-class migration_manager : public seastar::async_sharded_service<migration_manager> {
-    std::vector<migration_listener*> _listeners;
-
-    static const std::chrono::milliseconds migration_delay;
-public:
-    migration_manager();
-
-    /// Register a migration listener on current shard.
-    void register_listener(migration_listener* listener);
-
-    /// Unregister a migration listener on current shard.
-    void unregister_listener(migration_listener* listener);
-
-    future<> schedule_schema_pull(const gms::inet_address& endpoint, const gms::endpoint_state& state);
-
-    future<> maybe_schedule_schema_pull(const utils::UUID& their_version, const gms::inet_address& endpoint);
-
-    future<> submit_migration_task(const gms::inet_address& endpoint);
-
-    // Fetches schema from remote node and applies it locally.
-    // Differs from submit_migration_task() in that all errors are propagated.
-    future<> merge_schema_from(netw::msg_addr);
-
-    // Merge mutations received from src.
-    // Keep mutations alive around whole async operation.
-    future<> merge_schema_from(netw::msg_addr src, const std::vector<frozen_mutation>& mutations);
-
-    future<> notify_create_keyspace(const lw_shared_ptr<keyspace_metadata>& ksm);
-    future<> notify_create_column_family(const schema_ptr& cfm);
-    future<> notify_create_user_type(const user_type& type);
-    future<> notify_create_view(const view_ptr& view);
-    future<> notify_update_keyspace(const lw_shared_ptr<keyspace_metadata>& ksm);
-    future<> notify_update_column_family(const schema_ptr& cfm, bool columns_changed);
-    future<> notify_update_user_type(const user_type& type);
-    future<> notify_update_view(const view_ptr& view, bool columns_changed);
-    future<> notify_drop_keyspace(const sstring& ks_name);
-    future<> notify_drop_column_family(const schema_ptr& cfm);
-    future<> notify_drop_user_type(const user_type& type);
-    future<> notify_drop_view(const view_ptr& view);
-
-    bool should_pull_schema_from(const gms::inet_address& endpoint);
-    bool has_compatible_schema_tables_version(const gms::inet_address& endpoint);
-
-    future<> announce_keyspace_update(lw_shared_ptr<keyspace_metadata> ksm, bool announce_locally = false);
-
-    future<> announce_keyspace_update(lw_shared_ptr<keyspace_metadata> ksm, api::timestamp_type timestamp, bool announce_locally);
-
-    future<> announce_new_keyspace(lw_shared_ptr<keyspace_metadata> ksm, bool announce_locally = false);
-
-    future<> announce_new_keyspace(lw_shared_ptr<keyspace_metadata> ksm, api::timestamp_type timestamp, bool announce_locally);
-
-    future<> announce_column_family_update(schema_ptr cfm, bool from_thrift, std::vector<view_ptr>&& view_updates, bool announce_locally = false);
-
-    future<> announce_new_column_family(schema_ptr cfm, bool announce_locally = false);
-
-    future<> announce_new_type(user_type new_type, bool announce_locally = false);
-
-    future<> announce_type_update(user_type updated_type, bool announce_locally = false);
-
-    future<> announce_keyspace_drop(const sstring& ks_name, bool announce_locally = false);
-
-    future<> announce_column_family_drop(const sstring& ks_name, const sstring& cf_name, bool announce_locally = false);
-
-    future<> announce_type_drop(user_type dropped_type, bool announce_locally = false);
-
-    future<> announce_new_view(view_ptr view, bool announce_locally = false);
-
-    future<> announce_view_update(view_ptr view, bool announce_locally = false);
-
-    future<> announce_view_drop(const sstring& ks_name, const sstring& cf_name, bool announce_locally = false);
-
-    /**
-     * actively announce a new version to active hosts via rpc
-     * @param schema The schema mutation to be applied
-     */
-    static future<> announce(mutation schema, bool announce_locally);
-
-    static future<> announce(std::vector<mutation> mutations, bool announce_locally);
-
-    static future<> push_schema_mutation(const gms::inet_address& endpoint, const std::vector<mutation>& schema);
-
-    // Returns a future on the local application of the schema
-    static future<> announce(std::vector<mutation> schema);
-
-    static future<> passive_announce(utils::UUID version);
-
-    future<> stop();
-
-    bool is_ready_for_bootstrap();
-
-    void init_messaging_service();
-private:
-    void uninit_messaging_service();
-};
-
-extern distributed<migration_manager> _the_migration_manager;
-
-inline distributed<migration_manager>& get_migration_manager() {
-    return _the_migration_manager;
-}
-
-inline migration_manager& get_local_migration_manager() {
-    return _the_migration_manager.local();
-}
-
-// Returns schema of given version, either from cache or from remote node identified by 'from'.
-// Doesn't affect current node's schema in any way.
-future<schema_ptr> get_schema_definition(table_schema_version, netw::msg_addr from);
-
-// Returns schema of given version, either from cache or from remote node identified by 'from'.
-// The returned schema may not be synchronized. See schema::is_synced().
-// Intended to be used in the read path.
-future<schema_ptr> get_schema_for_read(table_schema_version, netw::msg_addr from);
-
-// Returns schema of given version, either from cache or from remote node identified by 'from'.
-// Ensures that this node is synchronized with the returned schema. See schema::is_synced().
-// Intended to be used in the write path, which relies on synchronized schema.
-future<schema_ptr> get_schema_for_write(table_schema_version, netw::msg_addr from);
-
-}
diff --git a/scylla/service/migration_task.cc b/scylla/service/migration_task.cc
deleted file mode 100644
index f20ac2c..0000000
--- a/scylla/service/migration_task.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "service/migration_task.hh"
-
-#include "message/messaging_service.hh"
-#include "gms/failure_detector.hh"
-#include "db/schema_tables.hh"
-#include "frozen_mutation.hh"
-#include "migration_manager.hh"
-
-namespace service {
-
-static logging::logger mlogger("migration_task");
-
-future<> migration_task::run_may_throw(distributed<service::storage_proxy>& proxy, const gms::inet_address& endpoint)
-{
-    if (!gms::get_failure_detector().local().is_alive(endpoint)) {
-        mlogger.error("Can't send migration request: node {} is down.", endpoint);
-        return make_ready_future<>();
-    }
-    netw::messaging_service::msg_addr id{endpoint, 0};
-    return service::get_local_migration_manager().merge_schema_from(id).handle_exception([](std::exception_ptr e) {
-        try {
-            std::rethrow_exception(e);
-        } catch (const exceptions::configuration_exception& e) {
-            mlogger.error("Configuration exception merging remote schema: {}", e.what());
-        }
-    });
-}
-
-}
diff --git a/scylla/service/migration_task.hh b/scylla/service/migration_task.hh
deleted file mode 100644
index b844d6e..0000000
--- a/scylla/service/migration_task.hh
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "gms/inet_address.hh"
-
-#include "service/storage_proxy.hh"
-
-namespace service {
-
-class migration_task {
-public:
-    static future<> run_may_throw(distributed<service::storage_proxy>& proxy, const gms::inet_address& endpoint);
-};
-
-}
diff --git a/scylla/service/misc_services.cc b/scylla/service/misc_services.cc
deleted file mode 100644
index 17d279f..0000000
--- a/scylla/service/misc_services.cc
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Modified by ScyllaDB
- * Copyright 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "load_broadcaster.hh"
-#include "cache_hitrate_calculator.hh"
-#include "db/system_keyspace.hh"
-#include "gms/application_state.hh"
-#include "service/storage_service.hh"
-
-namespace service {
-
-constexpr std::chrono::milliseconds load_broadcaster::BROADCAST_INTERVAL;
-
-logging::logger llogger("load_broadcaster");
-
-void load_broadcaster::start_broadcasting() {
-    _done = make_ready_future<>();
-
-    // send the first broadcast "right away" (i.e., in 2 gossip heartbeats, when we should have someone to talk to);
-    // after that send every BROADCAST_INTERVAL.
-
-    _timer.set_callback([this] {
-        llogger.debug("Disseminating load info ...");
-        _done = _db.map_reduce0([](database& db) {
-            int64_t res = 0;
-            for (auto i : db.get_column_families()) {
-                res += i.second->get_stats().live_disk_space_used;
-            }
-            return res;
-        }, int64_t(0), std::plus<int64_t>()).then([this] (int64_t size) {
-            gms::versioned_value::factory value_factory;
-            return _gossiper.add_local_application_state(gms::application_state::LOAD,
-                value_factory.load(size)).then([this] {
-                _timer.arm(BROADCAST_INTERVAL);
-                return make_ready_future<>();
-            });
-        });
-    });
-
-    _timer.arm(2 * gms::gossiper::INTERVAL);
-}
-
-future<> load_broadcaster::stop_broadcasting() {
-    _timer.cancel();
-    return std::move(_done);
-}
-
-
-// cache_hitrate_calculator implementation
-cache_hitrate_calculator::cache_hitrate_calculator(seastar::sharded<database>& db, seastar::sharded<cache_hitrate_calculator>& me) : _db(db), _me(me),
-        _timer(std::bind(std::mem_fn(&cache_hitrate_calculator::recalculate_timer), this))
-{}
-
-void cache_hitrate_calculator::recalculate_timer() {
-    recalculate_hitrates().then_wrapped([p = shared_from_this()] (future<lowres_clock::duration> f) {
-        lowres_clock::duration d;
-        if (f.failed()) {
-            d = std::chrono::milliseconds(2000);
-        } else {
-            d = f.get0();
-        }
-        p->run_on((engine().cpu_id() + 1) % smp::count, d);
-    });
-}
-
-void cache_hitrate_calculator::run_on(size_t master, lowres_clock::duration d) {
-    if (!_stopped) {
-        _me.invoke_on(master, [d] (cache_hitrate_calculator& local) {
-            local._timer.arm(d);
-        }).handle_exception_type([] (seastar::no_sharded_instance_exception&) { /* ignore */ });
-    }
-}
-
-future<lowres_clock::duration> cache_hitrate_calculator::recalculate_hitrates() {
-    struct stat {
-        float h = 0;
-        float m = 0;
-        stat& operator+=(stat& o) {
-            h += o.h;
-            m += o.m;
-            return *this;
-        }
-    };
-
-    static auto non_system_filter = [&] (const std::pair<utils::UUID, lw_shared_ptr<column_family>>& cf) {
-        return _db.local().find_keyspace(cf.second->schema()->ks_name()).get_replication_strategy().get_type() != locator::replication_strategy_type::local;
-    };
-
-    auto cf_to_cache_hit_stats = [] (database& db) {
-        return boost::copy_range<std::unordered_map<utils::UUID, stat>>(db.get_column_families() | boost::adaptors::filtered(non_system_filter) |
-                boost::adaptors::transformed([] (const std::pair<utils::UUID, lw_shared_ptr<column_family>>& cf) {
-            auto& stats = cf.second->get_row_cache().stats();
-            return std::make_pair(cf.first, stat{float(stats.reads_with_no_misses.rate().rates[0]), float(stats.reads_with_misses.rate().rates[0])});
-        }));
-    };
-
-    auto sum_stats_per_cf = [] (std::unordered_map<utils::UUID, stat> a, std::unordered_map<utils::UUID, stat> b) {
-        for (auto& r : b) {
-            a[r.first] += r.second;
-        }
-        return std::move(a);
-    };
-
-    return _db.map_reduce0(cf_to_cache_hit_stats, std::unordered_map<utils::UUID, stat>(), sum_stats_per_cf).then([this] (std::unordered_map<utils::UUID, stat> rates) mutable {
-        _diff = 0;
-        // set the calculated rates on all shards
-        return _db.invoke_on_all([this, rates = std::move(rates), cpuid = engine().cpu_id()] (database& db) {
-            sstring gstate;
-            for (auto& cf : db.get_column_families() | boost::adaptors::filtered(non_system_filter)) {
-                stat s = rates.at(cf.first);
-                float rate = 0;
-                if (s.h) {
-                    rate = s.h / (s.h + s.m);
-                }
-                if (engine().cpu_id() == cpuid) {
-                    // track the maximum difference between the old rate and the new one across all cfs
-                    _diff = std::max(_diff, std::abs(float(cf.second->get_global_cache_hit_rate()) - rate));
-                    gstate += sprint("%s.%s:%f;", cf.second->schema()->ks_name(), cf.second->schema()->cf_name(), rate);
-                }
-                cf.second->set_global_cache_hit_rate(cache_temperature(rate));
-            }
-            if (gstate.size()) {
-                auto& g = gms::get_local_gossiper();
-                auto& ss = get_local_storage_service();
-                return g.add_local_application_state(gms::application_state::CACHE_HITRATES, ss.value_factory.cache_hitrates(std::move(gstate)));
-            }
-            return make_ready_future<>();
-        });
-    }).then([this] {
-        // if the maximum difference seen during this round was big, schedule the next recalculation earlier
-        if (_diff < 0.01) {
-            return std::chrono::milliseconds(2000);
-        } else {
-            return std::chrono::milliseconds(500);
-        }
-    });
-}
-
-future<> cache_hitrate_calculator::stop() {
-    _timer.cancel();
-    _stopped = true;
-    return make_ready_future<>();
-}
-
-}
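
The recalculation loop above is self-scheduling: each round compares the freshly computed hit rates against the previous ones and derives the next delay from the size of the change. A minimal standalone sketch of that back-off rule (the function name is hypothetical, not part of the patch):

    #include <chrono>

    // Refresh quickly while cache hit rates are still moving; relax to the
    // slow cadence once the largest per-table change drops below 1%.
    std::chrono::milliseconds next_recalc_delay(float max_diff) {
        return max_diff < 0.01f ? std::chrono::milliseconds(2000)  // stable
                                : std::chrono::milliseconds(500);  // in flux
    }
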
diff --git a/scylla/service/pager/paging_state.cc b/scylla/service/pager/paging_state.cc
deleted file mode 100644
index 09fcfbc..0000000
--- a/scylla/service/pager/paging_state.cc
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "bytes.hh"
-#include "keys.hh"
-#include "paging_state.hh"
-#include "core/simple-stream.hh"
-#include "idl/keys.dist.hh"
-#include "idl/paging_state.dist.hh"
-#include "serializer_impl.hh"
-#include "idl/keys.dist.impl.hh"
-#include "idl/paging_state.dist.impl.hh"
-#include "message/messaging_service.hh"
-
-service::pager::paging_state::paging_state(partition_key pk, std::experimental::optional<clustering_key> ck,
-        uint32_t rem)
-        : _partition_key(std::move(pk)), _clustering_key(std::move(ck)), _remaining(rem) {
-}
-
-::shared_ptr<service::pager::paging_state> service::pager::paging_state::deserialize(
-        bytes_opt data) {
-    if (!data) {
-        return nullptr;
-    }
-
-    if (data.value().size() < sizeof(uint32_t) || le_to_cpu(*unaligned_cast<const uint32_t*>(data.value().begin())) != netw::messaging_service::current_version) {
-        throw exceptions::protocol_exception("Invalid value for the paging state");
-    }
-
-    // skip the 4 bytes that contain the format id
-    seastar::simple_input_stream in(reinterpret_cast<const char*>(data.value().begin() + sizeof(uint32_t)), data.value().size() - sizeof(uint32_t));
-
-    try {
-        return ::make_shared<paging_state>(ser::deserialize(in, boost::type<paging_state>()));
-    } catch (...) {
-        std::throw_with_nested(
-                exceptions::protocol_exception(
-                        "Invalid value for the paging state"));
-    }
-}
-
-bytes_opt service::pager::paging_state::serialize() const {
-    bytes b = ser::serialize_to_buffer<bytes>(*this, sizeof(uint32_t));
-    // put the serialization format id in the first 4 bytes
-    *unaligned_cast<uint32_t*>(b.begin()) = cpu_to_le(netw::messaging_service::current_version);
-    return {std::move(b)};
-}
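
serialize() above frames the paging state as a 4-byte little-endian messaging-service version followed by the IDL-encoded fields, and deserialize() rejects anything shorter or tagged with a different version. A sketch of that header check in isolation (the helper name is hypothetical, not part of the patch):

    // Layout: [uint32_t version, little-endian][serialized paging_state fields]
    static bool has_current_paging_header(const bytes& b) {
        if (b.size() < sizeof(uint32_t)) {
            return false;                       // too short to carry the version tag
        }
        uint32_t v;
        std::memcpy(&v, b.begin(), sizeof(v));  // unaligned-safe read of the tag
        return le_to_cpu(v) == netw::messaging_service::current_version;
    }
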
diff --git a/scylla/service/pager/paging_state.hh b/scylla/service/pager/paging_state.hh
deleted file mode 100644
index 8c30899..0000000
--- a/scylla/service/pager/paging_state.hh
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include <experimental/optional>
-
-#include "bytes.hh"
-#include "keys.hh"
-
-namespace service {
-
-namespace pager {
-
-class paging_state final {
-    partition_key _partition_key;
-    std::experimental::optional<clustering_key> _clustering_key;
-    uint32_t _remaining;
-
-public:
-    paging_state(partition_key pk, std::experimental::optional<clustering_key> ck, uint32_t rem);
-
-    /**
-     * Last processed key, i.e. where to start from in the next paging round
-     */
-    const partition_key& get_partition_key() const {
-        return _partition_key;
-    }
-    /**
-     * Clustering key in the last partition, i.e. the first row of the next page
-     */
-    const std::experimental::optional<clustering_key>& get_clustering_key() const {
-        return _clustering_key;
-    }
-    /**
-     * Max remaining rows to fetch in total,
-     * i.e. the initial row_limit minus the number of rows returned so far.
-     */
-    uint32_t get_remaining() const {
-        return _remaining;
-    }
-
-    static ::shared_ptr<paging_state> deserialize(bytes_opt bytes);
-    bytes_opt serialize() const;
-};
-
-}
-
-}
diff --git a/scylla/service/pager/query_pager.hh b/scylla/service/pager/query_pager.hh
deleted file mode 100644
index 279bfd5..0000000
--- a/scylla/service/pager/query_pager.hh
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "paging_state.hh"
-#include "cql3/result_set.hh"
-#include "cql3/selection/selection.hh"
-
-namespace service {
-
-namespace pager {
-
-/**
- * Performs a query, paging it in pages of a given size.
- *
- * This is essentially an iterator of pages. Each call to fetch_page() will
- * return the next page (i.e. the next list of rows), and is_exhausted()
- * indicates whether there are more pages to fetch. The page size is counted
- * either in cells or in CQL3 rows, depending on the parameters of the command
- * we page.
- *
- * Please note that the pager might page within rows, so there is no guarantee
- * that successive pages won't return the same row (though with different
- * columns every time).
- *
- * Also, there is no guarantee that fetch_page() won't return an empty list,
- * even if is_exhausted() returns false (but it is guaranteed to return an empty
- * list *if* is_exhausted() returns true). Indeed, is_exhausted() does *not*
- * trigger a query, so in some (fairly rare) cases we might not know the paging
- * is done even though it is.
- */
-class query_pager {
-public:
-    virtual ~query_pager() {}
-
-    /**
-     * Fetches the next page.
-     *
-     * @param page_size the maximum number of elements to return in the next page.
-     * @return the page of results.
-     */
-    virtual future<std::unique_ptr<cql3::result_set>> fetch_page(uint32_t page_size, gc_clock::time_point) = 0;
-
-    /**
-     * For fetching more than one page.
-     */
-    virtual future<> fetch_page(cql3::selection::result_set_builder&, uint32_t page_size, gc_clock::time_point) = 0;
-
-    /**
-     * Whether or not this pager is exhausted, i.e. whether or not a call to
-     * fetch_page may return more results.
-     *
-     * @return whether the pager is exhausted.
-     */
-    virtual bool is_exhausted() const = 0;
-
-    /**
-     * The maximum number of cells/CQL3 rows that we may still have to return.
-     * In other words, that's the initial user limit minus what we've already
-     * returned (note that it's not how many we *will* return, just the upper
-     * limit on it).
-     */
-    virtual int max_remaining() const = 0;
-
-    /**
-     * Get the current state (snapshot) of the pager. The state allows restarting
-     * the paging on another host from where we are at this point.
-     *
-     * @return the current paging state. Will return null if paging is at the
-     * beginning. If the pager is exhausted, the result is undefined.
-     */
-    virtual ::shared_ptr<const paging_state> state() const = 0;
-};
-
-}
-}
-
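
For orientation, this is how a caller might drive such a pager. The loop shape follows the contract above (empty pages are legal until is_exhausted() returns true); it is a sketch using seastar's repeat(), not code from the tree:

    // Fetch and discard pages until the pager reports exhaustion.
    future<> drain(::shared_ptr<service::pager::query_pager> pager, uint32_t page_size) {
        return repeat([pager, page_size] {
            if (pager->is_exhausted()) {
                return make_ready_future<stop_iteration>(stop_iteration::yes);
            }
            return pager->fetch_page(page_size, gc_clock::now()).then(
                    [] (std::unique_ptr<cql3::result_set> page) {
                        // a page may be empty; only is_exhausted() ends the loop
                        return stop_iteration::no;
                    });
        });
    }
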
diff --git a/scylla/service/pager/query_pagers.cc b/scylla/service/pager/query_pagers.cc
deleted file mode 100644
index d0ecc34..0000000
--- a/scylla/service/pager/query_pagers.cc
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "query_pagers.hh"
-#include "query_pager.hh"
-#include "cql3/selection/selection.hh"
-#include "log.hh"
-#include "to_string.hh"
-
-static logging::logger qlogger("paging");
-
-class service::pager::query_pagers::impl : public query_pager {
-public:
-    impl(schema_ptr s, ::shared_ptr<cql3::selection::selection> selection,
-            service::query_state& state,
-            const cql3::query_options& options,
-            lw_shared_ptr<query::read_command> cmd,
-            dht::partition_range_vector ranges)
-        : _has_clustering_keys(has_clustering_keys(*s, *cmd))
-        , _max(cmd->row_limit)
-        , _schema(std::move(s))
-        , _selection(selection)
-        , _state(state)
-        , _options(options)
-        , _cmd(std::move(cmd))
-        , _ranges(std::move(ranges))
-    {}
-
-private:
-    static bool has_clustering_keys(const schema& s, const query::read_command& cmd) {
-        return s.clustering_key_size() > 0
-                && !cmd.slice.options.contains<query::partition_slice::option::distinct>();
-    }
-
-    future<> fetch_page(cql3::selection::result_set_builder& builder, uint32_t page_size, gc_clock::time_point now) override {
-        auto state = _options.get_paging_state();
-
-        if (!_last_pkey && state) {
-            _max = state->get_remaining();
-            _last_pkey = state->get_partition_key();
-            _last_ckey = state->get_clustering_key();
-        }
-
-        if (_last_pkey) {
-            auto dpk = dht::global_partitioner().decorate_key(*_schema, *_last_pkey);
-            dht::ring_position lo(dpk);
-
-            auto reversed = _cmd->slice.options.contains<query::partition_slice::option::reversed>();
-
-            qlogger.trace("PKey={}, CKey={}, reversed={}", dpk, _last_ckey, reversed);
-
-            // Note: we're assuming both that the ranges are checked
-            // and "cql-compliant", and that storage_proxy will process
-            // the ranges in order
-            //
-            // If the original query has singular restrictions like "col in (x, y, z)",
-            // we will eventually generate an empty range. This is ok, because empty range == nothing,
-            // which is what we thus mean.
-            auto modify_ranges = [reversed](auto& ranges, auto& lo, bool inclusive, const auto& cmp) {
-                typedef typename std::remove_reference_t<decltype(ranges)>::value_type range_type;
-                typedef typename range_type::bound bound_type;
-                bool found = false;
-
-                auto i = ranges.begin();
-                while (i != ranges.end()) {
-                    bool contains = i->contains(lo, cmp);
-
-                    if (contains) {
-                        found = true;
-                    }
-
-                    bool remove = !found
-                            || (contains && !inclusive && (i->is_singular()
-                                || (reversed && i->start() && !cmp(i->start()->value(), lo))
-                                || (!reversed && i->end() && !cmp(i->end()->value(), lo))))
-                            ;
-
-                    if (remove) {
-                        qlogger.trace("Remove range {}", *i);
-                        i = ranges.erase(i);
-                        continue;
-                    }
-                    if (contains) {
-                        auto r = reversed && !i->is_singular()
-                            ? range_type(i->start(), bound_type{ lo, inclusive })
-                            : range_type(bound_type{ lo, inclusive }, i->end(), i->is_singular())
-                            ;
-                        qlogger.trace("Modify range {} -> {}", *i, r);
-                        *i = std::move(r);
-                    }
-                    ++i;
-                }
-                qlogger.trace("Result ranges {}", ranges);
-            };
-
-            // Because of #1446 we don't have a comparator to use with
-            // range which would produce correct results.
-            // This means we cannot reuse the same logic for dealing with
-            // partition and clustering keys.
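
To make the trimming done by modify_ranges concrete, a worked example under the rules above (keys are illustrative):

    // Remaining partition ranges: { (a, m], (m, z] }; the last page ended at
    // key lo inside (a, m], with inclusive == false and forward (non-reversed)
    // paging:
    //   * any range wholly before lo has already been erased;
    //   * (a, m] contains lo, so it is rewritten to (lo, m] -- exclusive start,
    //     because the row at lo was already returned;
    //   * (m, z] is left intact for later pages.
    // With reversed == true, the containing range would instead become (a, lo).
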
- auto modify_ck_ranges = [reversed] (const schema& s, auto& ranges, auto& lo) { - typedef typename std::remove_reference_t::value_type range_type; - typedef typename range_type::bound bound_type; - - auto cmp = [reversed, bv_cmp = bound_view::compare(s)] (const auto& a, const auto& b) { - return reversed ? bv_cmp(b, a) : bv_cmp(a, b); - }; - auto start_bound = [reversed] (const auto& range) -> const bound_view& { - return reversed ? range.second : range.first; - }; - auto end_bound = [reversed] (const auto& range) -> const bound_view& { - return reversed ? range.first : range.second; - }; - clustering_key_prefix::equality eq(s); - - auto it = ranges.begin(); - while (it != ranges.end()) { - auto range = bound_view::from_range(*it); - if (cmp(end_bound(range), lo) || eq(end_bound(range).prefix, lo)) { - qlogger.trace("Remove ck range {}", *it); - it = ranges.erase(it); - continue; - } else if (cmp(start_bound(range), lo)) { - assert(cmp(lo, end_bound(range))); - auto r = reversed ? range_type(it->start(), bound_type { lo, false }) - : range_type(bound_type { lo, false }, it->end()); - qlogger.trace("Modify ck range {} -> {}", *it, r); - *it = std::move(r); - } - ++it; - } - }; - - // last ck can be empty depending on whether we - // deserialized state or not. This case means "last page ended on - // something-not-bound-by-clustering" (i.e. a static row, alone) - const bool has_ck = _has_clustering_keys && _last_ckey; - - // If we have no clustering keys, it should mean we only have one row - // per PK. Thus we can just bypass the last one. - modify_ranges(_ranges, lo, has_ck, dht::ring_position_comparator(*_schema)); - - if (has_ck) { - query::clustering_row_ranges row_ranges = _cmd->slice.default_row_ranges(); - clustering_key_prefix ckp = clustering_key_prefix::from_exploded(*_schema, _last_ckey->explode(*_schema)); - modify_ck_ranges(*_schema, row_ranges, ckp); - - _cmd->slice.set_range(*_schema, *_last_pkey, row_ranges); - } - } - - auto max_rows = std::min(_max, page_size); - - // We always need PK so we can determine where to start next. 
-        _cmd->slice.options.set<query::partition_slice::option::send_partition_key>();
-        // don't add empty bytes (cks) unless we have to
-        if (_has_clustering_keys) {
-            _cmd->slice.options.set<
-                    query::partition_slice::option::send_clustering_key>();
-        }
-        _cmd->row_limit = max_rows;
-
-        qlogger.debug("Fetching {}, page size={}, max_rows={}",
-                _cmd->cf_id, page_size, max_rows
-                );
-
-        auto ranges = _ranges;
-        auto command = ::make_lw_shared(*_cmd);
-        return get_local_storage_proxy().query(_schema, std::move(command), std::move(ranges),
-                _options.get_consistency(), _state.get_trace_state()).then(
-                [this, &builder, page_size, now](foreign_ptr<lw_shared_ptr<query::result>> results) {
-            handle_result(builder, std::move(results), page_size, now);
-        });
-    }
-
-    future<std::unique_ptr<cql3::result_set>> fetch_page(uint32_t page_size,
-            gc_clock::time_point now) override {
-        return do_with(
-                cql3::selection::result_set_builder(*_selection, now,
-                        _options.get_cql_serialization_format()),
-                [this, page_size, now](auto& builder) {
-                    return this->fetch_page(builder, page_size, now).then([&builder] {
-                        return builder.build();
-                    });
-                });
-    }
-
-    void handle_result(
-            cql3::selection::result_set_builder& builder,
-            foreign_ptr<lw_shared_ptr<query::result>> results,
-            uint32_t page_size, gc_clock::time_point now) {
-
-        class myvisitor : public cql3::selection::result_set_builder::visitor {
-        public:
-            uint32_t total_rows = 0;
-            std::experimental::optional<partition_key> last_pkey;
-            std::experimental::optional<clustering_key> last_ckey;
-
-            myvisitor(cql3::selection::result_set_builder& builder,
-                    const schema& s,
-                    const cql3::selection::selection& selection)
-                : visitor(builder, s, selection) {
-            }
-
-            void accept_new_partition(uint32_t) {
-                throw std::logic_error("Should not reach!");
-            }
-            void accept_new_partition(const partition_key& key, uint32_t row_count) {
-                qlogger.trace("Accepting partition: {} ({})", key, row_count);
-                total_rows += std::max(row_count, 1u);
-                last_pkey = key;
-                last_ckey = { };
-                visitor::accept_new_partition(key, row_count);
-            }
-            void accept_new_row(const clustering_key& key,
-                    const query::result_row_view& static_row,
-                    const query::result_row_view& row) {
-                last_ckey = key;
-                visitor::accept_new_row(key, static_row, row);
-            }
-            void accept_new_row(const query::result_row_view& static_row,
-                    const query::result_row_view& row) {
-                visitor::accept_new_row(static_row, row);
-            }
-            void accept_partition_end(const query::result_row_view& static_row) {
-                visitor::accept_partition_end(static_row);
-            }
-        };
-
-        myvisitor v(builder, *_schema, *_selection);
-        query::result_view::consume(*results, _cmd->slice, v);
-
-        if (_last_pkey) {
-            // refs #752: when doing aggregate queries we will re-use the same
-            // slice repeatedly. Since "specific ck ranges" only deal with
-            // a single extra range, we must clear out the old one.
-            // Even if that were not so, of course, leaving junk in the slice
-            // is bad.
-            _cmd->slice.clear_range(*_schema, *_last_pkey);
-        }
-
-        _max = _max - v.total_rows;
-        _exhausted = (v.total_rows < page_size && !results->is_short_read()) || _max == 0;
-        _last_pkey = v.last_pkey;
-        _last_ckey = v.last_ckey;
-
-        qlogger.debug("Fetched {} rows, max_remain={} {}", v.total_rows, _max, _exhausted ? "(exh)" : "");
-
-        if (_last_pkey) {
-            qlogger.debug("Last partition key: {}", *_last_pkey);
-        }
-        if (_has_clustering_keys && _last_ckey) {
-            qlogger.debug("Last clustering key: {}", *_last_ckey);
-        }
-    }
-
-    bool is_exhausted() const override {
-        return _exhausted;
-    }
-
-    int max_remaining() const override {
-        return _max;
-    }
-
-    ::shared_ptr<const paging_state> state() const override {
-        return _exhausted ?
nullptr :
-                ::make_shared<paging_state>(*_last_pkey,
-                        _last_ckey, _max);
-    }
-
-private:
-    // remember whether we use clustering. if not, each partition == one row
-    const bool _has_clustering_keys;
-    bool _exhausted = false;
-    uint32_t _max;
-
-    std::experimental::optional<partition_key> _last_pkey;
-    std::experimental::optional<clustering_key> _last_ckey;
-
-    schema_ptr _schema;
-    ::shared_ptr<cql3::selection::selection> _selection;
-    service::query_state& _state;
-    const cql3::query_options& _options;
-    lw_shared_ptr<query::read_command> _cmd;
-    dht::partition_range_vector _ranges;
-};
-
-bool service::pager::query_pagers::may_need_paging(uint32_t page_size,
-        const query::read_command& cmd,
-        const dht::partition_range_vector& ranges) {
-    auto est_max_rows =
-            [&] {
-                if (ranges.empty()) {
-                    return cmd.row_limit;
-                }
-                uint32_t n = 0;
-                for (auto& r : ranges) {
-                    if (r.is_singular() && cmd.slice.options.contains<query::partition_slice::option::distinct>()) {
-                        ++n;
-                        continue;
-                    }
-                    return cmd.row_limit;
-                }
-                return n;
-            };
-
-    auto est = est_max_rows();
-    auto need_paging = est > page_size;
-
-    qlogger.debug("Query of {}, page_size={}, limit={} {}", cmd.cf_id, page_size,
-            cmd.row_limit,
-            need_paging ? "requires paging" : "does not require paging");
-
-    return need_paging;
-}
-
-::shared_ptr<service::pager::query_pager> service::pager::query_pagers::pager(
-        schema_ptr s, ::shared_ptr<cql3::selection::selection> selection,
-        service::query_state& state, const cql3::query_options& options,
-        lw_shared_ptr<query::read_command> cmd,
-        dht::partition_range_vector ranges) {
-    return ::make_shared<impl>(std::move(s), std::move(selection), state,
-            options, std::move(cmd), std::move(ranges));
-}
-
diff --git a/scylla/service/pager/query_pagers.hh b/scylla/service/pager/query_pagers.hh
deleted file mode 100644
index b33f4d5..0000000
--- a/scylla/service/pager/query_pagers.hh
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */ - -#pragma once - -#include -#include - -#include "schema.hh" -#include "query-result.hh" -#include "query-request.hh" -#include "service/query_state.hh" -#include "cql3/selection/selection.hh" -#include "cql3/query_options.hh" -#include "query_pager.hh" - -namespace service { - -namespace pager { - -class query_pagers { -public: - static bool may_need_paging(uint32_t page_size, const query::read_command&, - const dht::partition_range_vector&); - static ::shared_ptr pager(schema_ptr, - ::shared_ptr, - service::query_state&, - const cql3::query_options&, - lw_shared_ptr, - dht::partition_range_vector); -private: - class impl; -}; - -} -} - diff --git a/scylla/service/priority_manager.cc b/scylla/service/priority_manager.cc deleted file mode 100644 index c0c2416..0000000 --- a/scylla/service/priority_manager.cc +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ -#include "priority_manager.hh" - -namespace service { -priority_manager& get_local_priority_manager() { - static thread_local priority_manager pm = priority_manager(); - return pm; -} -} diff --git a/scylla/service/priority_manager.hh b/scylla/service/priority_manager.hh deleted file mode 100644 index 44c732e..0000000 --- a/scylla/service/priority_manager.hh +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright 2016 ScyllaDB - */ -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */
-
-#include
-#include
-#include
-
-#include "seastarx.hh"
-
-namespace service {
-class priority_manager {
-    ::io_priority_class _commitlog_priority;
-    ::io_priority_class _mt_flush_priority;
-    ::io_priority_class _stream_read_priority;
-    ::io_priority_class _stream_write_priority;
-    ::io_priority_class _sstable_query_read;
-    ::io_priority_class _compaction_priority;
-
-public:
-    const ::io_priority_class&
-    commitlog_priority() {
-        return _commitlog_priority;
-    }
-
-    const ::io_priority_class&
-    memtable_flush_priority() {
-        return _mt_flush_priority;
-    }
-
-    const ::io_priority_class&
-    streaming_read_priority() {
-        return _stream_read_priority;
-    }
-
-    const ::io_priority_class&
-    streaming_write_priority() {
-        return _stream_write_priority;
-    }
-
-    const ::io_priority_class&
-    sstable_query_read_priority() {
-        return _sstable_query_read;
-    }
-
-    const ::io_priority_class&
-    compaction_priority() {
-        return _compaction_priority;
-    }
-
-    priority_manager()
-        : _commitlog_priority(engine().register_one_priority_class("commitlog", 100))
-        , _mt_flush_priority(engine().register_one_priority_class("memtable_flush", 100))
-        , _stream_read_priority(engine().register_one_priority_class("streaming_read", 20))
-        , _stream_write_priority(engine().register_one_priority_class("streaming_write", 20))
-        , _sstable_query_read(engine().register_one_priority_class("query", 100))
-        , _compaction_priority(engine().register_one_priority_class("compaction", 100))
-
-    {}
-};
-
-priority_manager& get_local_priority_manager();
-const inline ::io_priority_class&
-get_local_commitlog_priority() {
-    return get_local_priority_manager().commitlog_priority();
-}
-
-const inline ::io_priority_class&
-get_local_memtable_flush_priority() {
-    return get_local_priority_manager().memtable_flush_priority();
-}
-
-const inline ::io_priority_class&
-get_local_streaming_read_priority() {
-    return get_local_priority_manager().streaming_read_priority();
-}
-
-const inline ::io_priority_class&
-get_local_streaming_write_priority() {
-    return get_local_priority_manager().streaming_write_priority();
-}
-
-const inline ::io_priority_class&
-get_local_sstable_query_read_priority() {
-    return get_local_priority_manager().sstable_query_read_priority();
-}
-
-const inline ::io_priority_class&
-get_local_compaction_priority() {
-    return get_local_priority_manager().compaction_priority();
-}
-}
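
The shares passed to register_one_priority_class() above are relative weights, not absolute limits: assuming seastar's proportional-share IO scheduling, a 100-share class (commitlog, memtable flush, query, compaction) receives five times the bandwidth of a 20-share class (streaming) when the disk is contended, while an uncontended class may still use the whole device. A toy illustration of the split (the helper is not part of the patch):

    // Expected share of device bandwidth while the listed classes are all active.
    float expected_fraction(float my_shares, float total_active_shares) {
        return my_shares / total_active_shares;
    }
    // e.g. commitlog vs. streaming_read: 100 / (100 + 20) = ~0.83 of bandwidth.
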
diff --git a/scylla/service/query_state.hh b/scylla/service/query_state.hh
deleted file mode 100644
index 3271943..0000000
--- a/scylla/service/query_state.hh
+++ /dev/null
@@ -1,64 +0,0 @@
-
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef SERVICE_QUERY_STATE_HH
-#define SERVICE_QUERY_STATE_HH
-
-#include "service/client_state.hh"
-#include "tracing/tracing.hh"
-
-namespace service {
-
-class query_state final {
-private:
-    client_state _client_state;
-    tracing::trace_state_ptr _trace_state_ptr;
-
-public:
-    query_state(client_state client_state)
-        : _client_state(client_state)
-        , _trace_state_ptr(_client_state.get_trace_state())
-    { }
-
-    const tracing::trace_state_ptr& get_trace_state() const {
-        return _trace_state_ptr;
-    }
-
-    tracing::trace_state_ptr& get_trace_state() {
-        return _trace_state_ptr;
-    }
-
-    client_state& get_client_state() {
-        return _client_state;
-    }
-
-    const client_state& get_client_state() const {
-        return _client_state;
-    }
-
-    api::timestamp_type get_timestamp() {
-        return _client_state.get_timestamp();
-    }
-};
-
-}
-
-#endif
diff --git a/scylla/service/storage_proxy.cc b/scylla/service/storage_proxy.cc
deleted file mode 100644
index cd0fef6..0000000
--- a/scylla/service/storage_proxy.cc
+++ /dev/null
@@ -1,4169 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- * Modified by ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */ - -#include "partition_range_compat.hh" -#include "db/consistency_level.hh" -#include "db/commitlog/commitlog.hh" -#include "storage_proxy.hh" -#include "unimplemented.hh" -#include "frozen_mutation.hh" -#include "query_result_merger.hh" -#include "core/do_with.hh" -#include "message/messaging_service.hh" -#include "gms/failure_detector.hh" -#include "gms/gossiper.hh" -#include "storage_service.hh" -#include "core/future-util.hh" -#include "db/read_repair_decision.hh" -#include "db/config.hh" -#include "db/batchlog_manager.hh" -#include "exceptions/exceptions.hh" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "utils/latency.hh" -#include "schema.hh" -#include "schema_registry.hh" -#include "utils/joinpoint.hh" -#include -#include "core/metrics.hh" -#include - -namespace service { - -static logging::logger slogger("storage_proxy"); -static logging::logger qlogger("query_result"); -static logging::logger mlogger("mutation_data"); - -const sstring storage_proxy::COORDINATOR_STATS_CATEGORY("storage_proxy_coordinator"); -const sstring storage_proxy::REPLICA_STATS_CATEGORY("storage_proxy_replica"); - -distributed _the_storage_proxy; - -using namespace exceptions; - -static inline bool is_me(gms::inet_address from) { - return from == utils::fb_utilities::get_broadcast_address(); -} - -static inline -const dht::token& start_token(const dht::partition_range& r) { - static const dht::token min_token = dht::minimum_token(); - return r.start() ? r.start()->value().token() : min_token; -} - -static inline -const dht::token& end_token(const dht::partition_range& r) { - static const dht::token max_token = dht::maximum_token(); - return r.end() ? r.end()->value().token() : max_token; -} - -static inline -sstring get_dc(gms::inet_address ep) { - auto& snitch_ptr = locator::i_endpoint_snitch::get_local_snitch_ptr(); - return snitch_ptr->get_datacenter(ep); -} - -static inline -sstring get_local_dc() { - auto local_addr = utils::fb_utilities::get_broadcast_address(); - return get_dc(local_addr); -} - -class mutation_holder { -protected: - size_t _size = 0; - schema_ptr _schema; -public: - virtual ~mutation_holder() {} - virtual lw_shared_ptr get_mutation_for(gms::inet_address ep) = 0; - virtual bool is_shared() = 0; - size_t size() const { - return _size; - } - const schema_ptr& schema() { - return _schema; - } -}; - -// different mutation for each destination (for read repairs) -class per_destination_mutation : public mutation_holder { - std::unordered_map> _mutations; - dht::token _token; -public: - per_destination_mutation(const std::unordered_map>& mutations) { - for (auto&& m : mutations) { - lw_shared_ptr fm; - if (m.second) { - _schema = m.second.value().schema(); - _token = m.second.value().token(); - fm = make_lw_shared(freeze(m.second.value())); - _size += fm->representation().size(); - } - _mutations.emplace(m.first, std::move(fm)); - } - } - lw_shared_ptr get_mutation_for(gms::inet_address ep) override { - return _mutations[ep]; - } - virtual bool is_shared() override { - return false; - } - dht::token& token() { - return _token; - } -}; - -// same mutation for each destination -class shared_mutation : public mutation_holder { - lw_shared_ptr _mutation; -public: - shared_mutation(const mutation& m) : _mutation(make_lw_shared(freeze(m))) { - _size = _mutation->representation().size(); - _schema = m.schema(); - }; - lw_shared_ptr get_mutation_for(gms::inet_address ep) override { - 
return _mutation;
-    }
-    virtual bool is_shared() override {
-        return true;
-    }
-};
-
-class abstract_write_response_handler : public enable_shared_from_this<abstract_write_response_handler> {
-protected:
-    storage_proxy::response_id_type _id;
-    promise<> _ready; // available when cl is achieved
-    shared_ptr<storage_proxy> _proxy;
-    tracing::trace_state_ptr _trace_state;
-    db::consistency_level _cl;
-    keyspace& _ks;
-    db::write_type _type;
-    std::unique_ptr<mutation_holder> _mutation_holder;
-    std::unordered_set<gms::inet_address> _targets; // who we sent this mutation to
-    size_t _pending_endpoints; // how many endpoints are in the bootstrap state
-    // added dead_endpoints as a member here as well, to be able to carry the info across
-    // calls in helper methods in a convenient way. Since we hope this will be empty most of the time
-    // it should not be a huge burden. (flw)
-    std::vector<gms::inet_address> _dead_endpoints;
-    size_t _cl_acks = 0;
-    bool _cl_achieved = false;
-    bool _timedout = false;
-    bool _throttled = false;
-protected:
-    size_t total_block_for() {
-        // original comment from cassandra:
-        // during bootstrap, include pending endpoints in the count
-        // or we may fail the consistency level guarantees (see #833, #8058)
-        return db::block_for(_ks, _cl) + _pending_endpoints;
-    }
-    virtual void signal(gms::inet_address from) {
-        signal();
-    }
-public:
-    abstract_write_response_handler(shared_ptr<storage_proxy> p, keyspace& ks, db::consistency_level cl, db::write_type type,
-            std::unique_ptr<mutation_holder> mh, std::unordered_set<gms::inet_address> targets, tracing::trace_state_ptr trace_state,
-            size_t pending_endpoints = 0, std::vector<gms::inet_address> dead_endpoints = {})
-            : _id(p->_next_response_id++), _proxy(std::move(p)), _trace_state(trace_state), _cl(cl), _ks(ks), _type(type), _mutation_holder(std::move(mh)), _targets(std::move(targets)),
-              _pending_endpoints(pending_endpoints), _dead_endpoints(std::move(dead_endpoints)) {
-        ++_proxy->_stats.writes;
-    }
-    virtual ~abstract_write_response_handler() {
-        --_proxy->_stats.writes;
-        if (_cl_achieved) {
-            if (_throttled) {
-                _ready.set_value();
-            } else {
-                _proxy->_stats.background_writes--;
-                _proxy->_stats.background_write_bytes -= _mutation_holder->size();
-                _proxy->unthrottle();
-            }
-        } else if (_timedout) {
-            _ready.set_exception(mutation_write_timeout_exception(get_schema()->ks_name(), get_schema()->cf_name(), _cl, _cl_acks, total_block_for(), _type));
-        }
-    };
-    bool is_counter() const {
-        return _type == db::write_type::COUNTER;
-    }
-    void unthrottle() {
-        _proxy->_stats.background_writes++;
-        _proxy->_stats.background_write_bytes += _mutation_holder->size();
-        _throttled = false;
-        _ready.set_value();
-    }
-    void signal(size_t nr = 1) {
-        _cl_acks += nr;
-        if (!_cl_achieved && _cl_acks >= total_block_for()) {
-            _cl_achieved = true;
-            if (_proxy->need_throttle_writes()) {
-                _throttled = true;
-                _proxy->_throttled_writes.push_back(_id);
-                ++_proxy->_stats.throttled_writes;
-            } else {
-                unthrottle();
-            }
-        }
-    }
-    void on_timeout() {
-        if (_cl_achieved) {
-            slogger.trace("Write is not acknowledged by {} replicas after achieving CL", get_targets());
-        }
-        _timedout = true;
-    }
-    // returns true on the last ack
-    bool response(gms::inet_address from) {
-        signal(from);
-        auto it = _targets.find(from);
-        assert(it != _targets.end());
-        _targets.erase(it);
-        return _targets.size() == 0;
-    }
-    future<> wait() {
-        return _ready.get_future();
-    }
-    const std::unordered_set<gms::inet_address>& get_targets() const {
-        return _targets;
-    }
-    const std::vector<gms::inet_address>& get_dead_endpoints() const {
-        return _dead_endpoints;
-    }
-    lw_shared_ptr<frozen_mutation> get_mutation_for(gms::inet_address ep) {
-        return _mutation_holder->get_mutation_for(ep);
-    }
-    const schema_ptr& get_schema() const {
-        return _mutation_holder->schema();
-    }
-    storage_proxy::response_id_type id() const {
-        return _id;
-    }
-    bool read_repair_write() {
-        return !_mutation_holder->is_shared();
-    }
-    const tracing::trace_state_ptr& get_trace_state() const {
-        return _trace_state;
-    }
-    friend storage_proxy;
-};
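
A worked example of the acknowledgement threshold computed by total_block_for() above: with RF = 3 and CL = QUORUM, db::block_for(ks, cl) is 2; if one replica is bootstrapping, _pending_endpoints adds 1, so the handler waits for 3 acks before signalling success. That way the consistency guarantee still holds once the pending node finishes joining (the #833/#8058 scenario referenced in the comment).
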
-
-class datacenter_write_response_handler : public abstract_write_response_handler {
-    void signal(gms::inet_address from) override {
-        if (is_me(from) || db::is_local(from)) {
-            abstract_write_response_handler::signal();
-        }
-    }
-public:
-    datacenter_write_response_handler(shared_ptr<storage_proxy> p, keyspace& ks, db::consistency_level cl, db::write_type type,
-            std::unique_ptr<mutation_holder> mh, std::unordered_set<gms::inet_address> targets,
-            std::vector<gms::inet_address> pending_endpoints, std::vector<gms::inet_address> dead_endpoints, tracing::trace_state_ptr tr_state) :
-                abstract_write_response_handler(std::move(p), ks, cl, type, std::move(mh),
-                        std::move(targets), std::move(tr_state), db::count_local_endpoints(pending_endpoints), std::move(dead_endpoints)) {}
-};
-
-class write_response_handler : public abstract_write_response_handler {
-public:
-    write_response_handler(shared_ptr<storage_proxy> p, keyspace& ks, db::consistency_level cl, db::write_type type,
-            std::unique_ptr<mutation_holder> mh, std::unordered_set<gms::inet_address> targets,
-            std::vector<gms::inet_address> pending_endpoints, std::vector<gms::inet_address> dead_endpoints, tracing::trace_state_ptr tr_state) :
-                abstract_write_response_handler(std::move(p), ks, cl, type, std::move(mh),
-                        std::move(targets), std::move(tr_state), pending_endpoints.size(), std::move(dead_endpoints)) {}
-};
-
-class datacenter_sync_write_response_handler : public abstract_write_response_handler {
-    std::unordered_map<sstring, size_t> _dc_responses;
-    void signal(gms::inet_address from) override {
-        auto& snitch_ptr = locator::i_endpoint_snitch::get_local_snitch_ptr();
-        sstring data_center = snitch_ptr->get_datacenter(from);
-        auto dc_resp = _dc_responses.find(data_center);
-
-        if (dc_resp->second > 0) {
-            --dc_resp->second;
-            abstract_write_response_handler::signal();
-        }
-    }
-public:
-    datacenter_sync_write_response_handler(shared_ptr<storage_proxy> p, keyspace& ks, db::consistency_level cl, db::write_type type,
-            std::unique_ptr<mutation_holder> mh, std::unordered_set<gms::inet_address> targets, std::vector<gms::inet_address> pending_endpoints,
-            std::vector<gms::inet_address> dead_endpoints, tracing::trace_state_ptr tr_state) :
-                abstract_write_response_handler(std::move(p), ks, cl, type, std::move(mh), targets, std::move(tr_state), 0, dead_endpoints) {
-        auto& snitch_ptr = locator::i_endpoint_snitch::get_local_snitch_ptr();
-
-        for (auto& target : targets) {
-            auto dc = snitch_ptr->get_datacenter(target);
-
-            if (_dc_responses.find(dc) == _dc_responses.end()) {
-                auto pending_for_dc = boost::range::count_if(pending_endpoints, [&snitch_ptr, &dc] (gms::inet_address& ep) {
-                    return snitch_ptr->get_datacenter(ep) == dc;
-                });
-                _dc_responses.emplace(dc, db::local_quorum_for(ks, dc) + pending_for_dc);
-                _pending_endpoints += pending_for_dc;
-            }
-        }
-    }
-};
-
-bool storage_proxy::need_throttle_writes() const {
-    return _stats.background_write_bytes > memory::stats().total_memory() / 10 || _stats.queued_write_bytes > 6*1024*1024;
-}
-
-void storage_proxy::unthrottle() {
-    while (!need_throttle_writes() && !_throttled_writes.empty()) {
-        auto id = _throttled_writes.front();
-        _throttled_writes.pop_front();
-        auto it = _response_handlers.find(id);
-        if (it != _response_handlers.end()) {
-            it->second.handler->unthrottle();
-        }
-    }
-}
-
-storage_proxy::response_id_type storage_proxy::register_response_handler(shared_ptr<abstract_write_response_handler>&& h) {
-    auto id =
h->id();
-    auto e = _response_handlers.emplace(id, rh_entry(std::move(h), [this, id] {
-        auto& e = _response_handlers.find(id)->second;
-        if (e.handler->_cl_achieved || e.handler->_cl == db::consistency_level::ANY) {
-            // we are here because either cl was achieved, but targets left in the handler are not
-            // responding, so a hint should be written for them, or cl == ANY, in which case
-            // hints are counted towards consistency, so we need to write hints and count how many were written
-            auto hints = hint_to_dead_endpoints(e.handler->_mutation_holder, e.handler->get_targets());
-            e.handler->signal(hints);
-            if (e.handler->_cl == db::consistency_level::ANY && hints) {
-                slogger.trace("Wrote hint to satisfy CL.ANY after no replicas acknowledged the write");
-            }
-        }
-
-        e.handler->on_timeout();
-        remove_response_handler(id);
-    }));
-    assert(e.second);
-    return id;
-}
-
-void storage_proxy::remove_response_handler(storage_proxy::response_id_type id) {
-    _response_handlers.erase(id);
-}
-
-void storage_proxy::got_response(storage_proxy::response_id_type id, gms::inet_address from) {
-    auto it = _response_handlers.find(id);
-    if (it != _response_handlers.end()) {
-        tracing::trace(it->second.handler->get_trace_state(), "Got a response from /{}", from);
-        if (it->second.handler->response(from)) {
-            remove_response_handler(id); // last one, remove entry. Will cancel expiration timer too.
-        }
-    }
-}
-
-future<> storage_proxy::response_wait(storage_proxy::response_id_type id, clock_type::time_point timeout) {
-    auto& e = _response_handlers.find(id)->second;
-
-    e.expire_timer.arm(timeout);
-
-    return e.handler->wait();
-}
-
-::shared_ptr<abstract_write_response_handler>& storage_proxy::get_write_response_handler(storage_proxy::response_id_type id) {
-    return _response_handlers.find(id)->second.handler;
-}
-
-storage_proxy::response_id_type storage_proxy::create_write_response_handler(keyspace& ks, db::consistency_level cl, db::write_type type, std::unique_ptr<mutation_holder> m,
-        std::unordered_set<gms::inet_address> targets, const std::vector<gms::inet_address>& pending_endpoints, std::vector<gms::inet_address> dead_endpoints, tracing::trace_state_ptr tr_state)
-{
-    shared_ptr<abstract_write_response_handler> h;
-    auto& rs = ks.get_replication_strategy();
-
-    if (db::is_datacenter_local(cl)) {
-        h = ::make_shared<datacenter_write_response_handler>(shared_from_this(), ks, cl, type, std::move(m), std::move(targets), std::move(pending_endpoints), std::move(dead_endpoints), std::move(tr_state));
-    } else if (cl == db::consistency_level::EACH_QUORUM && rs.get_type() == locator::replication_strategy_type::network_topology) {
-        h = ::make_shared<datacenter_sync_write_response_handler>(shared_from_this(), ks, cl, type, std::move(m), std::move(targets), std::move(pending_endpoints), std::move(dead_endpoints), std::move(tr_state));
-    } else {
-        h = ::make_shared<write_response_handler>(shared_from_this(), ks, cl, type, std::move(m), std::move(targets), std::move(pending_endpoints), std::move(dead_endpoints), std::move(tr_state));
-    }
-    return register_response_handler(std::move(h));
-}
-
-seastar::metrics::label storage_proxy::split_stats::datacenter_label("datacenter");
-seastar::metrics::label storage_proxy::split_stats::op_type_label("op_type");
-
-storage_proxy::split_stats::split_stats(const sstring& category, const sstring& short_description_prefix, const sstring& long_description_prefix, const sstring& op_type)
-        : _short_description_prefix(short_description_prefix)
-        , _long_description_prefix(long_description_prefix)
-        , _category(category)
-        , _op_type(op_type) {
-    // register a local Node counter to begin with...
- namespace sm = seastar::metrics; - - _metrics.add_group(_category, { - sm::make_derive(_short_description_prefix + sstring("_local_node"), [this] { return _local.val; }, - sm::description(_long_description_prefix + "on a local Node"), {op_type_label(_op_type)}) - }); -} - -storage_proxy::stats::stats() - : writes_attempts(COORDINATOR_STATS_CATEGORY, "total_write_attempts", "total number of write requests", "mutation_data") - , writes_errors(COORDINATOR_STATS_CATEGORY, "write_errors", "number of write requests that failed", "mutation_data") - , read_repair_write_attempts(COORDINATOR_STATS_CATEGORY, "read_repair_write_attempts", "number of write operations in a read repair context", "mutation_data") - , data_read_attempts(COORDINATOR_STATS_CATEGORY, "reads", "number of data read requests", "data") - , data_read_completed(COORDINATOR_STATS_CATEGORY, "completed_reads", "number of data read requests that completed", "data") - , data_read_errors(COORDINATOR_STATS_CATEGORY, "read_errors", "number of data read requests that failed", "data") - , digest_read_attempts(COORDINATOR_STATS_CATEGORY, "reads", "number of digest read requests", "digest") - , digest_read_completed(COORDINATOR_STATS_CATEGORY, "completed_reads", "number of digest read requests that completed", "digest") - , digest_read_errors(COORDINATOR_STATS_CATEGORY, "read_errors", "number of digest read requests that failed", "digest") - , mutation_data_read_attempts(COORDINATOR_STATS_CATEGORY, "reads", "number of mutation data read requests", "mutation_data") - , mutation_data_read_completed(COORDINATOR_STATS_CATEGORY, "completed_reads", "number of mutation data read requests that completed", "mutation_data") - , mutation_data_read_errors(COORDINATOR_STATS_CATEGORY, "read_errors", "number of mutation data read requests that failed", "mutation_data") {} - -inline uint64_t& storage_proxy::split_stats::get_ep_stat(gms::inet_address ep) { - if (is_me(ep)) { - return _local.val; - } - - sstring dc = get_dc(ep); - - // if this is the first time we see an endpoint from this DC - add a - // corresponding collectd metric - if (_dc_stats.find(dc) == _dc_stats.end()) { - namespace sm = seastar::metrics; - - _metrics.add_group(_category, { - sm::make_derive(_short_description_prefix + sstring("_remote_node"), [this, dc] { return _dc_stats[dc].val; }, - sm::description(seastar::format("{} when communicating with external Nodes in DC {}", _long_description_prefix, dc)), {datacenter_label(dc), op_type_label(_op_type)}) - }); - } - return _dc_stats[dc].val; -} - -storage_proxy::~storage_proxy() {} -storage_proxy::storage_proxy(distributed& db) : _db(db) { - namespace sm = seastar::metrics; - _metrics.add_group(COORDINATOR_STATS_CATEGORY, { - sm::make_histogram("read_latency", sm::description("The general read latency histogram"), [this]{return _stats.estimated_read.get_histogram();}), - sm::make_histogram("write_latency", sm::description("The general write latency histogram"), [this]{return _stats.estimated_write.get_histogram();}), - sm::make_queue_length("foreground_writes", [this] { return _stats.writes - _stats.background_writes; }, - sm::description("number of currently pending foreground write requests")), - - sm::make_queue_length("background_writes", [this] { return _stats.background_writes; }, - sm::description("number of currently pending background write requests")), - - sm::make_queue_length("current_throttled_writes", [this] { return _throttled_writes.size(); }, - sm::description("number of currently throttled write requests")), - - 
sm::make_total_operations("throttled_writes", [this] { return _stats.throttled_writes; },
-                       sm::description("number of throttled write requests")),
-
-        sm::make_current_bytes("queued_write_bytes", [this] { return _stats.queued_write_bytes; },
-                       sm::description("number of bytes in pending write requests")),
-
-        sm::make_current_bytes("background_write_bytes", [this] { return _stats.background_write_bytes; },
-                       sm::description("number of bytes in pending background write requests")),
-
-        sm::make_queue_length("foreground_reads", [this] { return _stats.reads - _stats.background_reads; },
-                       sm::description("number of currently pending foreground read requests")),
-
-        sm::make_queue_length("background_reads", [this] { return _stats.background_reads; },
-                       sm::description("number of currently pending background read requests")),
-
-        sm::make_total_operations("read_retries", [this] { return _stats.read_retries; },
-                       sm::description("number of read retry attempts")),
-
-        sm::make_total_operations("canceled_read_repairs", [this] { return _stats.global_read_repairs_canceled_due_to_concurrent_write; },
-                       sm::description("number of global read repairs canceled due to a concurrent write")),
-
-        sm::make_total_operations("foreground_read_repair", [this] { return _stats.read_repair_repaired_blocking; },
-                       sm::description("number of foreground read repairs")),
-
-        sm::make_total_operations("background_read_repairs", [this] { return _stats.read_repair_repaired_background; },
-                       sm::description("number of background read repairs")),
-
-        sm::make_total_operations("write_timeouts", [this] { return _stats.write_timeouts._count; },
-                       sm::description("number of write requests that failed due to a timeout")),
-
-        sm::make_total_operations("write_unavailable", [this] { return _stats.write_unavailables._count; },
-                       sm::description("number of write requests that failed due to an \"unavailable\" error")),
-
-        sm::make_total_operations("read_timeouts", [this] { return _stats.read_timeouts._count; },
-                       sm::description("number of read requests that failed due to a timeout")),
-
-        sm::make_total_operations("read_unavailable", [this] { return _stats.read_unavailables._count; },
-                       sm::description("number of read requests that failed due to an \"unavailable\" error")),
-
-        sm::make_total_operations("range_timeouts", [this] { return _stats.range_slice_timeouts._count; },
-                       sm::description("number of range read operations that failed due to a timeout")),
-
-        sm::make_total_operations("range_unavailable", [this] { return _stats.range_slice_unavailables._count; },
-                       sm::description("number of range read operations that failed due to an \"unavailable\" error")),
-    });
-
-    _metrics.add_group(REPLICA_STATS_CATEGORY, {
-        sm::make_total_operations("received_counter_updates", _stats.received_counter_updates,
-                       sm::description("number of counter updates received by this node acting as an update leader")),
-
-        sm::make_total_operations("received_mutations", _stats.received_mutations,
-                       sm::description("number of mutations received by a replica Node")),
-
-        sm::make_total_operations("forwarded_mutations", _stats.forwarded_mutations,
-                       sm::description("number of mutations forwarded to other replica Nodes")),
-
-        sm::make_total_operations("forwarding_errors", _stats.forwarding_errors,
-                       sm::description("number of errors while forwarding mutations to other replica Nodes")),
-
-        sm::make_total_operations("reads", _stats.replica_data_reads,
-                       sm::description("number of remote data read requests this Node received"), {storage_proxy::split_stats::op_type_label("data")}),
-
sm::make_total_operations("reads", _stats.replica_mutation_data_reads,
-                       sm::description("number of remote mutation data read requests this Node received"), {storage_proxy::split_stats::op_type_label("mutation_data")}),
-
-        sm::make_total_operations("reads", _stats.replica_digest_reads,
-                       sm::description("number of remote digest read requests this Node received"), {storage_proxy::split_stats::op_type_label("digest")}),
-
-    });
-}
-
-storage_proxy::rh_entry::rh_entry(shared_ptr<abstract_write_response_handler>&& h, std::function<void()>&& cb) : handler(std::move(h)), expire_timer(std::move(cb)) {}
-
-storage_proxy::unique_response_handler::unique_response_handler(storage_proxy& p_, response_id_type id_) : id(id_), p(p_) {}
-storage_proxy::unique_response_handler::unique_response_handler(unique_response_handler&& x) : id(x.id), p(x.p) { x.id = 0; };
-storage_proxy::unique_response_handler::~unique_response_handler() {
-    if (id) {
-        p.remove_response_handler(id);
-    }
-}
-storage_proxy::response_id_type storage_proxy::unique_response_handler::release() {
-    auto r = id;
-    id = 0;
-    return r;
-}
-
-#if 0
-    static
-    {
-        /*
-         * We execute counter writes in 2 places: either directly in the coordinator node if it is a replica, or
-         * in CounterMutationVerbHandler on a replica otherwise. The write must be executed on the COUNTER_MUTATION stage
-         * but in the latter case the verb handler already runs on the COUNTER_MUTATION stage, so we must not execute the
-         * underlying task on the stage again, otherwise we risk a deadlock. Hence the two different performers.
-         */
-        counterWritePerformer = new WritePerformer()
-        {
-            public void apply(IMutation mutation,
-                              Iterable<InetAddress> targets,
-                              AbstractWriteResponseHandler responseHandler,
-                              String localDataCenter,
-                              ConsistencyLevel consistencyLevel)
-            {
-                counterWriteTask(mutation, targets, responseHandler, localDataCenter).run();
-            }
-        };
-
-        counterWriteOnCoordinatorPerformer = new WritePerformer()
-        {
-            public void apply(IMutation mutation,
-                              Iterable<InetAddress> targets,
-                              AbstractWriteResponseHandler responseHandler,
-                              String localDataCenter,
-                              ConsistencyLevel consistencyLevel)
-            {
-                StageManager.getStage(Stage.COUNTER_MUTATION)
-                        .execute(counterWriteTask(mutation, targets, responseHandler, localDataCenter));
-            }
-        };
-    }
-
-    /**
-     * Apply @param updates if and only if the current values in the row for @param key
-     * match the provided @param conditions. The algorithm is "raw" Paxos: that is, Paxos
-     * minus leader election -- any node in the cluster may propose changes for any row,
-     * which (that is, the row) is the unit of values being proposed, not single columns.
-     *
-     * The Paxos cohort is only the replicas for the given key, not the entire cluster.
-     * So we expect performance to be reasonable, but CAS is still intended to be used
-     * "when you really need it," not for all your updates.
-     *
-     * There are three phases to Paxos:
-     *  1. Prepare: the coordinator generates a ballot (timeUUID in our case) and asks replicas to (a) promise
-     *     not to accept updates from older ballots and (b) tell us about the most recent update it has already
-     *     accepted.
-     *  2. Accept: if a majority of replicas reply, the coordinator asks replicas to accept the value of the
-     *     highest proposal ballot it heard about, or a new value if no in-progress proposals were reported.
-     *  3. Commit (Learn): if a majority of replicas acknowledge the accept request, we can commit the new
-     *     value.
-     *
-     * Commit procedure is not covered in "Paxos Made Simple," and only briefly mentioned in "Paxos Made Live,"
-     * so here is our approach:
-     *  3a. The coordinator sends a commit message to all replicas with the ballot and value.
-     * 3b. Because of 1-2, this will be the highest-seen commit ballot. The replicas will note that,
-     *     and send it with subsequent promise replies. This allows us to discard acceptance records
-     *     for successfully committed replicas, without allowing incomplete proposals to commit erroneously
-     *     later on.
-     *
-     * Note that since we are performing a CAS rather than a simple update, we perform a read (of committed
-     * values) between the prepare and accept phases. This gives us a slightly longer window for another
-     * coordinator to come along and trump our own promise with a newer one but is otherwise safe.
-     *
-     * @param keyspaceName the keyspace for the CAS
-     * @param cfName the column family for the CAS
-     * @param key the row key for the row to CAS
-     * @param request the conditions for the CAS to apply as well as the update to perform if the conditions hold.
-     * @param consistencyForPaxos the consistency for the paxos prepare and propose round. This can only be either SERIAL or LOCAL_SERIAL.
-     * @param consistencyForCommit the consistency for the write done during the commit phase. This can be anything, except SERIAL or LOCAL_SERIAL.
-     *
-     * @return null if the operation succeeds in updating the row, or the current values corresponding to conditions.
-     * (since, if the CAS doesn't succeed, it means the current values do not match the conditions).
-     */
-    public static ColumnFamily cas(String keyspaceName,
-                                   String cfName,
-                                   ByteBuffer key,
-                                   CASRequest request,
-                                   ConsistencyLevel consistencyForPaxos,
-                                   ConsistencyLevel consistencyForCommit,
-                                   ClientState state)
-    throws UnavailableException, IsBootstrappingException, ReadTimeoutException, WriteTimeoutException, InvalidRequestException
-    {
-        final long start = System.nanoTime();
-        int contentions = 0;
-        try
-        {
-            consistencyForPaxos.validateForCas();
-            consistencyForCommit.validateForCasCommit(keyspaceName);
-
-            CFMetaData metadata = Schema.instance.getCFMetaData(keyspaceName, cfName);
-
-            long timeout = TimeUnit.MILLISECONDS.toNanos(DatabaseDescriptor.getCasContentionTimeout());
-            while (System.nanoTime() - start < timeout)
-            {
-                // for simplicity, we'll do a single liveness check at the start of each attempt
-                Pair<List<InetAddress>, Integer> p = getPaxosParticipants(keyspaceName, key, consistencyForPaxos);
-                List<InetAddress> liveEndpoints = p.left;
-                int requiredParticipants = p.right;
-
-                final Pair<UUID, Integer> pair = beginAndRepairPaxos(start, key, metadata, liveEndpoints, requiredParticipants, consistencyForPaxos, consistencyForCommit, true, state);
-                final UUID ballot = pair.left;
-                contentions += pair.right;
-                // read the current values and check they satisfy the conditions
-                Tracing.trace("Reading existing values for CAS precondition");
-                long timestamp = System.currentTimeMillis();
-                ReadCommand readCommand = ReadCommand.create(keyspaceName, key, cfName, timestamp, request.readFilter());
-                List<Row> rows = read(Arrays.asList(readCommand), consistencyForPaxos == ConsistencyLevel.LOCAL_SERIAL ? ConsistencyLevel.LOCAL_QUORUM : ConsistencyLevel.QUORUM);
-                ColumnFamily current = rows.get(0).cf;
-                if (!request.appliesTo(current))
-                {
-                    Tracing.trace("CAS precondition does not match current values {}", current);
-                    // We should not return null as this means success
-                    casWriteMetrics.conditionNotMet.inc();
-                    return current == null ? ArrayBackedSortedColumns.factory.create(metadata) : current;
-                }
-
-                // finish the paxos round w/ the desired updates
-                // TODO turn null updates into delete?
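-                // As an illustration (not part of the original sources): for a CASRequest
-                // built from "UPDATE t SET v = 2 WHERE k = 1 IF v = 1", the quorum read
-                // above fills 'current' with the committed row for k = 1, appliesTo()
-                // checks v == 1, and only then is the update below built and proposed
-                // under our ballot.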
-                ColumnFamily updates = request.makeUpdates(current);
-
-                // Apply triggers to cas updates. A consideration here is that
-                // triggers emit Mutations, and so a given trigger implementation
-                // may generate mutations for partitions other than the one this
-                // paxos round is scoped for. In this case, TriggerExecutor will
-                // validate that the generated mutations are targeted at the same
-                // partition as the initial updates and reject (via an
-                // InvalidRequestException) any which aren't.
-                updates = TriggerExecutor.instance.execute(key, updates);
-
-                Commit proposal = Commit.newProposal(key, ballot, updates);
-                Tracing.trace("CAS precondition is met; proposing client-requested updates for {}", ballot);
-                if (proposePaxos(proposal, liveEndpoints, requiredParticipants, true, consistencyForPaxos))
-                {
-                    commitPaxos(proposal, consistencyForCommit);
-                    Tracing.trace("CAS successful");
-                    return null;
-                }
-
-                Tracing.trace("Paxos proposal not accepted (pre-empted by a higher ballot)");
-                contentions++;
-                Uninterruptibles.sleepUninterruptibly(ThreadLocalRandom.current().nextInt(100), TimeUnit.MILLISECONDS);
-                // continue to retry
-            }
-
-            throw new WriteTimeoutException(WriteType.CAS, consistencyForPaxos, 0, consistencyForPaxos.blockFor(Keyspace.open(keyspaceName)));
-        }
-        catch (WriteTimeoutException|ReadTimeoutException e)
-        {
-            casWriteMetrics.timeouts.mark();
-            throw e;
-        }
-        catch(UnavailableException e)
-        {
-            casWriteMetrics.unavailables.mark();
-            throw e;
-        }
-        finally
-        {
-            if(contentions > 0)
-                casWriteMetrics.contention.update(contentions);
-            casWriteMetrics.addNano(System.nanoTime() - start);
-        }
-    }
-
-    private static Predicate<InetAddress> sameDCPredicateFor(final String dc)
-    {
-        final IEndpointSnitch snitch = DatabaseDescriptor.getEndpointSnitch();
-        return new Predicate<InetAddress>()
-        {
-            public boolean apply(InetAddress host)
-            {
-                return dc.equals(snitch.getDatacenter(host));
-            }
-        };
-    }
-
-    private static Pair<List<InetAddress>, Integer> getPaxosParticipants(String keyspaceName, ByteBuffer key, ConsistencyLevel consistencyForPaxos) throws UnavailableException
-    {
-        Token tk = StorageService.getPartitioner().getToken(key);
-        List<InetAddress> naturalEndpoints = StorageService.instance.getNaturalEndpoints(keyspaceName, tk);
-        Collection<InetAddress> pendingEndpoints = StorageService.instance.getTokenMetadata().pendingEndpointsFor(tk, keyspaceName);
-
-        if (consistencyForPaxos == ConsistencyLevel.LOCAL_SERIAL)
-        {
-            // Restrict naturalEndpoints and pendingEndpoints to nodes in the local DC only
-            String localDc = DatabaseDescriptor.getEndpointSnitch().getDatacenter(FBUtilities.getBroadcastAddress());
-            Predicate<InetAddress> isLocalDc = sameDCPredicateFor(localDc);
-            naturalEndpoints = ImmutableList.copyOf(Iterables.filter(naturalEndpoints, isLocalDc));
-            pendingEndpoints = ImmutableList.copyOf(Iterables.filter(pendingEndpoints, isLocalDc));
-        }
-        int participants = pendingEndpoints.size() + naturalEndpoints.size();
-        int requiredParticipants = participants / 2 + 1; // a simple majority; see CASSANDRA-833
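-        // Worked example (illustrative, not in the original sources): with 3 natural
-        // and 0 pending endpoints, participants = 3 and requiredParticipants = 2;
-        // with 1 pending endpoint, participants = 4 and requiredParticipants = 3.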
-        List<InetAddress> liveEndpoints = ImmutableList.copyOf(Iterables.filter(Iterables.concat(naturalEndpoints, pendingEndpoints), IAsyncCallback.isAlive));
-        if (liveEndpoints.size() < requiredParticipants)
-            throw new UnavailableException(consistencyForPaxos, requiredParticipants, liveEndpoints.size());
-
-        // We cannot allow CAS operations with 2 or more pending endpoints, see #8346.
-        // Note that we fake an impossible number of required nodes in the unavailable exception
-        // to nail home the point that it's an impossible operation no matter how many nodes are live.
-        if (pendingEndpoints.size() > 1)
-            throw new UnavailableException(String.format("Cannot perform LWT operation as there is more than one (%d) pending range movement", pendingEndpoints.size()),
-                                           consistencyForPaxos,
-                                           participants + 1,
-                                           liveEndpoints.size());
-
-        return Pair.create(liveEndpoints, requiredParticipants);
-    }
-
-    /**
-     * begin a Paxos session by sending a prepare request and completing any in-progress requests seen in the replies
-     *
-     * @return the Paxos ballot promised by the replicas if no in-progress requests were seen and a quorum of
-     * nodes have seen the mostRecentCommit. Otherwise, return null.
-     */
-    private static Pair<UUID, Integer> beginAndRepairPaxos(long start,
-                                                           ByteBuffer key,
-                                                           CFMetaData metadata,
-                                                           List<InetAddress> liveEndpoints,
-                                                           int requiredParticipants,
-                                                           ConsistencyLevel consistencyForPaxos,
-                                                           ConsistencyLevel consistencyForCommit,
-                                                           final boolean isWrite,
-                                                           ClientState state)
-    throws WriteTimeoutException
-    {
-        long timeout = TimeUnit.MILLISECONDS.toNanos(DatabaseDescriptor.getCasContentionTimeout());
-
-        PrepareCallback summary = null;
-        int contentions = 0;
-        while (System.nanoTime() - start < timeout)
-        {
-            // We don't want to use a timestamp that is older than the last one assigned by the ClientState or operations
-            // may appear out-of-order (#7801). But note that state.getTimestamp() is in microseconds while the ballot
-            // timestamp is only in milliseconds
-            long currentTime = (state.getTimestamp() / 1000) + 1;
-            long ballotMillis = summary == null
-                              ? currentTime
-                              : Math.max(currentTime, 1 + UUIDGen.unixTimestamp(summary.mostRecentInProgressCommit.ballot));
-            UUID ballot = UUIDGen.getTimeUUID(ballotMillis);
-
-            // prepare
-            Tracing.trace("Preparing {}", ballot);
-            Commit toPrepare = Commit.newPrepare(key, metadata, ballot);
-            summary = preparePaxos(toPrepare, liveEndpoints, requiredParticipants, consistencyForPaxos);
-            if (!summary.promised)
-            {
-                Tracing.trace("Some replicas have already promised a higher ballot than ours; aborting");
-                contentions++;
-                // sleep a random amount to give the other proposer a chance to finish
-                Uninterruptibles.sleepUninterruptibly(ThreadLocalRandom.current().nextInt(100), TimeUnit.MILLISECONDS);
-                continue;
-            }
-
-            Commit inProgress = summary.mostRecentInProgressCommitWithUpdate;
-            Commit mostRecent = summary.mostRecentCommit;
-
-            // If we have an in-progress ballot greater than the MRC we know, then it's an in-progress round that
-            // needs to be completed, so do it.
-            if (!inProgress.update.isEmpty() && inProgress.isAfter(mostRecent))
-            {
-                Tracing.trace("Finishing incomplete paxos round {}", inProgress);
-                if(isWrite)
-                    casWriteMetrics.unfinishedCommit.inc();
-                else
-                    casReadMetrics.unfinishedCommit.inc();
-                Commit refreshedInProgress = Commit.newProposal(inProgress.key, ballot, inProgress.update);
-                if (proposePaxos(refreshedInProgress, liveEndpoints, requiredParticipants, false, consistencyForPaxos))
-                {
-                    commitPaxos(refreshedInProgress, consistencyForCommit);
-                }
-                else
-                {
-                    Tracing.trace("Some replicas have already promised a higher ballot than ours; aborting");
-                    // sleep a random amount to give the other proposer a chance to finish
-                    contentions++;
-                    Uninterruptibles.sleepUninterruptibly(ThreadLocalRandom.current().nextInt(100), TimeUnit.MILLISECONDS);
-                }
-                continue;
-            }
-
-            // To be able to propose our value on a new round, we need a quorum of replicas to have learned the previous one.
-            // Why this is needed is explained at:
-            // https://issues.apache.org/jira/browse/CASSANDRA-5062?focusedCommentId=13619810&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-13619810)
-            // Since we waited for quorum nodes, if some of them haven't seen the last commit (which may just be a timing issue, but may also
-            // mean we lost messages), we pro-actively "repair" those nodes, and retry.
-            Iterable<InetAddress> missingMRC = summary.replicasMissingMostRecentCommit();
-            if (Iterables.size(missingMRC) > 0)
-            {
-                Tracing.trace("Repairing replicas that missed the most recent commit");
-                sendCommit(mostRecent, missingMRC);
-                // TODO: provided commits don't invalidate the prepare we just did above (which they don't), we could just wait
-                // for all the missingMRC to acknowledge this commit and then move on with proposing our value. But that means
-                // adding the ability to have commitPaxos block, which is exactly what CASSANDRA-5442 will do. So once we have that
-                // latter ticket, we can pass CL.ALL to the commit above and remove the 'continue'.
-                continue;
-            }
-
-            // We might commit this ballot and we want to ensure operations starting after this CAS succeeds will be assigned
-            // a timestamp greater than the one of this ballot, so operation order is preserved (#7801)
-            state.updateLastTimestamp(ballotMillis * 1000);
-
-            return Pair.create(ballot, contentions);
-        }
-
-        throw new WriteTimeoutException(WriteType.CAS, consistencyForPaxos, 0, consistencyForPaxos.blockFor(Keyspace.open(metadata.ksName)));
-    }
-
-    /**
-     * Unlike commitPaxos, this does not wait for replies
-     */
-    private static void sendCommit(Commit commit, Iterable<InetAddress> replicas)
-    {
-        MessageOut<Commit> message = new MessageOut<Commit>(MessagingService.Verb.PAXOS_COMMIT, commit, Commit.serializer);
-        for (InetAddress target : replicas)
-            MessagingService.instance().sendOneWay(message, target);
-    }
-
-    private static PrepareCallback preparePaxos(Commit toPrepare, List<InetAddress> endpoints, int requiredParticipants, ConsistencyLevel consistencyForPaxos)
-    throws WriteTimeoutException
-    {
-        PrepareCallback callback = new PrepareCallback(toPrepare.key, toPrepare.update.metadata(), requiredParticipants, consistencyForPaxos);
-        MessageOut<Commit> message = new MessageOut<Commit>(MessagingService.Verb.PAXOS_PREPARE, toPrepare, Commit.serializer);
-        for (InetAddress target : endpoints)
-            MessagingService.instance().sendRR(message, target, callback);
-        callback.await();
-        return callback;
-    }
-
-    private static boolean proposePaxos(Commit proposal, List<InetAddress> endpoints, int requiredParticipants, boolean timeoutIfPartial, ConsistencyLevel consistencyLevel)
-    throws WriteTimeoutException
-    {
-        ProposeCallback callback = new ProposeCallback(endpoints.size(), requiredParticipants, !timeoutIfPartial, consistencyLevel);
-        MessageOut<Commit> message = new MessageOut<Commit>(MessagingService.Verb.PAXOS_PROPOSE, proposal, Commit.serializer);
-        for (InetAddress target : endpoints)
-            MessagingService.instance().sendRR(message, target, callback);
-
-        callback.await();
-
-        if (callback.isSuccessful())
-            return true;
-
-        if (timeoutIfPartial && !callback.isFullyRefused())
-            throw new WriteTimeoutException(WriteType.CAS, consistencyLevel, callback.getAcceptCount(), requiredParticipants);
-
-        return false;
-    }
-
-    private static void commitPaxos(Commit proposal, ConsistencyLevel consistencyLevel) throws WriteTimeoutException
-    {
-        boolean shouldBlock = consistencyLevel != ConsistencyLevel.ANY;
-        Keyspace keyspace = Keyspace.open(proposal.update.metadata().ksName);
-
-        Token tk = StorageService.getPartitioner().getToken(proposal.key);
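-        // The commit below fans out to all natural and pending endpoints for the
-        // key's token; when consistencyLevel != ANY a write response handler blocks
-        // until enough replicas acknowledge, otherwise it is fire-and-forget.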
-        List<InetAddress> naturalEndpoints = StorageService.instance.getNaturalEndpoints(keyspace.getName(), tk);
-        Collection<InetAddress> pendingEndpoints = StorageService.instance.getTokenMetadata().pendingEndpointsFor(tk, keyspace.getName());
-
-        AbstractWriteResponseHandler responseHandler = null;
-        if (shouldBlock)
-        {
-            AbstractReplicationStrategy rs = keyspace.getReplicationStrategy();
-            responseHandler = rs.getWriteResponseHandler(naturalEndpoints, pendingEndpoints, consistencyLevel, null, WriteType.SIMPLE);
-        }
-
-        MessageOut<Commit> message = new MessageOut<Commit>(MessagingService.Verb.PAXOS_COMMIT, proposal, Commit.serializer);
-        for (InetAddress destination : Iterables.concat(naturalEndpoints, pendingEndpoints))
-        {
-            if (FailureDetector.instance.isAlive(destination))
-            {
-                if (shouldBlock)
-                    MessagingService.instance().sendRR(message, destination, responseHandler);
-                else
-                    MessagingService.instance().sendOneWay(message, destination);
-            }
-        }
-
-        if (shouldBlock)
-            responseHandler.get();
-    }
-#endif
-
-
-future<>
-storage_proxy::mutate_locally(const mutation& m, clock_type::time_point timeout) {
-    auto shard = _db.local().shard_of(m);
-    return _db.invoke_on(shard, [s = global_schema_ptr(m.schema()), m = freeze(m), timeout] (database& db) -> future<> {
-        return db.apply(s, m, timeout);
-    });
-}
-
-future<>
-storage_proxy::mutate_locally(const schema_ptr& s, const frozen_mutation& m, clock_type::time_point timeout) {
-    auto shard = _db.local().shard_of(m);
-    return _db.invoke_on(shard, [&m, gs = global_schema_ptr(s), timeout] (database& db) -> future<> {
-        return db.apply(gs, m, timeout);
-    });
-}
-
-future<>
-storage_proxy::mutate_locally(std::vector<mutation> mutations, clock_type::time_point timeout) {
-    return do_with(std::move(mutations), [this, timeout] (std::vector<mutation>& pmut){
-        return parallel_for_each(pmut.begin(), pmut.end(), [this, timeout] (const mutation& m) {
-            return mutate_locally(m, timeout);
-        });
-    });
-}
-
-future<>
-storage_proxy::mutate_counters_on_leader(std::vector<frozen_mutation_and_schema> mutations, db::consistency_level cl, clock_type::time_point timeout,
-                                         tracing::trace_state_ptr trace_state) {
-    _stats.received_counter_updates += mutations.size();
-    return do_with(std::move(mutations), [this, cl, timeout, trace_state = std::move(trace_state)] (std::vector<frozen_mutation_and_schema>& update_ms) mutable {
-        return parallel_for_each(update_ms, [this, cl, timeout, trace_state] (frozen_mutation_and_schema& fm_a_s) {
-            return mutate_counter_on_leader_and_replicate(fm_a_s.s, std::move(fm_a_s.fm), cl, timeout, trace_state);
-        });
-    });
-}
-
-future<>
-storage_proxy::mutate_counter_on_leader_and_replicate(const schema_ptr& s, frozen_mutation fm, db::consistency_level cl, clock_type::time_point timeout,
-                                                      tracing::trace_state_ptr trace_state) {
-    auto shard = _db.local().shard_of(fm);
-    return _db.invoke_on(shard, [gs = global_schema_ptr(s), fm = std::move(fm), cl, timeout, gt = tracing::global_trace_state_ptr(std::move(trace_state))] (database& db) {
-        auto trace_state = gt.get();
-        return db.apply_counter_update(gs, fm, timeout, trace_state).then([cl, timeout, trace_state] (mutation m) mutable {
-            return service::get_local_storage_proxy().replicate_counter_from_leader(std::move(m), cl, std::move(trace_state), timeout);
-        });
-    });
-}
-
-future<>
-storage_proxy::mutate_streaming_mutation(const schema_ptr& s, utils::UUID plan_id, const frozen_mutation& m, bool fragmented) {
-    auto shard = _db.local().shard_of(m);
-    return _db.invoke_on(shard, [&m, plan_id, fragmented, gs = global_schema_ptr(s)] (database& db) mutable -> future<> {
-        return db.apply_streaming_mutation(gs, plan_id, m, fragmented);
-    });
-}
-
-
-/**
- * Helper for create_write_response_handler, shared across mutate/mutate_atomically.
- * Both methods do roughly the same thing, with the latter intermixing batch log ops
- * in the logic.
- * Since ordering is (maybe?) significant, we need to carry some info across from here
- * to the hint method below (dead nodes).
- */
-storage_proxy::response_id_type
-storage_proxy::create_write_response_handler(const mutation& m, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state) {
-    auto keyspace_name = m.schema()->ks_name();
-    keyspace& ks = _db.local().find_keyspace(keyspace_name);
-    auto& rs = ks.get_replication_strategy();
-    std::vector<gms::inet_address> natural_endpoints = rs.get_natural_endpoints(m.token());
-    std::vector<gms::inet_address> pending_endpoints =
-        get_local_storage_service().get_token_metadata().pending_endpoints_for(m.token(), keyspace_name);
-
-    slogger.trace("creating write handler for token: {} natural: {} pending: {}", m.token(), natural_endpoints, pending_endpoints);
-    tracing::trace(tr_state, "Creating write handler for token: {} natural: {} pending: {}", m.token(), natural_endpoints, pending_endpoints);
-
-    // filter out natural_endpoints from pending_endpoints if the latter is not yet updated during node join
-    auto itend = boost::range::remove_if(pending_endpoints, [&natural_endpoints] (gms::inet_address& p) {
-        return boost::range::find(natural_endpoints, p) != natural_endpoints.end();
-    });
-    pending_endpoints.erase(itend, pending_endpoints.end());
-
-    auto all = boost::range::join(natural_endpoints, pending_endpoints);
-
-    if (std::find_if(all.begin(), all.end(), std::bind1st(std::mem_fn(&storage_proxy::cannot_hint), this)) != all.end()) {
-        // avoid OOMing due to excess hints. we need to do this check even for "live" nodes, since we can
-        // still generate hints for those if it's overloaded or simply dead but not yet known-to-be-dead.
-        // The idea is that if we have over maxHintsInProgress hints in flight, this is probably due to
-        // a small number of nodes causing problems, so we should avoid shutting down writes completely to
-        // healthy nodes. Any node with no hintsInProgress is considered healthy.
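-        // (cannot_hint(), defined further down in this patch, encodes exactly that:
-        // it only trips when the global _total_hints_in_progress exceeds
-        // _max_hints_in_progress AND the target already has hints in flight that
-        // we would be expected to write.)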
- throw overloaded_exception(_total_hints_in_progress); - } - - // filter live endpoints from dead ones - std::unordered_set live_endpoints; - std::vector dead_endpoints; - live_endpoints.reserve(all.size()); - dead_endpoints.reserve(all.size()); - std::partition_copy(all.begin(), all.end(), std::inserter(live_endpoints, live_endpoints.begin()), std::back_inserter(dead_endpoints), - std::bind1st(std::mem_fn(&gms::failure_detector::is_alive), &gms::get_local_failure_detector())); - - slogger.trace("creating write handler with live: {} dead: {}", live_endpoints, dead_endpoints); - tracing::trace(tr_state, "Creating write handler with live: {} dead: {}", live_endpoints, dead_endpoints); - - db::assure_sufficient_live_nodes(cl, ks, live_endpoints, pending_endpoints); - - return create_write_response_handler(ks, cl, type, std::make_unique(m), std::move(live_endpoints), pending_endpoints, std::move(dead_endpoints), std::move(tr_state)); -} - -storage_proxy::response_id_type -storage_proxy::create_write_response_handler(const std::unordered_map>& m, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state) { - std::unordered_set endpoints(m.size()); - boost::copy(m | boost::adaptors::map_keys, std::inserter(endpoints, endpoints.begin())); - auto mh = std::make_unique(m); - - slogger.trace("creating write handler for read repair token: {} endpoint: {}", mh->token(), endpoints); - tracing::trace(tr_state, "Creating write handler for read repair token: {} endpoint: {}", mh->token(), endpoints); - - auto keyspace_name = mh->schema()->ks_name(); - keyspace& ks = _db.local().find_keyspace(keyspace_name); - - return create_write_response_handler(ks, cl, type, std::move(mh), std::move(endpoints), std::vector(), std::vector(), std::move(tr_state)); -} - -void -storage_proxy::hint_to_dead_endpoints(response_id_type id, db::consistency_level cl) { - auto& h = *get_write_response_handler(id); - - size_t hints = hint_to_dead_endpoints(h._mutation_holder, h.get_dead_endpoints()); - - if (cl == db::consistency_level::ANY) { - // for cl==ANY hints are counted towards consistency - h.signal(hints); - } -} - -template -future> storage_proxy::mutate_prepare(const Range& mutations, db::consistency_level cl, db::write_type type, CreateWriteHandler create_handler) { - // apply is used to convert exceptions to exceptional future - return futurize>::apply([this] (const Range& mutations, db::consistency_level cl, db::write_type type, CreateWriteHandler create_handler) { - std::vector ids; - ids.reserve(std::distance(std::begin(mutations), std::end(mutations))); - for (auto& m : mutations) { - ids.emplace_back(*this, create_handler(m, cl, type)); - } - return make_ready_future>(std::move(ids)); - }, mutations, cl, type, std::move(create_handler)); -} - -template -future> storage_proxy::mutate_prepare(const Range& mutations, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state) { - return mutate_prepare<>(mutations, cl, type, [this, tr_state = std::move(tr_state)] (const typename Range::value_type& m, db::consistency_level cl, db::write_type type) mutable { - return create_write_response_handler(m, cl, type, tr_state); - }); -} - -future<> storage_proxy::mutate_begin(std::vector ids, db::consistency_level cl, - stdx::optional timeout_opt) { - return parallel_for_each(ids, [this, cl, timeout_opt] (unique_response_handler& protected_response) { - auto response_id = protected_response.id; - // it is better to send first and hint afterwards to reduce latency - // but request 
may complete before hint_to_dead_endpoints() is called and
-        // response_id handler will be removed, so we will have to do hint with separate
-        // frozen_mutation copy, or manage handler lifetime differently.
-        hint_to_dead_endpoints(response_id, cl);
-
-        auto timeout = timeout_opt.value_or(clock_type::now() + std::chrono::milliseconds(_db.local().get_config().write_request_timeout_in_ms()));
-        // call before send_to_live_endpoints() for the same reason as above
-        auto f = response_wait(response_id, timeout);
-        send_to_live_endpoints(protected_response.release(), timeout); // response is now running and it will either complete or time out
-        return std::move(f);
-    });
-}
-
-// this function should be called with a future that holds the result of a mutation attempt (usually
-// the future returned by mutate_begin()). The future should be ready when the function is called.
-future<> storage_proxy::mutate_end(future<> mutate_result, utils::latency_counter lc, tracing::trace_state_ptr trace_state) {
-    assert(mutate_result.available());
-    _stats.write.mark(lc.stop().latency());
-    if (lc.is_start()) {
-        _stats.estimated_write.add(lc.latency(), _stats.write.hist.count);
-    }
-    try {
-        mutate_result.get();
-        tracing::trace(trace_state, "Mutation successfully completed");
-        return make_ready_future<>();
-    } catch (no_such_keyspace& ex) {
-        tracing::trace(trace_state, "Mutation failed: write to non-existing keyspace: {}", ex.what());
-        slogger.trace("Write to non-existing keyspace: {}", ex.what());
-        return make_exception_future<>(std::current_exception());
-    } catch(mutation_write_timeout_exception& ex) {
-        // timeout
-        tracing::trace(trace_state, "Mutation failed: write timeout; received {:d} of {:d} required replies", ex.received, ex.block_for);
-        slogger.debug("Write timeout; received {} of {} required replies", ex.received, ex.block_for);
-        _stats.write_timeouts.mark();
-        return make_exception_future<>(std::current_exception());
-    } catch (exceptions::unavailable_exception& ex) {
-        tracing::trace(trace_state, "Mutation failed: unavailable");
-        _stats.write_unavailables.mark();
-        slogger.trace("Unavailable");
-        return make_exception_future<>(std::current_exception());
-    } catch(overloaded_exception& ex) {
-        tracing::trace(trace_state, "Mutation failed: overloaded");
-        _stats.write_unavailables.mark();
-        slogger.trace("Overloaded");
-        return make_exception_future<>(std::current_exception());
-    } catch (...) {
-        tracing::trace(trace_state, "Mutation failed: unknown reason");
-        throw;
-    }
-}
-
-gms::inet_address storage_proxy::find_leader_for_counter_update(const mutation& m, db::consistency_level cl) {
-    auto& ks = _db.local().find_keyspace(m.schema()->ks_name());
-    auto live_endpoints = get_live_endpoints(ks, m.token());
-
-    if (live_endpoints.empty()) {
-        throw exceptions::unavailable_exception(cl, block_for(ks, cl), 0);
-    }
-
-    auto local_endpoints = boost::copy_range<std::vector<gms::inet_address>>(live_endpoints | boost::adaptors::filtered([&] (auto&& ep) {
-        return db::is_local(ep);
-    }));
-    if (local_endpoints.empty()) {
-        // FIXME: O(n log n) to get maximum
-        auto& snitch = locator::i_endpoint_snitch::get_local_snitch_ptr();
-        snitch->sort_by_proximity(utils::fb_utilities::get_broadcast_address(), live_endpoints);
-        return live_endpoints[0];
-    } else {
-        // FIXME: favour ourselves to avoid additional hop?
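-        // A live replica exists in the local DC, so pick one of them at random as
-        // the counter leader; the thread_local engine keeps the pick cheap on
-        // every shard.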
- static thread_local std::random_device rd; - static thread_local std::default_random_engine re(rd()); - std::uniform_int_distribution<> dist(0, local_endpoints.size() - 1); - return local_endpoints[dist(re)]; - } -} - -template -future<> storage_proxy::mutate_counters(Range&& mutations, db::consistency_level cl, tracing::trace_state_ptr tr_state) { - if (boost::empty(mutations)) { - return make_ready_future<>(); - } - - slogger.trace("mutate_counters cl={}", cl); - mlogger.trace("counter mutations={}", mutations); - - - // Choose a leader for each mutation - std::unordered_map> leaders; - for (auto& m : mutations) { - auto leader = find_leader_for_counter_update(m, cl); - leaders[leader].emplace_back(frozen_mutation_and_schema { freeze(m), m.schema() }); - // FIXME: check if CL can be reached - } - - // Forward mutations to the leaders chosen for them - auto timeout = clock_type::now() + std::chrono::milliseconds(_db.local().get_config().counter_write_request_timeout_in_ms()); - auto my_address = utils::fb_utilities::get_broadcast_address(); - return parallel_for_each(leaders, [this, cl, timeout, tr_state = std::move(tr_state), my_address] (auto& endpoint_and_mutations) { - auto endpoint = endpoint_and_mutations.first; - - // The leader receives a vector of mutations and processes them together, - // so if there is a timeout we don't really know which one is to "blame" - // and what to put in ks and cf fields of write timeout exception. - // Let's just use the schema of the first mutation in a vector. - auto handle_error = [this, sp = this->shared_from_this(), s = endpoint_and_mutations.second[0].s, cl] (std::exception_ptr exp) { - auto& ks = _db.local().find_keyspace(s->ks_name()); - try { - std::rethrow_exception(std::move(exp)); - } catch (rpc::timeout_error&) { - return make_exception_future<>(mutation_write_timeout_exception(s->ks_name(), s->cf_name(), cl, 0, db::block_for(ks, cl), db::write_type::COUNTER)); - } catch (timed_out_error&) { - return make_exception_future<>(mutation_write_timeout_exception(s->ks_name(), s->cf_name(), cl, 0, db::block_for(ks, cl), db::write_type::COUNTER)); - } - }; - - auto f = make_ready_future<>(); - if (endpoint == my_address) { - f = this->mutate_counters_on_leader(std::move(endpoint_and_mutations.second), cl, timeout, tr_state); - } else { - auto& mutations = endpoint_and_mutations.second; - auto fms = boost::copy_range>(mutations | boost::adaptors::transformed([] (auto& m) { - return std::move(m.fm); - })); - - auto& ms = netw::get_local_messaging_service(); - auto msg_addr = netw::messaging_service::msg_addr{ endpoint_and_mutations.first, 0 }; - tracing::trace(tr_state, "Enqueuing counter update to {}", msg_addr); - f = ms.send_counter_mutation(msg_addr, timeout, std::move(fms), cl, tracing::make_trace_info(tr_state)); - } - return f.handle_exception(std::move(handle_error)); - }); -} - -struct mutate_executor { - static auto get() { return &storage_proxy::do_mutate; } -}; -static thread_local auto mutate_stage = seastar::make_execution_stage("storage_proxy_mutate", mutate_executor::get()); - -/** - * Use this method to have these Mutations applied - * across all replicas. This method will take care - * of the possibility of a replica being down and hint - * the data across to some other replica. 
- *
- * @param mutations the mutations to be applied across the replicas
- * @param consistency_level the consistency level for the operation
- * @param tr_state trace state handle
- */
-future<> storage_proxy::mutate(std::vector<mutation> mutations, db::consistency_level cl, tracing::trace_state_ptr tr_state, bool raw_counters) {
-    return mutate_stage(this, std::move(mutations), cl, std::move(tr_state), raw_counters);
-}
-
-future<> storage_proxy::do_mutate(std::vector<mutation> mutations, db::consistency_level cl, tracing::trace_state_ptr tr_state, bool raw_counters) {
-    auto mid = raw_counters ? mutations.begin() : boost::range::partition(mutations, [] (auto&& m) {
-        return m.schema()->is_counter();
-    });
-    return seastar::when_all_succeed(
-            mutate_counters(boost::make_iterator_range(mutations.begin(), mid), cl, tr_state),
-            mutate_internal(boost::make_iterator_range(mid, mutations.end()), cl, false, tr_state)
-    );
-}
-
-future<> storage_proxy::replicate_counter_from_leader(mutation m, db::consistency_level cl, tracing::trace_state_ptr tr_state,
-                                                      clock_type::time_point timeout) {
-    // FIXME: do not send the mutation to itself, it has already been applied (it is not incorrect to do so, though)
-    return mutate_internal(std::array<mutation, 1>{std::move(m)}, cl, true, std::move(tr_state), timeout);
-}
-
-/*
- * The Range template parameter can either be a range of 'mutation' or a range of 'std::unordered_map'.
- * create_write_response_handler() has a specialization for both types. The one for the former uses the keyspace to figure out
- * which endpoints to send the mutation to; the one for the latter uses the endpoints that serve as keys of the map.
- */
-template<typename Range>
-future<>
-storage_proxy::mutate_internal(Range mutations, db::consistency_level cl, bool counters, tracing::trace_state_ptr tr_state,
-                               stdx::optional<clock_type::time_point> timeout_opt) {
-    if (boost::empty(mutations)) {
-        return make_ready_future<>();
-    }
-
-    slogger.trace("mutate cl={}", cl);
-    mlogger.trace("mutations={}", mutations);
-
-    // If counters is set it means that we are replicating counter shards. There
-    // is no need for special handling anymore, since the leader has already
-    // done its job, but we need to return the correct db::write_type in case of
-    // a timeout so that the client doesn't attempt to retry the request.
-    auto type = counters ? db::write_type::COUNTER
-                         : (std::next(std::begin(mutations)) == std::end(mutations) ? db::write_type::SIMPLE : db::write_type::UNLOGGED_BATCH);
-    utils::latency_counter lc;
-    lc.start();
-
-    return mutate_prepare(mutations, cl, type, tr_state).then([this, cl, timeout_opt] (std::vector<unique_response_handler> ids) {
-        return mutate_begin(std::move(ids), cl, timeout_opt);
-    }).then_wrapped([p = shared_from_this(), lc, tr_state] (future<> f) mutable {
-        return p->mutate_end(std::move(f), lc, std::move(tr_state));
-    });
-}
-
-future<>
-storage_proxy::mutate_with_triggers(std::vector<mutation> mutations, db::consistency_level cl,
-                                    bool should_mutate_atomically, tracing::trace_state_ptr tr_state, bool raw_counters) {
-    warn(unimplemented::cause::TRIGGERS);
-#if 0
-    Collection<Mutation> augmented = TriggerExecutor.instance.execute(mutations);
-    if (augmented != null) {
-        return mutate_atomically(augmented, consistencyLevel);
-    } else {
-#endif
-    if (should_mutate_atomically) {
-        assert(!raw_counters);
-        return mutate_atomically(std::move(mutations), cl, std::move(tr_state));
-    }
-    return mutate(std::move(mutations), cl, std::move(tr_state), raw_counters);
-#if 0
-    }
-#endif
-}
-
-/**
- * See mutate. Adds additional steps before and after writing a batch.
- * Before writing the batch (but after doing availability check against the FD for the row replicas): - * write the entire batch to a batchlog elsewhere in the cluster. - * After: remove the batchlog entry (after writing hints for the batch rows, if necessary). - * - * @param mutations the Mutations to be applied across the replicas - * @param consistency_level the consistency level for the operation - */ -future<> -storage_proxy::mutate_atomically(std::vector mutations, db::consistency_level cl, tracing::trace_state_ptr tr_state) { - - utils::latency_counter lc; - lc.start(); - - class context { - storage_proxy& _p; - std::vector _mutations; - db::consistency_level _cl; - tracing::trace_state_ptr _trace_state; - - const utils::UUID _batch_uuid; - const std::unordered_set _batchlog_endpoints; - - public: - context(storage_proxy & p, std::vector&& mutations, db::consistency_level cl, tracing::trace_state_ptr tr_state) - : _p(p) - , _mutations(std::move(mutations)) - , _cl(cl) - , _trace_state(std::move(tr_state)) - , _batch_uuid(utils::UUID_gen::get_time_UUID()) - , _batchlog_endpoints( - [this]() -> std::unordered_set { - auto local_addr = utils::fb_utilities::get_broadcast_address(); - auto topology = service::get_storage_service().local().get_token_metadata().get_topology(); - auto local_endpoints = topology.get_datacenter_racks().at(get_local_dc()); // note: origin copies, so do that here too... - auto local_rack = locator::i_endpoint_snitch::get_local_snitch_ptr()->get_rack(local_addr); - auto chosen_endpoints = db::get_batchlog_manager().local().endpoint_filter(local_rack, local_endpoints); - - if (chosen_endpoints.empty()) { - if (_cl == db::consistency_level::ANY) { - return {local_addr}; - } - throw exceptions::unavailable_exception(db::consistency_level::ONE, 1, 0); - } - return chosen_endpoints; - }()) { - tracing::trace(_trace_state, "Created a batch context"); - tracing::set_batchlog_endpoints(_trace_state, _batchlog_endpoints); - } - - future<> send_batchlog_mutation(mutation m, db::consistency_level cl = db::consistency_level::ONE) { - return _p.mutate_prepare<>(std::array{std::move(m)}, cl, db::write_type::BATCH_LOG, [this] (const mutation& m, db::consistency_level cl, db::write_type type) { - auto& ks = _p._db.local().find_keyspace(m.schema()->ks_name()); - return _p.create_write_response_handler(ks, cl, type, std::make_unique(m), _batchlog_endpoints, {}, {}, _trace_state); - }).then([this, cl] (std::vector ids) { - return _p.mutate_begin(std::move(ids), cl); - }); - } - future<> sync_write_to_batchlog() { - auto m = db::get_batchlog_manager().local().get_batch_log_mutation_for(_mutations, _batch_uuid, netw::messaging_service::current_version); - tracing::trace(_trace_state, "Sending a batchlog write mutation"); - return send_batchlog_mutation(std::move(m)); - }; - future<> async_remove_from_batchlog() { - // delete batch - auto schema = _p._db.local().find_schema(db::system_keyspace::NAME, db::system_keyspace::BATCHLOG); - auto key = partition_key::from_exploded(*schema, {uuid_type->decompose(_batch_uuid)}); - auto now = service::client_state(service::client_state::internal_tag()).get_timestamp(); - mutation m(key, schema); - m.partition().apply_delete(*schema, clustering_key_prefix::make_empty(), tombstone(now, gc_clock::now())); - - tracing::trace(_trace_state, "Sending a batchlog remove mutation"); - return send_batchlog_mutation(std::move(m), db::consistency_level::ANY).handle_exception([] (std::exception_ptr eptr) { - slogger.error("Failed to remove mutations from 
batchlog: {}", eptr); - }); - }; - - future<> run() { - return _p.mutate_prepare(_mutations, _cl, db::write_type::BATCH, _trace_state).then([this] (std::vector ids) { - return sync_write_to_batchlog().then([this, ids = std::move(ids)] () mutable { - tracing::trace(_trace_state, "Sending batch mutations"); - return _p.mutate_begin(std::move(ids), _cl); - }).then(std::bind(&context::async_remove_from_batchlog, this)); - }); - } - }; - - auto mk_ctxt = [this, tr_state] (std::vector mutations, db::consistency_level cl) mutable { - try { - return make_ready_future>(make_lw_shared(*this, std::move(mutations), cl, std::move(tr_state))); - } catch(...) { - return make_exception_future>(std::current_exception()); - } - }; - - return mk_ctxt(std::move(mutations), cl).then([this] (lw_shared_ptr ctxt) { - return ctxt->run().finally([ctxt]{}); - }).then_wrapped([p = shared_from_this(), lc, tr_state = std::move(tr_state)] (future<> f) mutable { - return p->mutate_end(std::move(f), lc, std::move(tr_state)); - }); -} - -bool storage_proxy::cannot_hint(gms::inet_address target) { - return _total_hints_in_progress > _max_hints_in_progress - && (get_hints_in_progress_for(target) > 0 && should_hint(target)); -} - -future<> storage_proxy::send_to_endpoint(mutation m, gms::inet_address target, db::write_type type) { - utils::latency_counter lc; - lc.start(); - - return mutate_prepare(std::array{std::move(m)}, db::consistency_level::ONE, type, - [this, target] (const mutation& m, db::consistency_level cl, db::write_type type) { - auto& ks = _db.local().find_keyspace(m.schema()->ks_name()); - return create_write_response_handler(ks, cl, type, std::make_unique(m), {target}, {}, {}, nullptr); - }).then([this] (std::vector ids) { - return mutate_begin(std::move(ids), db::consistency_level::ONE); - }).then_wrapped([p = shared_from_this(), lc] (future<>&& f) { - return p->mutate_end(std::move(f), lc, nullptr); - }); -} - -/** - * Send the mutations to the right targets, write it locally if it corresponds or writes a hint when the node - * is not available. - * - * Note about hints: - * - * | Hinted Handoff | Consist. Level | - * | on | >=1 | --> wait for hints. We DO NOT notify the handler with handler.response() for hints; - * | on | ANY | --> wait for hints. Responses count towards consistency. - * | off | >=1 | --> DO NOT fire hints. And DO NOT wait for them to complete. - * | off | ANY | --> DO NOT fire hints. And DO NOT wait for them to complete. - * - * @throws OverloadedException if the hints cannot be written/enqueued - */ - // returned future is ready when sent is complete, not when mutation is executed on all (or any) targets! 
-void storage_proxy::send_to_live_endpoints(storage_proxy::response_id_type response_id, clock_type::time_point timeout)
-{
-    // extra-datacenter replicas, grouped by dc
-    std::unordered_map<sstring, std::vector<gms::inet_address>> dc_groups;
-    std::vector<std::pair<const sstring, std::vector<gms::inet_address>>> local;
-    local.reserve(3);
-
-    auto handler_ptr = get_write_response_handler(response_id);
-    auto& handler = *handler_ptr;
-
-    for(auto dest: handler.get_targets()) {
-        sstring dc = get_dc(dest);
-        // read repair writes do not go through coordinator since mutations are per destination
-        if (handler.read_repair_write() || dc == get_local_dc()) {
-            local.emplace_back("", std::vector<gms::inet_address>({dest}));
-        } else {
-            dc_groups[dc].push_back(dest);
-        }
-    }
-
-    auto all = boost::range::join(local, dc_groups);
-    auto my_address = utils::fb_utilities::get_broadcast_address();
-
-    // lambda for applying mutation locally
-    auto lmutate = [handler_ptr, response_id, this, my_address, timeout] (lw_shared_ptr<const frozen_mutation> m) mutable {
-        tracing::trace(handler_ptr->get_trace_state(), "Executing a mutation locally");
-        auto s = handler_ptr->get_schema();
-        return mutate_locally(std::move(s), *m, timeout).then([response_id, this, my_address, m, h = std::move(handler_ptr), p = shared_from_this()] {
-            // keep the mutation alive until it is processed locally; otherwise it
-            // may disappear if the write times out before this future is ready
-            got_response(response_id, my_address);
-        });
-    };
-
-    // lambda for applying mutation remotely
-    auto rmutate = [this, handler_ptr, timeout, response_id, my_address] (gms::inet_address coordinator, std::vector<gms::inet_address>&& forward, const frozen_mutation& m) {
-        auto& ms = netw::get_local_messaging_service();
-        auto msize = m.representation().size();
-        _stats.queued_write_bytes += msize;
-
-        auto& tr_state = handler_ptr->get_trace_state();
-        tracing::trace(tr_state, "Sending a mutation to /{}", coordinator);
-
-        return ms.send_mutation(netw::messaging_service::msg_addr{coordinator, 0}, timeout, m,
-                std::move(forward), my_address, engine().cpu_id(), response_id, tracing::make_trace_info(tr_state)).finally([this, p = shared_from_this(), h = std::move(handler_ptr), msize] {
-            _stats.queued_write_bytes -= msize;
-            unthrottle();
-        });
-    };
-
-    // OK, now send and/or apply locally
-    for (typename decltype(dc_groups)::value_type& dc_targets : all) {
-        auto& forward = dc_targets.second;
-        // last one in forward list is a coordinator
-        auto coordinator = forward.back();
-        forward.pop_back();
-
-        future<> f = make_ready_future<>();
-
-
-        lw_shared_ptr<const frozen_mutation> m = handler.get_mutation_for(coordinator);
-
-        if (!m || (handler.is_counter() && coordinator == my_address)) {
-            got_response(response_id, coordinator);
-        } else {
-            if (!handler.read_repair_write()) {
-                ++_stats.writes_attempts.get_ep_stat(coordinator);
-            } else {
-                ++_stats.read_repair_write_attempts.get_ep_stat(coordinator);
-            }
-
-            if (coordinator == my_address) {
-                f = futurize::apply(lmutate, std::move(m));
-            } else {
-                f = futurize::apply(rmutate, coordinator, std::move(forward), *m);
-            }
-        }
-
-        f.handle_exception([coordinator, p = shared_from_this()] (std::exception_ptr eptr) {
-            ++p->_stats.writes_errors.get_ep_stat(coordinator);
-            try {
-                std::rethrow_exception(eptr);
-            } catch(rpc::closed_error&) {
-                // ignore, disconnect will be logged by gossiper
-            } catch(seastar::gate_closed_exception&) {
-                // may happen during shutdown, ignore it
-            } catch(timed_out_error&) {
-                // from lmutate(). Ignore so that logs are not flooded;
-                // the database total_writes_timedout counter was incremented.
-            } catch(...) {
-                slogger.error("exception during mutation write to {}: {}", coordinator, std::current_exception());
-            }
-        });
-    }
-}
-
-// returns number of hints stored
-template<typename Range>
-size_t storage_proxy::hint_to_dead_endpoints(std::unique_ptr<mutation_holder>& mh, const Range& targets) noexcept
-{
-    return boost::count_if(targets | boost::adaptors::filtered(std::bind1st(std::mem_fn(&storage_proxy::should_hint), this)),
-            std::bind(std::mem_fn(&storage_proxy::submit_hint), this, std::ref(mh), std::placeholders::_1));
-}
-
-size_t storage_proxy::get_hints_in_progress_for(gms::inet_address target) {
-    auto it = _hints_in_progress.find(target);
-
-    if (it == _hints_in_progress.end()) {
-        return 0;
-    }
-
-    return it->second;
-}
-
-bool storage_proxy::submit_hint(std::unique_ptr<mutation_holder>& mh, gms::inet_address target)
-{
-    warn(unimplemented::cause::HINT);
-    // local writes that time out should be handled by LocalMutationRunnable
-    assert(is_me(target));
-    return false;
-#if 0
-    HintRunnable runnable = new HintRunnable(target)
-    {
-        public void runMayThrow()
-        {
-            int ttl = HintedHandOffManager.calculateHintTTL(mutation);
-            if (ttl > 0)
-            {
-                slogger.debug("Adding hint for {}", target);
-                writeHintForMutation(mutation, System.currentTimeMillis(), ttl, target);
-                // Notify the handler only for CL == ANY
-                if (responseHandler != null && responseHandler.consistencyLevel == ConsistencyLevel.ANY)
-                    responseHandler.response(null);
-            } else
-            {
-                slogger.debug("Skipped writing hint for {} (ttl {})", target, ttl);
-            }
-        }
-    };
-
-    return submitHint(runnable);
-#endif
-}
-
-#if 0
-    private static Future<Void> submitHint(HintRunnable runnable)
-    {
-        StorageMetrics.totalHintsInProgress.inc();
-        getHintsInProgressFor(runnable.target).incrementAndGet();
-        return (Future<Void>) StageManager.getStage(Stage.MUTATION).submit(runnable);
-    }
-
-    /**
-     * @param now current time in milliseconds - relevant for hint replay handling of truncated CFs
-     */
-    public static void writeHintForMutation(Mutation mutation, long now, int ttl, InetAddress target)
-    {
-        assert ttl > 0;
-        UUID hostId = StorageService.instance.getTokenMetadata().getHostId(target);
-        assert hostId != null : "Missing host ID for " + target.getHostAddress();
-        HintedHandOffManager.instance.hintFor(mutation, now, ttl, hostId).apply();
-        StorageMetrics.totalHints.inc();
-    }
-
-    /**
-     * Handle counter mutation on the coordinator host.
-     *
-     * A counter mutation needs to first be applied to a replica (that we'll call the leader for the mutation) before being
-     * replicated to the other endpoints. To achieve this, there are two cases:
-     * 1) the coordinator host is a replica: we proceed to applying the update locally and replicate through
-     *    applyCounterMutationOnCoordinator
-     * 2) the coordinator is not a replica: we forward the (counter)mutation to a chosen replica (that will proceed through
-     *    applyCounterMutationOnLeader upon receipt) and wait for its acknowledgment.
-     *
-     * Implementation note: We check if we can fulfill the CL on the coordinator host even if it is not a replica to allow
-     * quicker response and because the WriteResponseHandlers don't make it easy to send back an error. We also always gather
-     * the write latencies at the coordinator node to make the gathering point similar to the case of standard writes.
- */ - public static AbstractWriteResponseHandler mutateCounter(CounterMutation cm, String localDataCenter) throws UnavailableException, OverloadedException - { - InetAddress endpoint = findSuitableEndpoint(cm.getKeyspaceName(), cm.key(), localDataCenter, cm.consistency()); - - if (endpoint.equals(FBUtilities.getBroadcastAddress())) - { - return applyCounterMutationOnCoordinator(cm, localDataCenter); - } - else - { - // Exit now if we can't fulfill the CL here instead of forwarding to the leader replica - String keyspaceName = cm.getKeyspaceName(); - AbstractReplicationStrategy rs = Keyspace.open(keyspaceName).getReplicationStrategy(); - Token tk = StorageService.getPartitioner().getToken(cm.key()); - List naturalEndpoints = StorageService.instance.getNaturalEndpoints(keyspaceName, tk); - Collection pendingEndpoints = StorageService.instance.getTokenMetadata().pendingEndpointsFor(tk, keyspaceName); - - rs.getWriteResponseHandler(naturalEndpoints, pendingEndpoints, cm.consistency(), null, WriteType.COUNTER).assureSufficientLiveNodes(); - - // Forward the actual update to the chosen leader replica - AbstractWriteResponseHandler responseHandler = new WriteResponseHandler(endpoint, WriteType.COUNTER); - - Tracing.trace("Enqueuing counter update to {}", endpoint); - MessagingService.instance().sendRR(cm.makeMutationMessage(), endpoint, responseHandler, false); - return responseHandler; - } - } - - /** - * Find a suitable replica as leader for counter update. - * For now, we pick a random replica in the local DC (or ask the snitch if - * there is no replica alive in the local DC). - * TODO: if we track the latency of the counter writes (which makes sense - * contrarily to standard writes since there is a read involved), we could - * trust the dynamic snitch entirely, which may be a better solution. It - * is unclear we want to mix those latencies with read latencies, so this - * may be a bit involved. - */ - private static InetAddress findSuitableEndpoint(String keyspaceName, ByteBuffer key, String localDataCenter, ConsistencyLevel cl) throws UnavailableException - { - Keyspace keyspace = Keyspace.open(keyspaceName); - IEndpointSnitch snitch = DatabaseDescriptor.getEndpointSnitch(); - List endpoints = StorageService.instance.getLiveNaturalEndpoints(keyspace, key); - if (endpoints.isEmpty()) - // TODO have a way to compute the consistency level - throw new UnavailableException(cl, cl.blockFor(keyspace), 0); - - List localEndpoints = new ArrayList(); - for (InetAddress endpoint : endpoints) - { - if (snitch.getDatacenter(endpoint).equals(localDataCenter)) - localEndpoints.add(endpoint); - } - if (localEndpoints.isEmpty()) - { - // No endpoint in local DC, pick the closest endpoint according to the snitch - snitch.sortByProximity(FBUtilities.getBroadcastAddress(), endpoints); - return endpoints.get(0); - } - else - { - return localEndpoints.get(ThreadLocalRandom.current().nextInt(localEndpoints.size())); - } - } - - // Must be called on a replica of the mutation. This replica becomes the - // leader of this mutation. 
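-    // (For comparison with the C++ code earlier in this file:
-    // mutate_counter_on_leader_and_replicate() plays this leader-side role there;
-    // it applies the counter update on the owning shard and then replicates the
-    // result via replicate_counter_from_leader().)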
-    public static AbstractWriteResponseHandler applyCounterMutationOnLeader(CounterMutation cm, String localDataCenter, Runnable callback)
-    throws UnavailableException, OverloadedException
-    {
-        return performWrite(cm, cm.consistency(), localDataCenter, counterWritePerformer, callback, WriteType.COUNTER);
-    }
-
-    // Same as applyCounterMutationOnLeader, with the difference that it uses the MUTATION stage to execute the write (while
-    // applyCounterMutationOnLeader assumes it is on the MUTATION stage already)
-    public static AbstractWriteResponseHandler applyCounterMutationOnCoordinator(CounterMutation cm, String localDataCenter)
-    throws UnavailableException, OverloadedException
-    {
-        return performWrite(cm, cm.consistency(), localDataCenter, counterWriteOnCoordinatorPerformer, null, WriteType.COUNTER);
-    }
-
-    private static Runnable counterWriteTask(final IMutation mutation,
-                                             final Iterable<InetAddress> targets,
-                                             final AbstractWriteResponseHandler responseHandler,
-                                             final String localDataCenter)
-    {
-        return new DroppableRunnable(MessagingService.Verb.COUNTER_MUTATION)
-        {
-            @Override
-            public void runMayThrow() throws OverloadedException, WriteTimeoutException
-            {
-                IMutation processed = SinkManager.processWriteRequest(mutation);
-                if (processed == null)
-                    return;
-
-                assert processed instanceof CounterMutation;
-                CounterMutation cm = (CounterMutation) processed;
-
-                Mutation result = cm.apply();
-                responseHandler.response(null);
-
-                Set<InetAddress> remotes = Sets.difference(ImmutableSet.copyOf(targets),
-                                                           ImmutableSet.of(FBUtilities.getBroadcastAddress()));
-                if (!remotes.isEmpty())
-                    sendToHintedEndpoints(result, remotes, responseHandler, localDataCenter);
-            }
-        };
-    }
-
-    private static boolean systemKeyspaceQuery(List<ReadCommand> cmds)
-    {
-        for (ReadCommand cmd : cmds)
-            if (!cmd.ksName.equals(SystemKeyspace.NAME))
-                return false;
-        return true;
-    }
-#endif
-
-future<> storage_proxy::schedule_repair(std::unordered_map<dht::token, std::unordered_map<gms::inet_address, stdx::optional<mutation>>> diffs, db::consistency_level cl, tracing::trace_state_ptr trace_state) {
-    if (diffs.empty()) {
-        return make_ready_future<>();
-    }
-    return mutate_internal(diffs | boost::adaptors::map_values, cl, false, std::move(trace_state));
-}
-
-class abstract_read_resolver {
-protected:
-    db::consistency_level _cl;
-    size_t _targets_count;
-    promise<> _done_promise; // all targets responded
-    bool _timedout = false; // will be true if the request times out
-    timer<storage_proxy::clock_type> _timeout;
-    size_t _responses = 0;
-    schema_ptr _schema;
-
-    virtual void on_timeout() {}
-    virtual size_t response_count() const = 0;
-public:
-    abstract_read_resolver(schema_ptr schema, db::consistency_level cl, size_t target_count, storage_proxy::clock_type::time_point timeout)
-        : _cl(cl)
-        , _targets_count(target_count)
-        , _schema(std::move(schema))
-    {
-        _timeout.set_callback([this] {
-            _timedout = true;
-            _done_promise.set_exception(read_timeout_exception(_schema->ks_name(), _schema->cf_name(), _cl, response_count(), _targets_count, _responses != 0));
-            on_timeout();
-        });
-        _timeout.arm(timeout);
-    }
-    virtual ~abstract_read_resolver() {};
-    future<> done() {
-        return _done_promise.get_future();
-    }
-    virtual void error(gms::inet_address ep, std::exception_ptr eptr) {
-        sstring why;
-        try {
-            std::rethrow_exception(eptr);
-        } catch (rpc::closed_error&) {
-            return; // do not report connection closed exceptions, gossiper does that
-        } catch (rpc::timeout_error&) {
-            return; // do not report timeouts, the whole operation will time out and be reported
-        } catch(std::exception& e) {
-            why = e.what();
-        } catch(...)
{ - why = "Unknown exception"; - } - - // do nothing other than log for now, request will timeout eventually - slogger.error("Exception when communicating with {}: {}", ep, why); - } -}; - -class digest_read_resolver : public abstract_read_resolver { - size_t _block_for; - size_t _cl_responses = 0; - promise>, bool> _cl_promise; // cl is reached - bool _cl_reported = false; - foreign_ptr> _data_result; - std::vector _digest_results; - api::timestamp_type _last_modified = api::missing_timestamp; - - virtual void on_timeout() override { - if (!_cl_reported) { - _cl_promise.set_exception(read_timeout_exception(_schema->ks_name(), _schema->cf_name(), _cl, _cl_responses, _block_for, _data_result)); - } - // we will not need them any more - _data_result = foreign_ptr>(); - _digest_results.clear(); - } - virtual size_t response_count() const override { - return _digest_results.size(); - } -public: - digest_read_resolver(schema_ptr schema, db::consistency_level cl, size_t block_for, storage_proxy::clock_type::time_point timeout) : abstract_read_resolver(std::move(schema), cl, 0, timeout), _block_for(block_for) {} - void add_data(gms::inet_address from, foreign_ptr> result) { - if (!_timedout) { - // if only one target was queried digest_check() will be skipped so we can also skip digest calculation - _digest_results.emplace_back(_targets_count == 1 ? query::result_digest() : *result->digest()); - _last_modified = std::max(_last_modified, result->last_modified()); - if (!_data_result) { - _data_result = std::move(result); - } - got_response(from); - } - } - void add_digest(gms::inet_address from, query::result_digest digest, api::timestamp_type last_modified) { - if (!_timedout) { - _digest_results.emplace_back(std::move(digest)); - _last_modified = std::max(_last_modified, last_modified); - got_response(from); - } - } - bool digests_match() const { - assert(response_count()); - if (response_count() == 1) { - return true; - } - auto& first = *_digest_results.begin(); - return std::find_if(_digest_results.begin() + 1, _digest_results.end(), [&first] (query::result_digest digest) { return digest != first; }) == _digest_results.end(); - } - bool waiting_for(gms::inet_address ep) { - return db::is_datacenter_local(_cl) ? 
is_me(ep) || db::is_local(ep) : true; - } - void got_response(gms::inet_address ep) { - if (!_cl_reported) { - if (waiting_for(ep)) { - _cl_responses++; - } - if (_cl_responses >= _block_for && _data_result) { - _cl_reported = true; - _cl_promise.set_value(std::move(_data_result), digests_match()); - } - } - if (is_completed()) { - _timeout.cancel(); - _done_promise.set_value(); - } - } - future>, bool> has_cl() { - return _cl_promise.get_future(); - } - bool has_data() { - return _data_result; - } - void add_wait_targets(size_t targets_count) { - _targets_count += targets_count; - } - bool is_completed() { - return response_count() == _targets_count; - } - api::timestamp_type last_modified() const { - return _last_modified; - } -}; - -class data_read_resolver : public abstract_read_resolver { - struct reply { - gms::inet_address from; - foreign_ptr> result; - bool reached_end = false; - reply(gms::inet_address from_, foreign_ptr> result_) : from(std::move(from_)), result(std::move(result_)) {} - }; - struct version { - gms::inet_address from; - stdx::optional par; - bool reached_end; - bool reached_partition_end; - version(gms::inet_address from_, stdx::optional par_, bool reached_end, bool reached_partition_end) - : from(std::move(from_)), par(std::move(par_)), reached_end(reached_end), reached_partition_end(reached_partition_end) {} - }; - struct mutation_and_live_row_count { - mutation mut; - size_t live_row_count; - }; - - struct primary_key { - dht::decorated_key partition; - stdx::optional clustering; - - class less_compare_clustering { - bool _is_reversed; - clustering_key::less_compare _ck_cmp; - public: - less_compare_clustering(const schema s, bool is_reversed) - : _is_reversed(is_reversed), _ck_cmp(s) { } - - bool operator()(const primary_key& a, const primary_key& b) const { - if (!b.clustering) { - return false; - } - if (!a.clustering) { - return true; - } - if (_is_reversed) { - return _ck_cmp(*b.clustering, *a.clustering); - } else { - return _ck_cmp(*a.clustering, *b.clustering); - } - } - }; - - class less_compare { - const schema& _schema; - less_compare_clustering _ck_cmp; - public: - less_compare(const schema& s, bool is_reversed) - : _schema(s), _ck_cmp(s, is_reversed) { } - - bool operator()(const primary_key& a, const primary_key& b) const { - auto pk_result = a.partition.tri_compare(_schema, b.partition); - if (pk_result) { - return pk_result < 0; - } - return _ck_cmp(a, b); - } - }; - }; - - size_t _total_live_count = 0; - uint32_t _max_live_count = 0; - uint32_t _short_read_diff = 0; - uint32_t _max_per_partition_live_count = 0; - uint32_t _partition_count = 0; - uint32_t _live_partition_count = 0; - bool _increase_per_partition_limit = false; - bool _all_reached_end = true; - query::short_read _is_short_read; - std::vector _data_results; - std::unordered_map>> _diffs; -private: - virtual void on_timeout() override { - // we will not need them any more - _data_results.clear(); - } - virtual size_t response_count() const override { - return _data_results.size(); - } - - void register_live_count(const std::vector& replica_versions, uint32_t reconciled_live_rows, uint32_t limit) { - bool any_not_at_end = boost::algorithm::any_of(replica_versions, [] (const version& v) { - return !v.reached_partition_end; - }); - if (any_not_at_end && reconciled_live_rows < limit && limit - reconciled_live_rows > _short_read_diff) { - _short_read_diff = limit - reconciled_live_rows; - _max_per_partition_live_count = reconciled_live_rows; - } - } - void find_short_partitions(const 
std::vector& rp, const std::vector>& versions, - uint32_t per_partition_limit, uint32_t row_limit, uint32_t partition_limit) { - // Go through the partitions that weren't limited by the total row limit - // and check whether we got enough rows to satisfy per-partition row - // limit. - auto partitions_left = partition_limit; - auto rows_left = row_limit; - auto pv = versions.rbegin(); - for (auto&& m_a_rc : rp | boost::adaptors::reversed) { - auto row_count = m_a_rc.live_row_count; - if (row_count < rows_left && partitions_left) { - rows_left -= row_count; - partitions_left -= !!row_count; - register_live_count(*pv, row_count, per_partition_limit); - } else { - break; - } - ++pv; - } - } - - static primary_key get_last_row(const schema& s, const partition& p, bool is_reversed) { - class last_clustering_key final : public mutation_partition_visitor { - stdx::optional _last_ck; - bool _is_reversed; - public: - explicit last_clustering_key(bool is_reversed) : _is_reversed(is_reversed) { } - - virtual void accept_partition_tombstone(tombstone) override { } - virtual void accept_static_cell(column_id, atomic_cell_view) override { } - virtual void accept_static_cell(column_id, collection_mutation_view) override { } - virtual void accept_row_tombstone(const range_tombstone&) override { } - virtual void accept_row(position_in_partition_view pos, const row_tombstone&, const row_marker&, is_dummy dummy, is_continuous) override { - assert(!dummy); - if (!_is_reversed || !_last_ck) { - _last_ck = pos.key(); - } - } - virtual void accept_row_cell(column_id id, atomic_cell_view) override { } - virtual void accept_row_cell(column_id id, collection_mutation_view) override { } - - stdx::optional&& release() { - return std::move(_last_ck); - } - }; - - last_clustering_key lck(is_reversed); - p.mut().partition().accept(s, lck); - return {p.mut().decorated_key(s), lck.release()}; - } - - // Returns the highest row sent by the specified replica, according to the schema and the direction of - // the query. - // versions is a table where rows are partitions in descending order and the columns identify the partition - // sent by a particular replica. - static primary_key get_last_row(const schema& s, bool is_reversed, const std::vector>& versions, uint32_t replica) { - const partition* last_partition = nullptr; - // Versions are in the reversed order. 
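        // Editorial illustration (added; toy types, not the real Scylla ones): the walk below can
        // be pictured on a table like std::vector<std::vector<std::optional<int>>>, where rows are
        // partitions in descending key order and columns are replicas. Scanning one replica's
        // column from the top, the first non-empty cell is the highest partition that replica
        // actually sent:
        //
        //     auto last_sent = [] (const std::vector<std::vector<std::optional<int>>>& versions,
        //                          size_t replica) -> std::optional<int> {
        //         for (auto&& row : versions) {      // newest (highest) key first
        //             if (row[replica]) {
        //                 return row[replica];       // first hit == last partition this replica sent
        //             }
        //         }
        //         return std::nullopt;               // replica sent nothing at all
        //     };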
- for (auto&& pv : versions) { - const stdx::optional& p = pv[replica].par; - if (p) { - last_partition = &p.value(); - break; - } - } - assert(last_partition); - return get_last_row(s, *last_partition, is_reversed); - } - - static primary_key get_last_reconciled_row(const schema& s, const mutation_and_live_row_count& m_a_rc, const query::read_command& cmd, uint32_t limit, bool is_reversed) { - const auto& m = m_a_rc.mut; - auto mp = m.partition(); - auto&& ranges = cmd.slice.row_ranges(s, m.key()); - mp.compact_for_query(s, cmd.timestamp, ranges, is_reversed, limit); - - stdx::optional ck; - if (!mp.clustered_rows().empty()) { - if (is_reversed) { - ck = mp.clustered_rows().begin()->key(); - } else { - ck = mp.clustered_rows().rbegin()->key(); - } - } - return primary_key { m.decorated_key(), ck }; - } - - static bool got_incomplete_information_in_partition(const schema& s, const primary_key& last_reconciled_row, const std::vector& versions, bool is_reversed) { - primary_key::less_compare_clustering ck_cmp(s, is_reversed); - for (auto&& v : versions) { - if (!v.par || v.reached_partition_end) { - continue; - } - auto replica_last_row = get_last_row(s, *v.par, is_reversed); - if (ck_cmp(replica_last_row, last_reconciled_row)) { - return true; - } - } - return false; - } - - bool got_incomplete_information_across_partitions(const schema& s, const query::read_command& cmd, - const primary_key& last_reconciled_row, std::vector& rp, - const std::vector>& versions, bool is_reversed) { - bool short_reads_allowed = cmd.slice.options.contains(); - primary_key::less_compare cmp(s, is_reversed); - stdx::optional shortest_read; - auto num_replicas = versions[0].size(); - for (uint32_t i = 0; i < num_replicas; ++i) { - if (versions.front()[i].reached_end) { - continue; - } - auto replica_last_row = get_last_row(s, is_reversed, versions, i); - if (cmp(replica_last_row, last_reconciled_row)) { - if (short_reads_allowed) { - if (!shortest_read || cmp(replica_last_row, *shortest_read)) { - shortest_read = std::move(replica_last_row); - } - } else { - return true; - } - } - } - - // Short reads are allowed, trim the reconciled result. - if (shortest_read) { - _is_short_read = query::short_read::yes; - - // Prepare to remove all partitions past shortest_read - auto it = rp.begin(); - for (; it != rp.end() && shortest_read->partition.less_compare(s, it->mut.decorated_key()); ++it) { } - - // Remove all clustering rows past shortest_read - if (it != rp.end() && it->mut.decorated_key().equal(s, shortest_read->partition)) { - if (!shortest_read->clustering) { - ++it; - } else { - std::vector ranges; - ranges.emplace_back(is_reversed ? 
query::clustering_range::make_starting_with(std::move(*shortest_read->clustering))
-                                    : query::clustering_range::make_ending_with(std::move(*shortest_read->clustering)));
-                it->live_row_count = it->mut.partition().compact_for_query(s, cmd.timestamp, ranges, is_reversed, query::max_rows);
-            }
-        }
-
-        // Actually remove all partitions past shortest_read
-        rp.erase(rp.begin(), it);
-
-        // Update total live count and live partition count
-        _live_partition_count = 0;
-        _total_live_count = boost::accumulate(rp, uint32_t(0), [this] (uint32_t lc, const mutation_and_live_row_count& m_a_rc) {
-            _live_partition_count += !!m_a_rc.live_row_count;
-            return lc + m_a_rc.live_row_count;
-        });
-    }
-
-    return false;
-}
-
-bool got_incomplete_information(const schema& s, const query::read_command& cmd, uint32_t original_row_limit, uint32_t original_per_partition_limit,
-                                uint32_t original_partition_limit, std::vector<mutation_and_live_row_count>& rp, const std::vector<std::vector<version>>& versions) {
-    // We need to check whether the reconciled result contains all information from all available
-    // replicas. It is possible that some of the nodes have returned fewer rows (because the limit
-    // was set and they had some tombstones missing) than the others. In such cases we cannot just
-    // merge all results and return them to the client, as the replicas that returned fewer rows
-    // may have newer data for the rows they did not send than any other node in the cluster.
-    //
-    // This function is responsible for detecting whether such a problem may happen. We get the partition
-    // and clustering keys of the last row that is going to be returned to the client and check whether
-    // they are in the range of rows returned by each replica that returned as many rows as it was
-    // asked for (if a replica returned fewer rows, it means it returned everything it has).
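    // Editorial worked example (added; hypothetical keys and limits): with row_limit = 2,
    // replica A returns {k1, k2} and replica B, which holds a tombstone for k1, returns
    // {k2, k3}. Reconciliation yields {k2, k3}, but A stopped at k2 only because of the
    // limit, so A may still hold newer data (or tombstones) for k3 that we never saw.
    // The last reconciled row (k3) lies past the last row A sent (k2), so the result is
    // treated as incomplete and the read must be retried with larger limits.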
- auto is_reversed = cmd.slice.options.contains(query::partition_slice::option::reversed); - - auto rows_left = original_row_limit; - auto partitions_left = original_partition_limit; - auto pv = versions.rbegin(); - for (auto&& m_a_rc : rp | boost::adaptors::reversed) { - auto row_count = m_a_rc.live_row_count; - if (row_count < rows_left && partitions_left > !!row_count) { - rows_left -= row_count; - partitions_left -= !!row_count; - if (original_per_partition_limit != query::max_rows) { - auto&& last_row = get_last_reconciled_row(s, m_a_rc, cmd, original_per_partition_limit, is_reversed); - if (got_incomplete_information_in_partition(s, last_row, *pv, is_reversed)) { - _increase_per_partition_limit = true; - return true; - } - } - } else { - auto&& last_row = get_last_reconciled_row(s, m_a_rc, cmd, rows_left, is_reversed); - return got_incomplete_information_across_partitions(s, cmd, last_row, rp, versions, is_reversed); - } - ++pv; - } - return false; - } -public: - data_read_resolver(schema_ptr schema, db::consistency_level cl, size_t targets_count, storage_proxy::clock_type::time_point timeout) : abstract_read_resolver(std::move(schema), cl, targets_count, timeout) { - _data_results.reserve(targets_count); - } - void add_mutate_data(gms::inet_address from, foreign_ptr> result) { - if (!_timedout) { - _max_live_count = std::max(result->row_count(), _max_live_count); - _data_results.emplace_back(std::move(from), std::move(result)); - if (_data_results.size() == _targets_count) { - _timeout.cancel(); - _done_promise.set_value(); - } - } - } - uint32_t max_live_count() const { - return _max_live_count; - } - bool any_partition_short_read() const { - return _short_read_diff > 0; - } - bool increase_per_partition_limit() const { - return _increase_per_partition_limit; - } - uint32_t max_per_partition_live_count() const { - return _max_per_partition_live_count; - } - uint32_t partition_count() const { - return _partition_count; - } - uint32_t live_partition_count() const { - return _live_partition_count; - } - bool all_reached_end() const { - return _all_reached_end; - } - stdx::optional resolve(schema_ptr schema, const query::read_command& cmd, uint32_t original_row_limit, uint32_t original_per_partition_limit, - uint32_t original_partition_limit) { - assert(_data_results.size()); - - if (_data_results.size() == 1) { - // if there is a result only from one node there is nothing to reconcile - // should happen only for range reads since single key reads will not - // try to reconcile for CL=ONE - auto& p = _data_results[0].result; - return reconcilable_result(p->row_count(), p->partitions(), p->is_short_read()); - } - - const auto& s = *schema; - - // return true if lh > rh - auto cmp = [&s](reply& lh, reply& rh) { - if (lh.result->partitions().size() == 0) { - return false; // reply with empty partition array goes to the end of the sorted array - } else if (rh.result->partitions().size() == 0) { - return true; - } else { - auto lhk = lh.result->partitions().back().mut().key(s); - auto rhk = rh.result->partitions().back().mut().key(s); - return lhk.ring_order_tri_compare(s, rhk) > 0; - } - }; - - // this array will have an entry for each partition which will hold all available versions - std::vector> versions; - versions.reserve(_data_results.front().result->partitions().size()); - - for (auto& r : _data_results) { - _is_short_read = _is_short_read || r.result->is_short_read(); - r.reached_end = !r.result->is_short_read() && r.result->row_count() < cmd.row_limit - && (cmd.partition_limit == 
query::max_partitions - || boost::range::count_if(r.result->partitions(), [] (const partition& p) { - return p.row_count(); - }) < cmd.partition_limit); - _all_reached_end = _all_reached_end && r.reached_end; - } - - do { - // after this sort reply with largest key is at the beginning - boost::sort(_data_results, cmp); - if (_data_results.front().result->partitions().empty()) { - break; // if top of the heap is empty all others are empty too - } - const auto& max_key = _data_results.front().result->partitions().back().mut().key(s); - versions.emplace_back(); - std::vector& v = versions.back(); - v.reserve(_targets_count); - for (reply& r : _data_results) { - auto pit = r.result->partitions().rbegin(); - if (pit != r.result->partitions().rend() && pit->mut().key(s).legacy_equal(s, max_key)) { - bool reached_partition_end = pit->row_count() < cmd.slice.partition_row_limit(); - v.emplace_back(r.from, std::move(*pit), r.reached_end, reached_partition_end); - r.result->partitions().pop_back(); - } else { - // put empty partition for destination without result - v.emplace_back(r.from, stdx::optional(), r.reached_end, true); - } - } - - boost::sort(v, [] (const version& x, const version& y) { - return x.from < y.from; - }); - } while(true); - - std::vector reconciled_partitions; - reconciled_partitions.reserve(versions.size()); - - // reconcile all versions - boost::range::transform(boost::make_iterator_range(versions.begin(), versions.end()), std::back_inserter(reconciled_partitions), - [this, schema, original_per_partition_limit] (std::vector& v) { - auto it = boost::range::find_if(v, [] (auto&& ver) { - return bool(ver.par); - }); - auto m = boost::accumulate(v, mutation(it->par->mut().key(*schema), schema), [this, schema] (mutation& m, const version& ver) { - if (ver.par) { - m.partition().apply(*schema, ver.par->mut().partition(), *schema); - } - return std::move(m); - }); - auto live_row_count = m.live_row_count(); - _total_live_count += live_row_count; - _live_partition_count += !!live_row_count; - return mutation_and_live_row_count { std::move(m), live_row_count }; - }); - _partition_count = reconciled_partitions.size(); - - bool has_diff = false; - - // calculate differences - for (auto z : boost::combine(versions, reconciled_partitions)) { - const mutation& m = z.get<1>().mut; - for (const version& v : z.get<0>()) { - auto diff = v.par - ? 
m.partition().difference(schema, v.par->mut().unfreeze(schema).partition()) - : m.partition(); - auto it = _diffs[m.token()].find(v.from); - std::experimental::optional mdiff; - if (!diff.empty()) { - has_diff = true; - mdiff = mutation(schema, m.decorated_key(), std::move(diff)); - } - if (it == _diffs[m.token()].end()) { - _diffs[m.token()].emplace(v.from, std::move(mdiff)); - } else { - // should not really happen, but lets try to deal with it - if (mdiff) { - if (it->second) { - it->second.value().apply(std::move(mdiff.value())); - } else { - it->second = std::move(mdiff); - } - } - } - } - } - - if (has_diff) { - if (got_incomplete_information(*schema, cmd, original_row_limit, original_per_partition_limit, - original_partition_limit, reconciled_partitions, versions)) { - return {}; - } - // filter out partitions with empty diffs - for (auto it = _diffs.begin(); it != _diffs.end();) { - if (boost::algorithm::none_of(it->second | boost::adaptors::map_values, std::mem_fn(&std::experimental::optional::operator bool))) { - it = _diffs.erase(it); - } else { - ++it; - } - } - } else { - _diffs.clear(); - } - - find_short_partitions(reconciled_partitions, versions, original_per_partition_limit, original_row_limit, original_partition_limit); - - bool allow_short_reads = cmd.slice.options.contains(); - if (allow_short_reads && _max_live_count >= original_row_limit && _total_live_count < original_row_limit && _total_live_count) { - // We ended up with less rows than the client asked for (but at least one), - // avoid retry and mark as short read instead. - _is_short_read = query::short_read::yes; - } - - // build reconcilable_result from reconciled data - // traverse backwards since large keys are at the start - std::vector vec; - auto r = boost::accumulate(reconciled_partitions | boost::adaptors::reversed, std::ref(vec), [] (std::vector& a, const mutation_and_live_row_count& m_a_rc) { - a.emplace_back(partition(m_a_rc.live_row_count, freeze(m_a_rc.mut))); - return std::ref(a); - }); - - return reconcilable_result(_total_live_count, std::move(r.get()), _is_short_read); - } - auto total_live_count() const { - return _total_live_count; - } - auto get_diffs_for_repair() { - return std::move(_diffs); - } -}; - -class abstract_read_executor : public enable_shared_from_this { -protected: - using targets_iterator = std::vector::iterator; - using digest_resolver_ptr = ::shared_ptr; - using data_resolver_ptr = ::shared_ptr; - using clock_type = storage_proxy::clock_type; - - schema_ptr _schema; - shared_ptr _proxy; - lw_shared_ptr _cmd; - lw_shared_ptr _retry_cmd; - dht::partition_range _partition_range; - db::consistency_level _cl; - size_t _block_for; - std::vector _targets; - promise>> _result_promise; - tracing::trace_state_ptr _trace_state; - lw_shared_ptr _cf; - -public: - abstract_read_executor(schema_ptr s, lw_shared_ptr cf, shared_ptr proxy, lw_shared_ptr cmd, dht::partition_range pr, db::consistency_level cl, size_t block_for, - std::vector targets, tracing::trace_state_ptr trace_state) : - _schema(std::move(s)), _proxy(std::move(proxy)), _cmd(std::move(cmd)), _partition_range(std::move(pr)), _cl(cl), _block_for(block_for), _targets(std::move(targets)), _trace_state(std::move(trace_state)), - _cf(std::move(cf)) { - _proxy->_stats.reads++; - } - virtual ~abstract_read_executor() { - _proxy->_stats.reads--; - }; - -protected: - future>, cache_temperature> make_mutation_data_request(lw_shared_ptr cmd, gms::inet_address ep, clock_type::time_point timeout) { - 
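        // Editorial note (added): each make_*_request helper below follows the same shape --
        // bump the per-endpoint attempt counter, then either execute the read locally when
        // is_me(ep) holds, or ship it through the messaging service and convert the RPC
        // reply (result plus optional cache_temperature) back into the local future type.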
++_proxy->_stats.mutation_data_read_attempts.get_ep_stat(ep); - if (is_me(ep)) { - tracing::trace(_trace_state, "read_mutation_data: querying locally"); - return _proxy->query_mutations_locally(_schema, cmd, _partition_range, _trace_state); - } else { - auto& ms = netw::get_local_messaging_service(); - tracing::trace(_trace_state, "read_mutation_data: sending a message to /{}", ep); - return ms.send_read_mutation_data(netw::messaging_service::msg_addr{ep, 0}, timeout, *cmd, _partition_range).then([this, ep](reconcilable_result&& result, rpc::optional hit_rate) { - tracing::trace(_trace_state, "read_mutation_data: got response from /{}", ep); - return make_ready_future>, cache_temperature>(make_foreign(::make_lw_shared(std::move(result))), hit_rate.value_or(cache_temperature::invalid())); - }); - } - } - future>, cache_temperature> make_data_request(gms::inet_address ep, clock_type::time_point timeout, bool want_digest) { - ++_proxy->_stats.data_read_attempts.get_ep_stat(ep); - if (is_me(ep)) { - tracing::trace(_trace_state, "read_data: querying locally"); - auto qrr = want_digest ? query::result_request::result_and_digest : query::result_request::only_result; - return _proxy->query_singular_local(_schema, _cmd, _partition_range, qrr, _trace_state); - } else { - auto& ms = netw::get_local_messaging_service(); - tracing::trace(_trace_state, "read_data: sending a message to /{}", ep); - auto da = want_digest ? query::digest_algorithm::MD5 : query::digest_algorithm::none; - return ms.send_read_data(netw::messaging_service::msg_addr{ep, 0}, timeout, *_cmd, _partition_range, da).then([this, ep](query::result&& result, rpc::optional hit_rate) { - tracing::trace(_trace_state, "read_data: got response from /{}", ep); - return make_ready_future>, cache_temperature>(make_foreign(::make_lw_shared(std::move(result))), hit_rate.value_or(cache_temperature::invalid())); - }); - } - } - future make_digest_request(gms::inet_address ep, clock_type::time_point timeout) { - ++_proxy->_stats.digest_read_attempts.get_ep_stat(ep); - if (is_me(ep)) { - tracing::trace(_trace_state, "read_digest: querying locally"); - return _proxy->query_singular_local_digest(_schema, _cmd, _partition_range, _trace_state); - } else { - auto& ms = netw::get_local_messaging_service(); - tracing::trace(_trace_state, "read_digest: sending a message to /{}", ep); - return ms.send_read_digest(netw::messaging_service::msg_addr{ep, 0}, timeout, *_cmd, _partition_range).then([this, ep] (query::result_digest d, rpc::optional t, - rpc::optional hit_rate) { - tracing::trace(_trace_state, "read_digest: got response from /{}", ep); - return make_ready_future(d, t ? t.value() : api::missing_timestamp, hit_rate.value_or(cache_temperature::invalid())); - }); - } - } - future<> make_mutation_data_requests(lw_shared_ptr cmd, data_resolver_ptr resolver, targets_iterator begin, targets_iterator end, clock_type::time_point timeout) { - return parallel_for_each(begin, end, [this, &cmd, resolver = std::move(resolver), timeout] (gms::inet_address ep) { - return make_mutation_data_request(cmd, ep, timeout).then_wrapped([this, resolver, ep] (future>, cache_temperature> f) { - try { - auto v = f.get(); - _cf->set_hit_rate(ep, std::get<1>(v)); - resolver->add_mutate_data(ep, std::get<0>(std::move(v))); - ++_proxy->_stats.mutation_data_read_completed.get_ep_stat(ep); - } catch(...) 
-                {
-                    ++_proxy->_stats.mutation_data_read_errors.get_ep_stat(ep);
-                    resolver->error(ep, std::current_exception());
-                }
-            });
-        });
-    }
-    future<> make_data_requests(digest_resolver_ptr resolver, targets_iterator begin, targets_iterator end, clock_type::time_point timeout, bool want_digest) {
-        return parallel_for_each(begin, end, [this, resolver = std::move(resolver), timeout, want_digest] (gms::inet_address ep) {
-            return make_data_request(ep, timeout, want_digest).then_wrapped([this, resolver, ep] (future<foreign_ptr<lw_shared_ptr<query::result>>, cache_temperature> f) {
-                try {
-                    auto v = f.get();
-                    _cf->set_hit_rate(ep, std::get<1>(v));
-                    resolver->add_data(ep, std::get<0>(std::move(v)));
-                    ++_proxy->_stats.data_read_completed.get_ep_stat(ep);
-                } catch(...) {
-                    ++_proxy->_stats.data_read_errors.get_ep_stat(ep);
-                    resolver->error(ep, std::current_exception());
-                }
-            });
-        });
-    }
-    future<> make_digest_requests(digest_resolver_ptr resolver, targets_iterator begin, targets_iterator end, clock_type::time_point timeout) {
-        return parallel_for_each(begin, end, [this, resolver = std::move(resolver), timeout] (gms::inet_address ep) {
-            return make_digest_request(ep, timeout).then_wrapped([this, resolver, ep] (future<query::result_digest, api::timestamp_type, cache_temperature> f) {
-                try {
-                    auto v = f.get();
-                    _cf->set_hit_rate(ep, std::get<2>(v));
-                    resolver->add_digest(ep, std::get<0>(v), std::get<1>(v));
-                    ++_proxy->_stats.digest_read_completed.get_ep_stat(ep);
-                } catch(...) {
-                    ++_proxy->_stats.digest_read_errors.get_ep_stat(ep);
-                    resolver->error(ep, std::current_exception());
-                }
-            });
-        });
-    }
-    virtual future<> make_requests(digest_resolver_ptr resolver, clock_type::time_point timeout) {
-        resolver->add_wait_targets(_targets.size());
-        auto want_digest = _targets.size() > 1;
-        return when_all(make_data_requests(resolver, _targets.begin(), _targets.begin() + 1, timeout, want_digest),
-                        make_digest_requests(resolver, _targets.begin() + 1, _targets.end(), timeout)).discard_result();
-    }
-    virtual void got_cl() {}
-    uint32_t original_row_limit() const {
-        return _cmd->row_limit;
-    }
-    uint32_t original_per_partition_row_limit() const {
-        return _cmd->slice.partition_row_limit();
-    }
-    uint32_t original_partition_limit() const {
-        return _cmd->partition_limit;
-    }
-    void reconcile(db::consistency_level cl, storage_proxy::clock_type::time_point timeout, lw_shared_ptr<query::read_command> cmd) {
-        data_resolver_ptr data_resolver = ::make_shared<data_read_resolver>(_schema, cl, _targets.size(), timeout);
-        auto exec = shared_from_this();
-
-        make_mutation_data_requests(cmd, data_resolver, _targets.begin(), _targets.end(), timeout).finally([exec]{});
-
-        data_resolver->done().then_wrapped([this, exec, data_resolver, cmd = std::move(cmd), cl, timeout] (future<> f) {
-            try {
-                f.get();
-                auto rr_opt = data_resolver->resolve(_schema, *cmd, original_row_limit(), original_per_partition_row_limit(), original_partition_limit()); // reconciliation happens here
-
-                // We generate a retry if at least one node replies with `count` live columns but after the merge we have fewer
-                // than the total number of columns we are interested in (which may be < count on a retry).
-                // So in particular, if no host returned `count` live columns, we know it's not a short read.
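                // Editorial illustration (added; hypothetical numbers) for the retry sizing
                // used below: suppose we asked every replica for t = 100 rows and
                // reconciliation left l = 40 live rows. Assuming the live fraction l/t
                // stays roughly stable, the next round asks for x = t*t/l + 1 = 251 rows,
                // chosen so that x * (l/t) ~= t, i.e. the retry should survive the same
                // proportion of dead rows and still fill the original limit.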
- bool can_send_short_read = rr_opt && rr_opt->is_short_read() && rr_opt->row_count() > 0; - if (rr_opt && (can_send_short_read || data_resolver->all_reached_end() || rr_opt->row_count() >= original_row_limit() - || data_resolver->live_partition_count() >= original_partition_limit()) - && !data_resolver->any_partition_short_read()) { - auto result = ::make_foreign(::make_lw_shared( - to_data_query_result(std::move(*rr_opt), _schema, _cmd->slice, _cmd->row_limit, cmd->partition_limit))); - // wait for write to complete before returning result to prevent multiple concurrent read requests to - // trigger repair multiple times and to prevent quorum read to return an old value, even after a quorum - // another read had returned a newer value (but the newer value had not yet been sent to the other replicas) - _proxy->schedule_repair(data_resolver->get_diffs_for_repair(), _cl, _trace_state).then([this, result = std::move(result)] () mutable { - _result_promise.set_value(std::move(result)); - }).handle_exception([this, exec] (std::exception_ptr eptr) { - try { - std::rethrow_exception(eptr); - } catch (mutation_write_timeout_exception&) { - // convert write error to read error - _result_promise.set_exception(read_timeout_exception(_schema->ks_name(), _schema->cf_name(), _cl, _block_for - 1, _block_for, true)); - } catch (...) { - _result_promise.set_exception(std::current_exception()); - } - }); - } else { - _proxy->_stats.read_retries++; - _retry_cmd = make_lw_shared(*cmd); - // We asked t (= cmd->row_limit) live columns and got l (=data_resolver->total_live_count) ones. - // From that, we can estimate that on this row, for x requested - // columns, only l/t end up live after reconciliation. So for next - // round we want to ask x column so that x * (l/t) == t, i.e. x = t^2/l. - auto x = [](uint64_t t, uint64_t l) -> uint32_t { - auto ret = std::min(static_cast(query::max_rows), l == 0 ? t + 1 : ((t * t) / l) + 1); - return static_cast(ret); - }; - if (data_resolver->any_partition_short_read() || data_resolver->increase_per_partition_limit()) { - // The number of live rows was bounded by the per partition limit. - auto new_limit = x(cmd->slice.partition_row_limit(), data_resolver->max_per_partition_live_count()); - _retry_cmd->slice.set_partition_row_limit(new_limit); - _retry_cmd->row_limit = std::max(cmd->row_limit, data_resolver->partition_count() * new_limit); - } else { - // The number of live rows was bounded by the total row limit or partition limit. - if (cmd->partition_limit != query::max_partitions) { - _retry_cmd->partition_limit = x(cmd->partition_limit, data_resolver->live_partition_count()); - } - if (cmd->row_limit != query::max_rows) { - _retry_cmd->row_limit = x(cmd->row_limit, data_resolver->total_live_count()); - } - } - - // We may be unable to send a single live row because of replicas bailing out too early. - // If that is the case disallow short reads so that we can make progress. - if (!data_resolver->total_live_count()) { - _retry_cmd->slice.options.remove(); - } - - slogger.trace("Retrying query with command {} (previous is {})", *_retry_cmd, *cmd); - reconcile(cl, timeout, _retry_cmd); - } - } catch (...) 
-            {
-                _result_promise.set_exception(std::current_exception());
-            }
-        });
-    }
-    void reconcile(db::consistency_level cl, storage_proxy::clock_type::time_point timeout) {
-        reconcile(cl, timeout, _cmd);
-    }
-
-public:
-    virtual future<foreign_ptr<lw_shared_ptr<query::result>>> execute(storage_proxy::clock_type::time_point timeout) {
-        digest_resolver_ptr digest_resolver = ::make_shared<digest_read_resolver>(_schema, _cl, _block_for, timeout);
-        auto exec = shared_from_this();
-
-        make_requests(digest_resolver, timeout).finally([exec]() {
-            // hold on to executor until all queries are complete
-        });
-
-        digest_resolver->has_cl().then_wrapped([exec, digest_resolver, timeout] (future<foreign_ptr<lw_shared_ptr<query::result>>, bool> f) mutable {
-            bool background_repair_check = false;
-            try {
-                exec->got_cl();
-
-                foreign_ptr<lw_shared_ptr<query::result>> result;
-                bool digests_match;
-                std::tie(result, digests_match) = f.get(); // can throw
-
-                if (digests_match) {
-                    exec->_result_promise.set_value(std::move(result));
-                    if (exec->_block_for < exec->_targets.size()) { // if there are more targets than needed for cl, check digests in the background
-                        background_repair_check = true;
-                    }
-                } else { // digest mismatch
-                    if (is_datacenter_local(exec->_cl)) {
-                        auto write_timeout = exec->_proxy->_db.local().get_config().write_request_timeout_in_ms() * 1000;
-                        auto delta = int64_t(digest_resolver->last_modified()) - int64_t(exec->_cmd->read_timestamp);
-                        if (std::abs(delta) <= write_timeout) {
-                            exec->_proxy->_stats.global_read_repairs_canceled_due_to_concurrent_write++;
-                            // if CL is local and the non-matching data was modified less than write_timeout ms ago, do only local repair
-                            auto i = boost::range::remove_if(exec->_targets, std::not1(std::cref(db::is_local)));
-                            exec->_targets.erase(i, exec->_targets.end());
-                        }
-                    }
-                    exec->reconcile(exec->_cl, timeout);
-                    exec->_proxy->_stats.read_repair_repaired_blocking++;
-                }
-            } catch (...)
{ - exec->_result_promise.set_exception(std::current_exception()); - } - - exec->_proxy->_stats.background_reads++; - digest_resolver->done().then([exec, digest_resolver, timeout, background_repair_check] () mutable { - if (background_repair_check && !digest_resolver->digests_match()) { - exec->_proxy->_stats.read_repair_repaired_background++; - exec->_result_promise = promise>>(); - exec->reconcile(exec->_cl, timeout); - return exec->_result_promise.get_future().discard_result(); - } else { - return make_ready_future<>(); - } - }).handle_exception([] (std::exception_ptr eptr) { - // ignore any failures during background repair - }).then([exec] { - exec->_proxy->_stats.background_reads--; - }); - }); - - return _result_promise.get_future(); - } -}; - -class never_speculating_read_executor : public abstract_read_executor { -public: - using abstract_read_executor::abstract_read_executor; -}; - -// this executor always asks for one additional data reply -class always_speculating_read_executor : public abstract_read_executor { -public: - using abstract_read_executor::abstract_read_executor; - virtual future<> make_requests(digest_resolver_ptr resolver, storage_proxy::clock_type::time_point timeout) { - resolver->add_wait_targets(_targets.size()); - // FIXME: consider disabling for CL=*ONE - bool want_digest = true; - return when_all(make_data_requests(resolver, _targets.begin(), _targets.begin() + 2, timeout, want_digest), - make_digest_requests(resolver, _targets.begin() + 2, _targets.end(), timeout)).discard_result(); - } -}; - -// this executor sends request to an additional replica after some time below timeout -class speculating_read_executor : public abstract_read_executor { - timer _speculate_timer; -public: - using abstract_read_executor::abstract_read_executor; - virtual future<> make_requests(digest_resolver_ptr resolver, storage_proxy::clock_type::time_point timeout) { - _speculate_timer.set_callback([this, resolver, timeout] { - if (!resolver->is_completed()) { // at the time the callback runs request may be completed already - resolver->add_wait_targets(1); // we send one more request so wait for it too - // FIXME: consider disabling for CL=*ONE - bool want_digest = true; - future<> f = resolver->has_data() ? - make_digest_requests(resolver, _targets.end() - 1, _targets.end(), timeout) : - make_data_requests(resolver, _targets.end() - 1, _targets.end(), timeout, want_digest); - f.finally([exec = shared_from_this()]{}); - } - }); - auto& sr = _schema->speculative_retry(); - auto t = (sr.get_type() == speculative_retry::type::PERCENTILE) ? - // FIXME: the timeout should come from previous latency statistics for a partition - std::chrono::milliseconds(_proxy->get_db().local().get_config().read_request_timeout_in_ms()/2) : - std::chrono::milliseconds(unsigned(sr.get_value())); - _speculate_timer.arm(t); - - // if CL + RR result in covering all replicas, getReadExecutor forces AlwaysSpeculating. So we know - // that the last replica in our list is "extra." - resolver->add_wait_targets(_targets.size() - 1); - // FIXME: consider disabling for CL=*ONE - bool want_digest = true; - if (_block_for < _targets.size() - 1) { - // We're hitting additional targets for read repair. Since our "extra" replica is the least- - // preferred by the snitch, we do an extra data read to start with against a replica more - // likely to reply; better to let RR fail than the entire query. 
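            // Editorial illustration (added; hypothetical numbers): with 4 targets and
            // _block_for = 2 the branch below sends data reads to the first two replicas
            // and digest reads to the last two; in the else branch (_block_for == 3, i.e.
            // targets - 1) a single data read plus two digest reads go out, and the
            // speculative "extra" target is only contacted if the timer above fires.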
- return when_all(make_data_requests(resolver, _targets.begin(), _targets.begin() + 2, timeout, want_digest), - make_digest_requests(resolver, _targets.begin() + 2, _targets.end(), timeout)).discard_result(); - } else { - // not doing read repair; all replies are important, so it doesn't matter which nodes we - // perform data reads against vs digest. - return when_all(make_data_requests(resolver, _targets.begin(), _targets.begin() + 1, timeout, want_digest), - make_digest_requests(resolver, _targets.begin() + 1, _targets.end() - 1, timeout)).discard_result(); - } - } - virtual void got_cl() override { - _speculate_timer.cancel(); - } -}; - -class range_slice_read_executor : public abstract_read_executor { -public: - range_slice_read_executor(schema_ptr s, lw_shared_ptr cf, shared_ptr proxy, lw_shared_ptr cmd, dht::partition_range pr, db::consistency_level cl, std::vector targets, tracing::trace_state_ptr trace_state) : - abstract_read_executor(std::move(s), std::move(cf), std::move(proxy), std::move(cmd), std::move(pr), cl, targets.size(), std::move(targets), std::move(trace_state)) {} - virtual future>> execute(storage_proxy::clock_type::time_point timeout) override { - reconcile(_cl, timeout); - return _result_promise.get_future(); - } -}; - -db::read_repair_decision storage_proxy::new_read_repair_decision(const schema& s) { - double chance = _read_repair_chance(_urandom); - if (s.read_repair_chance() > chance) { - return db::read_repair_decision::GLOBAL; - } - - if (s.dc_local_read_repair_chance() > chance) { - return db::read_repair_decision::DC_LOCAL; - } - - return db::read_repair_decision::NONE; -} - -::shared_ptr storage_proxy::get_read_executor(lw_shared_ptr cmd, dht::partition_range pr, db::consistency_level cl, tracing::trace_state_ptr trace_state) { - const dht::token& token = pr.start()->value().token(); - schema_ptr schema = local_schema_registry().get(cmd->schema_version); - keyspace& ks = _db.local().find_keyspace(schema->ks_name()); - speculative_retry::type retry_type = schema->speculative_retry().get_type(); - gms::inet_address extra_replica; - - std::vector all_replicas = get_live_sorted_endpoints(ks, token); - db::read_repair_decision repair_decision = new_read_repair_decision(*schema); - auto cf = _db.local().find_column_family(schema).shared_from_this(); - std::vector target_replicas = db::filter_for_query(cl, ks, all_replicas, repair_decision, - retry_type == speculative_retry::type::NONE ? nullptr : &extra_replica, - _db.local().get_config().cache_hit_rate_read_balancing() ? &*cf : nullptr); - - slogger.trace("creating read executor for token {} with all: {} targets: {} rp decision: {}", token, all_replicas, target_replicas, repair_decision); - tracing::trace(trace_state, "Creating read executor for token {} with all: {} targets: {} repair decision: {}", token, all_replicas, target_replicas, repair_decision); - - // Throw UAE early if we don't have enough replicas. 
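    // Editorial reminder (added; standard Cassandra consistency semantics, not defined in
    // this file): db::block_for(ks, cl) is the number of replies the coordinator must wait
    // for -- e.g. 1 for CL=ONE, floor(RF/2) + 1 for CL=QUORUM, RF for CL=ALL -- so with
    // RF = 3 a QUORUM read blocks for 2 of the targets filtered above.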
- try { - db::assure_sufficient_live_nodes(cl, ks, target_replicas); - } catch (exceptions::unavailable_exception& ex) { - slogger.debug("Read unavailable: cl={} required {} alive {}", ex.consistency, ex.required, ex.alive); - _stats.read_unavailables.mark(); - throw; - } - - if (repair_decision != db::read_repair_decision::NONE) { - _stats.read_repair_attempts++; - } - -#if 0 - ColumnFamilyStore cfs = keyspace.getColumnFamilyStore(command.cfName); -#endif - - size_t block_for = db::block_for(ks, cl); - auto p = shared_from_this(); - // Speculative retry is disabled *OR* there are simply no extra replicas to speculate. - if (retry_type == speculative_retry::type::NONE || block_for == all_replicas.size() - || (repair_decision == db::read_repair_decision::DC_LOCAL && is_datacenter_local(cl) && block_for == target_replicas.size())) { - return ::make_shared(schema, cf, p, cmd, std::move(pr), cl, block_for, std::move(target_replicas), std::move(trace_state)); - } - - if (target_replicas.size() == all_replicas.size()) { - // CL.ALL, RRD.GLOBAL or RRD.DC_LOCAL and a single-DC. - // We are going to contact every node anyway, so ask for 2 full data requests instead of 1, for redundancy - // (same amount of requests in total, but we turn 1 digest request into a full blown data request). - return ::make_shared(schema, cf, p, cmd, std::move(pr), cl, block_for, std::move(target_replicas), std::move(trace_state)); - } - - // RRD.NONE or RRD.DC_LOCAL w/ multiple DCs. - if (target_replicas.size() == block_for) { // If RRD.DC_LOCAL extra replica may already be present - if (is_datacenter_local(cl) && !db::is_local(extra_replica)) { - slogger.trace("read executor no extra target to speculate"); - return ::make_shared(schema, cf, p, cmd, std::move(pr), cl, block_for, std::move(target_replicas), std::move(trace_state)); - } else { - target_replicas.push_back(extra_replica); - slogger.trace("creating read executor with extra target {}", extra_replica); - } - } - - if (retry_type == speculative_retry::type::ALWAYS) { - return ::make_shared(schema, cf, p, cmd, std::move(pr), cl, block_for, std::move(target_replicas), std::move(trace_state)); - } else {// PERCENTILE or CUSTOM. 
- return ::make_shared(schema, cf, p, cmd, std::move(pr), cl, block_for, std::move(target_replicas), std::move(trace_state)); - } -} - -future -storage_proxy::query_singular_local_digest(schema_ptr s, lw_shared_ptr cmd, const dht::partition_range& pr, tracing::trace_state_ptr trace_state, uint64_t max_size) { - return query_singular_local(std::move(s), std::move(cmd), pr, query::result_request::only_digest, std::move(trace_state), max_size).then([] (foreign_ptr> result, cache_temperature hit_rate) { - return make_ready_future(*result->digest(), result->last_modified(), hit_rate); - }); -} - -future>, cache_temperature> -storage_proxy::query_singular_local(schema_ptr s, lw_shared_ptr cmd, const dht::partition_range& pr, query::result_request request, tracing::trace_state_ptr trace_state, uint64_t max_size) { - unsigned shard = _db.local().shard_of(pr.start()->value().token()); - return _db.invoke_on(shard, [max_size, gs = global_schema_ptr(s), prv = dht::partition_range_vector({pr}) /* FIXME: pr is copied */, cmd, request, gt = tracing::global_trace_state_ptr(std::move(trace_state))] (database& db) mutable { - return db.query(gs, *cmd, request, prv, gt, max_size).then([](auto&& f, cache_temperature ht) { - return make_ready_future>, cache_temperature>(make_foreign(std::move(f)), ht); - }); - }); -} - -void storage_proxy::handle_read_error(std::exception_ptr eptr, bool range) { - try { - std::rethrow_exception(eptr); - } catch (read_timeout_exception& ex) { - slogger.debug("Read timeout: received {} of {} required replies, data {}present", ex.received, ex.block_for, ex.data_present ? "" : "not "); - if (range) { - _stats.range_slice_timeouts.mark(); - } else { - _stats.read_timeouts.mark(); - } - } catch (...) { - slogger.debug("Error during read query {}", eptr); - } -} - -future>> -storage_proxy::query_singular(lw_shared_ptr cmd, dht::partition_range_vector&& partition_ranges, db::consistency_level cl, tracing::trace_state_ptr trace_state) { - std::vector<::shared_ptr> exec; - exec.reserve(partition_ranges.size()); - auto timeout = storage_proxy::clock_type::now() + std::chrono::milliseconds(_db.local().get_config().read_request_timeout_in_ms()); - - for (auto&& pr: partition_ranges) { - if (!pr.is_singular()) { - throw std::runtime_error("mixed singular and non singular range are not supported"); - } - exec.push_back(get_read_executor(cmd, std::move(pr), cl, trace_state)); - } - - query::result_merger merger(cmd->row_limit, cmd->partition_limit); - merger.reserve(exec.size()); - - auto f = ::map_reduce(exec.begin(), exec.end(), [timeout] (::shared_ptr& rex) { - return rex->execute(timeout); - }, std::move(merger)); - - return f.handle_exception([exec = std::move(exec), p = shared_from_this()] (std::exception_ptr eptr) { - // hold onto exec until read is complete - p->handle_read_error(eptr, false); - return make_exception_future>>(eptr); - }); -} - -future>>> -storage_proxy::query_partition_key_range_concurrent(storage_proxy::clock_type::time_point timeout, std::vector>>&& results, - lw_shared_ptr cmd, db::consistency_level cl, dht::partition_range_vector::iterator&& i, - dht::partition_range_vector&& ranges, int concurrency_factor, tracing::trace_state_ptr trace_state, - uint32_t remaining_row_count, uint32_t remaining_partition_count) { - schema_ptr schema = local_schema_registry().get(cmd->schema_version); - keyspace& ks = _db.local().find_keyspace(schema->ks_name()); - std::vector<::shared_ptr> exec; - auto concurrent_fetch_starting_index = i; - auto p = shared_from_this(); - auto& 
cf = _db.local().find_column_family(schema);
-    auto pcf = _db.local().get_config().cache_hit_rate_read_balancing() ? &cf : nullptr;
-
-    while (i != ranges.end() && std::distance(concurrent_fetch_starting_index, i) < concurrency_factor) {
-        dht::partition_range& range = *i;
-        std::vector<gms::inet_address> live_endpoints = get_live_sorted_endpoints(ks, end_token(range));
-        std::vector<gms::inet_address> filtered_endpoints = filter_for_query(cl, ks, live_endpoints, pcf);
-        ++i;
-
-        // getRestrictedRange has broken the queried range into per-[vnode] token ranges, but this doesn't take
-        // the replication factor into account. If the intersection of live endpoints for 2 consecutive ranges
-        // still meets the CL requirements, then we can merge both ranges into the same RangeSliceCommand.
-        while (i != ranges.end())
-        {
-            dht::partition_range& next_range = *i;
-            std::vector<gms::inet_address> next_endpoints = get_live_sorted_endpoints(ks, end_token(next_range));
-            std::vector<gms::inet_address> next_filtered_endpoints = filter_for_query(cl, ks, next_endpoints, pcf);
-
-            // Origin has this to say here:
-            // * If the current range right is the min token, we should stop merging because CFS.getRangeSlice
-            // * doesn't know how to deal with a wrapping range.
-            // * Note: it would be slightly more efficient to have CFS.getRangeSlice on the destination nodes unwrap
-            // * the range if necessary and deal with it. However, we can't start sending wrapped ranges without breaking
-            // * wire compatibility, so it's likely easier not to bother;
-            // It obviously does not apply to us(?), but let's follow Origin for now.
-            if (end_token(range) == dht::maximum_token()) {
-                break;
-            }
-
-            std::vector<gms::inet_address> merged = intersection(live_endpoints, next_endpoints);
-
-            // Check if there are enough endpoints for the merge to be possible.
-            if (!is_sufficient_live_nodes(cl, ks, merged)) {
-                break;
-            }
-
-            std::vector<gms::inet_address> filtered_merged = filter_for_query(cl, ks, merged, pcf);
-
-            // Estimate whether merging will be a win or not
-            if (!locator::i_endpoint_snitch::get_local_snitch_ptr()->is_worth_merging_for_range_query(filtered_merged, filtered_endpoints, next_filtered_endpoints)) {
-                break;
-            } else if (pcf) {
-                // check that the merged set's hit rate is not too low
-                auto find_min = [pcf] (const std::vector<gms::inet_address>& range) {
-                    struct {
-                        column_family* cf = nullptr;
-                        float operator()(const gms::inet_address& ep) const {
-                            return float(cf->get_hit_rate(ep).rate);
-                        }
-                    } ep_to_hr{pcf};
-                    return *boost::range::min_element(range | boost::adaptors::transformed(ep_to_hr));
-                };
-                auto merged = find_min(filtered_merged) * 1.2; // give the merged set a 20% boost
-                if (merged < find_min(filtered_endpoints) && merged < find_min(next_filtered_endpoints)) {
-                    // if the lowest cache hit rate of the merged set is smaller than the lowest cache hit
-                    // rate of the un-merged sets, do not merge.
-                    // The idea is that we had better issue
-                    // two different range reads, each with a high chance of hitting the cache, than one read that
-                    // will cause more IO on the contacted nodes
-                    break;
-                }
-            }
-
-            // If we get here, merge this range and the next one
-            range = dht::partition_range(range.start(), next_range.end());
-            live_endpoints = std::move(merged);
-            filtered_endpoints = std::move(filtered_merged);
-            ++i;
-        }
-        slogger.trace("creating range read executor with targets {}", filtered_endpoints);
-        try {
-            db::assure_sufficient_live_nodes(cl, ks, filtered_endpoints);
-        } catch(exceptions::unavailable_exception& ex) {
-            slogger.debug("Read unavailable: cl={} required {} alive {}", ex.consistency, ex.required, ex.alive);
-            _stats.range_slice_unavailables.mark();
-            throw;
-        }
-
-        exec.push_back(::make_shared<range_slice_read_executor>(schema, cf.shared_from_this(), p, cmd, std::move(range), cl, std::move(filtered_endpoints), trace_state));
-    }
-
-    query::result_merger merger(cmd->row_limit, cmd->partition_limit);
-    merger.reserve(exec.size());
-
-    auto f = ::map_reduce(exec.begin(), exec.end(), [timeout] (::shared_ptr<abstract_read_executor>& rex) {
-        return rex->execute(timeout);
-    }, std::move(merger));
-
-    return f.then([p, exec = std::move(exec), results = std::move(results), i = std::move(i), ranges = std::move(ranges),
-                   cl, cmd, concurrency_factor, timeout, remaining_row_count, remaining_partition_count, trace_state = std::move(trace_state)]
-                   (foreign_ptr<lw_shared_ptr<query::result>>&& result) mutable {
-        if (!result->row_count() || !result->partition_count()) {
-            slogger.error("no row count in query result, should not happen here");
-            result->calculate_counts(cmd->slice);
-        }
-        remaining_row_count -= result->row_count().value();
-        remaining_partition_count -= result->partition_count().value();
-        results.emplace_back(std::move(result));
-        if (i == ranges.end() || !remaining_row_count || !remaining_partition_count) {
-            return make_ready_future<std::vector<foreign_ptr<lw_shared_ptr<query::result>>>>(std::move(results));
-        } else {
-            cmd->row_limit = remaining_row_count;
-            cmd->partition_limit = remaining_partition_count;
-            return p->query_partition_key_range_concurrent(timeout, std::move(results), cmd, cl, std::move(i),
-                                                           std::move(ranges), concurrency_factor, std::move(trace_state), remaining_row_count, remaining_partition_count);
-        }
-    }).handle_exception([p] (std::exception_ptr eptr) {
-        p->handle_read_error(eptr, true);
-        return make_exception_future<std::vector<foreign_ptr<lw_shared_ptr<query::result>>>>(eptr);
-    });
-}
-
-future<foreign_ptr<lw_shared_ptr<query::result>>>
-storage_proxy::query_partition_key_range(lw_shared_ptr<query::read_command> cmd, dht::partition_range_vector partition_ranges, db::consistency_level cl, tracing::trace_state_ptr trace_state) {
-    schema_ptr schema = local_schema_registry().get(cmd->schema_version);
-    keyspace& ks = _db.local().find_keyspace(schema->ks_name());
-    dht::partition_range_vector ranges;
-    auto timeout = storage_proxy::clock_type::now() + std::chrono::milliseconds(_db.local().get_config().read_request_timeout_in_ms());
-
-    // when dealing with LocalStrategy keyspaces, we can skip the range splitting and merging (which can be
-    // expensive in clusters with vnodes)
-    if (ks.get_replication_strategy().get_type() == locator::replication_strategy_type::local) {
-        ranges = std::move(partition_ranges);
-    } else {
-        for (auto&& r : partition_ranges) {
-            auto restricted_ranges = get_restricted_ranges(ks, *schema, std::move(r));
-            std::move(restricted_ranges.begin(), restricted_ranges.end(), std::back_inserter(ranges));
-        }
-    }
-
-    // estimate_result_rows_per_range() is currently broken, and this is not needed
-    // when paging is available in any case
-#if 0
-    // our estimate of how many result rows
there will be per-range - float result_rows_per_range = estimate_result_rows_per_range(cmd, ks); - // underestimate how many rows we will get per-range in order to increase the likelihood that we'll - // fetch enough rows in the first round - result_rows_per_range -= result_rows_per_range * CONCURRENT_SUBREQUESTS_MARGIN; - int concurrency_factor = result_rows_per_range == 0.0 ? 1 : std::max(1, std::min(int(ranges.size()), int(std::ceil(cmd->row_limit / result_rows_per_range)))); -#else - int result_rows_per_range = 0; - int concurrency_factor = 1; -#endif - - std::vector>> results; - results.reserve(ranges.size()/concurrency_factor + 1); - slogger.debug("Estimated result rows per range: {}; requested rows: {}, ranges.size(): {}; concurrent range requests: {}", - result_rows_per_range, cmd->row_limit, ranges.size(), concurrency_factor); - - return query_partition_key_range_concurrent(timeout, std::move(results), cmd, cl, ranges.begin(), std::move(ranges), concurrency_factor, - std::move(trace_state), cmd->row_limit, cmd->partition_limit) - .then([row_limit = cmd->row_limit, partition_limit = cmd->partition_limit](std::vector>> results) { - query::result_merger merger(row_limit, partition_limit); - merger.reserve(results.size()); - - for (auto&& r: results) { - merger(std::move(r)); - } - - return merger.get(); - }); -} - -future>> -storage_proxy::query(schema_ptr s, - lw_shared_ptr cmd, - dht::partition_range_vector&& partition_ranges, - db::consistency_level cl, tracing::trace_state_ptr trace_state) -{ - if (slogger.is_enabled(logging::log_level::trace) || qlogger.is_enabled(logging::log_level::trace)) { - static thread_local int next_id = 0; - auto query_id = next_id++; - - slogger.trace("query {}.{} cmd={}, ranges={}, id={}", s->ks_name(), s->cf_name(), *cmd, partition_ranges, query_id); - return do_query(s, cmd, std::move(partition_ranges), cl, std::move(trace_state)).then([query_id, cmd, s] (foreign_ptr>&& res) { - if (res->buf().is_linearized()) { - res->calculate_counts(cmd->slice); - slogger.trace("query_result id={}, size={}, rows={}, partitions={}", query_id, res->buf().size(), *res->row_count(), *res->partition_count()); - } else { - slogger.trace("query_result id={}, size={}", query_id, res->buf().size()); - } - qlogger.trace("id={}, {}", query_id, res->pretty_printer(s, cmd->slice)); - return std::move(res); - }); - } - - return do_query(s, cmd, std::move(partition_ranges), cl, std::move(trace_state)); -} - -future>> -storage_proxy::do_query(schema_ptr s, - lw_shared_ptr cmd, - dht::partition_range_vector&& partition_ranges, - db::consistency_level cl, - tracing::trace_state_ptr trace_state) -{ - static auto make_empty = [] { - return make_ready_future>>(make_foreign(make_lw_shared())); - }; - - auto& slice = cmd->slice; - if (partition_ranges.empty() || - (slice.default_row_ranges().empty() && !slice.get_specific_ranges())) { - return make_empty(); - } - utils::latency_counter lc; - lc.start(); - auto p = shared_from_this(); - - if (query::is_single_partition(partition_ranges[0])) { // do not support mixed partitions (yet?) 
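    // Editorial note (added): do_query() forks here -- singular (single-partition) ranges
    // go through get_read_executor() and the digest/data read path above, while everything
    // else falls through to query_partition_key_range(), which splits the query into
    // per-vnode ranges, merges compatible ones and pages through them concurrently.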
- try { - return query_singular(cmd, std::move(partition_ranges), cl, std::move(trace_state)).finally([lc, p] () mutable { - p->_stats.read.mark(lc.stop().latency()); - if (lc.is_start()) { - p->_stats.estimated_read.add(lc.latency(), p->_stats.read.hist.count); - } - }); - } catch (const no_such_column_family&) { - _stats.read.mark(lc.stop().latency()); - return make_empty(); - } - } - - return query_partition_key_range(cmd, std::move(partition_ranges), cl, std::move(trace_state)).finally([lc, p] () mutable { - p->_stats.range.mark(lc.stop().latency()); - if (lc.is_start()) { - p->_stats.estimated_range.add(lc.latency(), p->_stats.range.hist.count); - } - }); -} - -#if 0 - private static List readWithPaxos(List commands, ConsistencyLevel consistencyLevel, ClientState state) - throws InvalidRequestException, UnavailableException, ReadTimeoutException - { - assert state != null; - - long start = System.nanoTime(); - List rows = null; - - try - { - // make sure any in-progress paxos writes are done (i.e., committed to a majority of replicas), before performing a quorum read - if (commands.size() > 1) - throw new InvalidRequestException("SERIAL/LOCAL_SERIAL consistency may only be requested for one row at a time"); - ReadCommand command = commands.get(0); - - CFMetaData metadata = Schema.instance.getCFMetaData(command.ksName, command.cfName); - Pair, Integer> p = getPaxosParticipants(command.ksName, command.key, consistencyLevel); - List liveEndpoints = p.left; - int requiredParticipants = p.right; - - // does the work of applying in-progress writes; throws UAE or timeout if it can't - final ConsistencyLevel consistencyForCommitOrFetch = consistencyLevel == ConsistencyLevel.LOCAL_SERIAL - ? ConsistencyLevel.LOCAL_QUORUM - : ConsistencyLevel.QUORUM; - try - { - final Pair pair = beginAndRepairPaxos(start, command.key, metadata, liveEndpoints, requiredParticipants, consistencyLevel, consistencyForCommitOrFetch, false, state); - if (pair.right > 0) - casReadMetrics.contention.update(pair.right); - } - catch (WriteTimeoutException e) - { - throw new ReadTimeoutException(consistencyLevel, 0, consistencyLevel.blockFor(Keyspace.open(command.ksName)), false); - } - - rows = fetchRows(commands, consistencyForCommitOrFetch); - } - catch (UnavailableException e) - { - readMetrics.unavailables.mark(); - ClientRequestMetrics.readUnavailables.inc(); - casReadMetrics.unavailables.mark(); - throw e; - } - catch (ReadTimeoutException e) - { - readMetrics.timeouts.mark(); - ClientRequestMetrics.readTimeouts.inc(); - casReadMetrics.timeouts.mark(); - throw e; - } - finally - { - long latency = System.nanoTime() - start; - readMetrics.addNano(latency); - casReadMetrics.addNano(latency); - // TODO avoid giving every command the same latency number. 
-            // Can fix this in CASSANDRA-5329
-            for (ReadCommand command : commands)
-                Keyspace.open(command.ksName).getColumnFamilyStore(command.cfName).metric.coordinatorReadLatency.update(latency, TimeUnit.NANOSECONDS);
-        }
-
-        return rows;
-    }
-#endif
-
-std::vector<gms::inet_address> storage_proxy::get_live_endpoints(keyspace& ks, const dht::token& token) {
-    auto& rs = ks.get_replication_strategy();
-    std::vector<gms::inet_address> eps = rs.get_natural_endpoints(token);
-    auto itend = boost::range::remove_if(eps, std::not1(std::bind1st(std::mem_fn(&gms::failure_detector::is_alive), &gms::get_local_failure_detector())));
-    eps.erase(itend, eps.end());
-    return std::move(eps);
-}
-
-std::vector<gms::inet_address> storage_proxy::get_live_sorted_endpoints(keyspace& ks, const dht::token& token) {
-    auto eps = get_live_endpoints(ks, token);
-    locator::i_endpoint_snitch::get_local_snitch_ptr()->sort_by_proximity(utils::fb_utilities::get_broadcast_address(), eps);
-    // FIXME: before the dynamic snitch is implemented, put the local address (if present) at the beginning
-    auto it = boost::range::find(eps, utils::fb_utilities::get_broadcast_address());
-    if (it != eps.end() && it != eps.begin()) {
-        std::iter_swap(it, eps.begin());
-    }
-    return eps;
-}
-
-std::vector<gms::inet_address> storage_proxy::intersection(const std::vector<gms::inet_address>& l1, const std::vector<gms::inet_address>& l2) {
-    std::vector<gms::inet_address> inter;
-    inter.reserve(l1.size());
-    std::remove_copy_if(l1.begin(), l1.end(), std::back_inserter(inter), [&l2] (const gms::inet_address& a) {
-        return std::find(l2.begin(), l2.end(), a) == l2.end();
-    });
-    return inter;
-}
-
-/**
- * Estimate the number of result rows (either cql3 rows or storage rows, as called for by the command) per
- * range in the ring based on our local data. This assumes that ranges are uniformly distributed across the cluster
- * and that the queried data is also uniformly distributed.
- */
-float storage_proxy::estimate_result_rows_per_range(lw_shared_ptr<query::read_command> cmd, keyspace& ks)
-{
-    return 1.0;
-#if 0
-    ColumnFamilyStore cfs = keyspace.getColumnFamilyStore(command.columnFamily);
-    float resultRowsPerRange = Float.POSITIVE_INFINITY;
-    if (command.rowFilter != null && !command.rowFilter.isEmpty())
-    {
-        List<SecondaryIndexSearcher> searchers = cfs.indexManager.getIndexSearchersForQuery(command.rowFilter);
-        if (searchers.isEmpty())
-        {
-            resultRowsPerRange = calculateResultRowsUsingEstimatedKeys(cfs);
-        }
-        else
-        {
-            // Secondary index query (cql3 or otherwise). Estimate result rows based on the most selective 2ary index.
- for (SecondaryIndexSearcher searcher : searchers) - { - // use our own mean column count as our estimate for how many matching rows each node will have - SecondaryIndex highestSelectivityIndex = searcher.highestSelectivityIndex(command.rowFilter); - resultRowsPerRange = Math.min(resultRowsPerRange, highestSelectivityIndex.estimateResultRows()); - } - } - } - else if (!command.countCQL3Rows()) - { - // non-cql3 query - resultRowsPerRange = cfs.estimateKeys(); - } - else - { - resultRowsPerRange = calculateResultRowsUsingEstimatedKeys(cfs); - } - - // adjust resultRowsPerRange by the number of tokens this node has and the replication factor for this ks - return (resultRowsPerRange / DatabaseDescriptor.getNumTokens()) / keyspace.getReplicationStrategy().getReplicationFactor(); -#endif -} - -#if 0 - private static float calculateResultRowsUsingEstimatedKeys(ColumnFamilyStore cfs) - { - if (cfs.metadata.comparator.isDense()) - { - // one storage row per result row, so use key estimate directly - return cfs.estimateKeys(); - } - else - { - float resultRowsPerStorageRow = ((float) cfs.getMeanColumns()) / cfs.metadata.regularColumns().size(); - return resultRowsPerStorageRow * (cfs.estimateKeys()); - } - } - - private static List trim(AbstractRangeCommand command, List rows) - { - // When maxIsColumns, we let the caller trim the result. - if (command.countCQL3Rows()) - return rows; - else - return rows.size() > command.limit() ? rows.subList(0, command.limit()) : rows; - } -#endif - -/** - * Compute all ranges we're going to query, in sorted order. Nodes can be replica destinations for many ranges, - * so we need to restrict each scan to the specific range we want, or else we'd get duplicate results. - */ -dht::partition_range_vector -storage_proxy::get_restricted_ranges(keyspace& ks, const schema& s, dht::partition_range range) { - locator::token_metadata& tm = get_local_storage_service().get_token_metadata(); - return service::get_restricted_ranges(tm, s, std::move(range)); -} - -dht::partition_range_vector -get_restricted_ranges(locator::token_metadata& tm, const schema& s, dht::partition_range range) { - dht::ring_position_comparator cmp(s); - - // special case for bounds containing exactly 1 token - if (start_token(range) == end_token(range)) { - if (start_token(range).is_minimum()) { - return {}; - } - return dht::partition_range_vector({std::move(range)}); - } - - dht::partition_range_vector ranges; - - auto add_range = [&ranges, &cmp] (dht::partition_range&& r) { - ranges.emplace_back(std::move(r)); - }; - - // divide the queryRange into pieces delimited by the ring - auto ring_iter = tm.ring_range(range.start(), false); - dht::partition_range remainder = std::move(range); - for (const dht::token& upper_bound_token : ring_iter) - { - /* - * remainder can be a range/bounds of token _or_ keys and we want to split it with a token: - * - if remainder is tokens, then we'll just split using the provided token. - * - if remainder is keys, we want to split using token.upperBoundKey. For instance, if remainder - * is [DK(10, 'foo'), DK(20, 'bar')], and we have 3 nodes with tokens 0, 15, 30. We want to - * split remainder to A=[DK(10, 'foo'), 15] and B=(15, DK(20, 'bar')]. But since we can't mix - * tokens and keys at the same time in a range, we uses 15.upperBoundKey() to have A include all - * keys having 15 as token and B include none of those (since that is what our node owns). - * asSplitValue() abstracts that choice. 
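     *
     * Editorial worked example (added; hypothetical tokens): querying (5, 25] on a ring
     * with node tokens {0, 15, 30} splits at 15 into A = (5, 15] and B = (15, 25]; the
     * replicas owning token 15 serve A and the ones owning 30 serve B. Without the split,
     * both replica sets would be asked for the whole (5, 25] and the coordinator would
     * get duplicate rows.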
- */ - - dht::ring_position split_point(upper_bound_token, dht::ring_position::token_bound::end); - if (!remainder.contains(split_point, cmp)) { - break; // no more splits - } - - { - // We shouldn't attempt to split on upper bound, because it may result in - // an ambiguous range of the form (x; x] - if (end_token(remainder) == upper_bound_token) { - break; - } - - std::pair<dht::partition_range, dht::partition_range> splits = - remainder.split(split_point, cmp); - - add_range(std::move(splits.first)); - remainder = std::move(splits.second); - } - } - add_range(std::move(remainder)); - - return ranges; -} - -bool storage_proxy::should_hint(gms::inet_address ep) noexcept { - if (is_me(ep)) { // do not hint to local address - return false; - } - - return false; -#if 0 - if (DatabaseDescriptor.shouldHintByDC()) - { - final String dc = DatabaseDescriptor.getEndpointSnitch().getDatacenter(ep); - //Disable DC specific hints - if(!DatabaseDescriptor.hintedHandoffEnabled(dc)) - { - HintedHandOffManager.instance.metrics.incrPastWindow(ep); - return false; - } - } - else if (!DatabaseDescriptor.hintedHandoffEnabled()) - { - HintedHandOffManager.instance.metrics.incrPastWindow(ep); - return false; - } - - boolean hintWindowExpired = Gossiper.instance.getEndpointDowntime(ep) > DatabaseDescriptor.getMaxHintWindow(); - if (hintWindowExpired) - { - HintedHandOffManager.instance.metrics.incrPastWindow(ep); - Tracing.trace("Not hinting {} which has been down {}ms", ep, Gossiper.instance.getEndpointDowntime(ep)); - } - return !hintWindowExpired; -#endif -} - -future<> storage_proxy::truncate_blocking(sstring keyspace, sstring cfname) { - slogger.debug("Starting a blocking truncate operation on keyspace {}, CF {}", keyspace, cfname); - - auto& gossiper = gms::get_local_gossiper(); - - if (!gossiper.get_unreachable_token_owners().empty()) { - slogger.info("Cannot perform truncate, some hosts are down"); - // Since the truncate operation is so aggressive and is typically only - // invoked by an admin, for simplicity we require that all nodes are up - // to perform the operation. - auto live_members = gossiper.get_live_members().size(); - - throw exceptions::unavailable_exception(db::consistency_level::ALL, - live_members + gossiper.get_unreachable_members().size(), - live_members); - } - - auto all_endpoints = gossiper.get_live_token_owners(); - auto& ms = netw::get_local_messaging_service(); - auto timeout = std::chrono::milliseconds(_db.local().get_config().truncate_request_timeout_in_ms()); - - slogger.trace("Enqueuing truncate messages to hosts {}", all_endpoints); - - return parallel_for_each(all_endpoints, [keyspace, cfname, &ms, timeout](auto ep) { - return ms.send_truncate(netw::messaging_service::msg_addr{ep, 0}, timeout, keyspace, cfname); - }).handle_exception([cfname](auto ep) { - try { - std::rethrow_exception(ep); - } catch (rpc::timeout_error& e) { - slogger.trace("Truncation of {} timed out: {}", cfname, e.what()); - } catch (...)
{ - throw; - } - }); -} - -#if 0 - public interface WritePerformer - { - public void apply(IMutation mutation, - Iterable targets, - AbstractWriteResponseHandler responseHandler, - String localDataCenter, - ConsistencyLevel consistencyLevel) throws OverloadedException; - } - - /** - * A Runnable that aborts if it doesn't start running before it times out - */ - private static abstract class DroppableRunnable implements Runnable - { - private final long constructionTime = System.nanoTime(); - private final MessagingService.Verb verb; - - public DroppableRunnable(MessagingService.Verb verb) - { - this.verb = verb; - } - - public final void run() - { - - if (TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - constructionTime) > DatabaseDescriptor.getTimeout(verb)) - { - MessagingService.instance().incrementDroppedMessages(verb); - return; - } - try - { - runMayThrow(); - } catch (Exception e) - { - throw new RuntimeException(e); - } - } - - abstract protected void runMayThrow() throws Exception; - } - - /** - * Like DroppableRunnable, but if it aborts, it will rerun (on the mutation stage) after - * marking itself as a hint in progress so that the hint backpressure mechanism can function. - */ - private static abstract class LocalMutationRunnable implements Runnable - { - private final long constructionTime = System.currentTimeMillis(); - - public final void run() - { - if (System.currentTimeMillis() > constructionTime + DatabaseDescriptor.getTimeout(MessagingService.Verb.MUTATION)) - { - MessagingService.instance().incrementDroppedMessages(MessagingService.Verb.MUTATION); - HintRunnable runnable = new HintRunnable(FBUtilities.getBroadcastAddress()) - { - protected void runMayThrow() throws Exception - { - LocalMutationRunnable.this.runMayThrow(); - } - }; - submitHint(runnable); - return; - } - - try - { - runMayThrow(); - } - catch (Exception e) - { - throw new RuntimeException(e); - } - } - - abstract protected void runMayThrow() throws Exception; - } - - /** - * HintRunnable will decrease totalHintsInProgress and targetHints when finished. - * It is the caller's responsibility to increment them initially. - */ - private abstract static class HintRunnable implements Runnable - { - public final InetAddress target; - - protected HintRunnable(InetAddress target) - { - this.target = target; - } - - public void run() - { - try - { - runMayThrow(); - } - catch (Exception e) - { - throw new RuntimeException(e); - } - finally - { - StorageMetrics.totalHintsInProgress.dec(); - getHintsInProgressFor(target).decrementAndGet(); - } - } - - abstract protected void runMayThrow() throws Exception; - } - - public long getTotalHints() - { - return StorageMetrics.totalHints.count(); - } - - public int getMaxHintsInProgress() - { - return maxHintsInProgress; - } - - public void setMaxHintsInProgress(int qs) - { - maxHintsInProgress = qs; - } - - public int getHintsInProgress() - { - return (int) StorageMetrics.totalHintsInProgress.count(); - } - - public void verifyNoHintsInProgress() - { - if (getHintsInProgress() > 0) - slogger.warn("Some hints were not written before shutdown. This is not supposed to happen. 
You should (a) run repair, and (b) file a bug report"); - } - - public Long getRpcTimeout() { return DatabaseDescriptor.getRpcTimeout(); } - public void setRpcTimeout(Long timeoutInMillis) { DatabaseDescriptor.setRpcTimeout(timeoutInMillis); } - - public Long getReadRpcTimeout() { return DatabaseDescriptor.getReadRpcTimeout(); } - public void setReadRpcTimeout(Long timeoutInMillis) { DatabaseDescriptor.setReadRpcTimeout(timeoutInMillis); } - - public Long getWriteRpcTimeout() { return DatabaseDescriptor.getWriteRpcTimeout(); } - public void setWriteRpcTimeout(Long timeoutInMillis) { DatabaseDescriptor.setWriteRpcTimeout(timeoutInMillis); } - - public Long getCounterWriteRpcTimeout() { return DatabaseDescriptor.getCounterWriteRpcTimeout(); } - public void setCounterWriteRpcTimeout(Long timeoutInMillis) { DatabaseDescriptor.setCounterWriteRpcTimeout(timeoutInMillis); } - - public Long getCasContentionTimeout() { return DatabaseDescriptor.getCasContentionTimeout(); } - public void setCasContentionTimeout(Long timeoutInMillis) { DatabaseDescriptor.setCasContentionTimeout(timeoutInMillis); } - - public Long getRangeRpcTimeout() { return DatabaseDescriptor.getRangeRpcTimeout(); } - public void setRangeRpcTimeout(Long timeoutInMillis) { DatabaseDescriptor.setRangeRpcTimeout(timeoutInMillis); } - - public Long getTruncateRpcTimeout() { return DatabaseDescriptor.getTruncateRpcTimeout(); } - public void setTruncateRpcTimeout(Long timeoutInMillis) { DatabaseDescriptor.setTruncateRpcTimeout(timeoutInMillis); } - public void reloadTriggerClasses() { TriggerExecutor.instance.reloadClasses(); } - - - public long getReadRepairAttempted() { - return ReadRepairMetrics.attempted.count(); - } - - public long getReadRepairRepairedBlocking() { - return ReadRepairMetrics.repairedBlocking.count(); - } - - public long getReadRepairRepairedBackground() { - return ReadRepairMetrics.repairedBackground.count(); - } -#endif - -void storage_proxy::init_messaging_service() { - auto& ms = netw::get_local_messaging_service(); - ms.register_counter_mutation([] (const rpc::client_info& cinfo, rpc::opt_time_point t, std::vector<frozen_mutation> fms, db::consistency_level cl, stdx::optional<tracing::trace_info> trace_info) { - auto src_addr = netw::messaging_service::get_source(cinfo); - - tracing::trace_state_ptr trace_state_ptr; - if (trace_info) { - trace_state_ptr = tracing::tracing::get_local_tracing_instance().create_session(*trace_info); - tracing::begin(trace_state_ptr); - tracing::trace(trace_state_ptr, "Message received from /{}", src_addr.addr); - } - - return do_with(std::vector<frozen_mutation_and_schema>(), - [cl, src_addr, timeout = *t, fms = std::move(fms), trace_state_ptr = std::move(trace_state_ptr)] (std::vector<frozen_mutation_and_schema>& mutations) mutable { - return parallel_for_each(std::move(fms), [&mutations, src_addr] (frozen_mutation& fm) { - // FIXME: optimise for cases when all fms are in the same schema - auto schema_version = fm.schema_version(); - return get_schema_for_write(schema_version, std::move(src_addr)).then([&mutations, fm = std::move(fm)] (schema_ptr s) mutable { - mutations.emplace_back(frozen_mutation_and_schema { std::move(fm), std::move(s) }); - }); - }).then([trace_state_ptr = std::move(trace_state_ptr), &mutations, cl, timeout] { - auto sp = get_local_shared_storage_proxy(); - return sp->mutate_counters_on_leader(std::move(mutations), cl, timeout, std::move(trace_state_ptr)); - }); - }); - }); - ms.register_mutation([] (const rpc::client_info& cinfo, rpc::opt_time_point t, frozen_mutation in, std::vector<gms::inet_address> forward, gms::inet_address reply_to, unsigned shard,
storage_proxy::response_id_type response_id, rpc::optional<stdx::optional<tracing::trace_info>> trace_info) { - tracing::trace_state_ptr trace_state_ptr; - auto src_addr = netw::messaging_service::get_source(cinfo); - - if (trace_info && *trace_info) { - tracing::trace_info& tr_info = **trace_info; - trace_state_ptr = tracing::tracing::get_local_tracing_instance().create_session(tr_info); - tracing::begin(trace_state_ptr); - tracing::trace(trace_state_ptr, "Message received from /{}", src_addr.addr); - } - - storage_proxy::clock_type::time_point timeout; - if (!t) { - auto timeout_in_ms = get_local_shared_storage_proxy()->_db.local().get_config().write_request_timeout_in_ms(); - timeout = clock_type::now() + std::chrono::milliseconds(timeout_in_ms); - } else { - timeout = *t; - } - - return do_with(std::move(in), get_local_shared_storage_proxy(), [src_addr = std::move(src_addr), &cinfo, forward = std::move(forward), reply_to, shard, response_id, trace_state_ptr, timeout] (const frozen_mutation& m, shared_ptr<storage_proxy>& p) mutable { - ++p->_stats.received_mutations; - p->_stats.forwarded_mutations += forward.size(); - return when_all( - // mutate_locally() may throw, putting it into apply() converts exception to a future. - futurize<void>::apply([timeout, &p, &m, reply_to, src_addr = std::move(src_addr)] () mutable { - // FIXME: get_schema_for_write() doesn't timeout - return get_schema_for_write(m.schema_version(), std::move(src_addr)).then([&m, &p, timeout] (schema_ptr s) { - return p->mutate_locally(std::move(s), m, timeout); - }); - }).then([reply_to, shard, response_id, trace_state_ptr] () { - auto& ms = netw::get_local_messaging_service(); - // We wait for send_mutation_done to complete, otherwise, if reply_to is busy, we will accumulate - // lots of unsent responses, which can OOM our shard. - // - // Usually we will return immediately, since this work only involves appending data to the connection - // send buffer. - tracing::trace(trace_state_ptr, "Sending mutation_done to /{}", reply_to); - return ms.send_mutation_done(netw::messaging_service::msg_addr{reply_to, shard}, shard, response_id).then_wrapped([] (future<> f) { - f.ignore_ready_future(); - }); - }).handle_exception([reply_to, shard, &p] (std::exception_ptr eptr) { - seastar::log_level l = seastar::log_level::warn; - try { - std::rethrow_exception(eptr); - } catch (timed_out_error&) { - // ignore timeouts so that logs are not flooded. - // database total_writes_timedout counter was incremented. - l = seastar::log_level::debug; - } catch (...)
{ - // ignore - } - slogger.log(l, "Failed to apply mutation from {}#{}: {}", reply_to, shard, eptr); - }), - parallel_for_each(forward.begin(), forward.end(), [reply_to, shard, response_id, &m, &p, trace_state_ptr, timeout] (gms::inet_address forward) { - auto& ms = netw::get_local_messaging_service(); - tracing::trace(trace_state_ptr, "Forwarding a mutation to /{}", forward); - return ms.send_mutation(netw::messaging_service::msg_addr{forward, 0}, timeout, m, {}, reply_to, shard, response_id, tracing::make_trace_info(trace_state_ptr)).then_wrapped([&p] (future<> f) { - if (f.failed()) { - ++p->_stats.forwarding_errors; - } - f.ignore_ready_future(); - }); - }) - ).then_wrapped([trace_state_ptr] (future<std::tuple<future<>, future<>>>&& f) { - // ignore results, since we'll be returning them via MUTATION_DONE verbs - tracing::trace(trace_state_ptr, "Mutation handling is done"); - return netw::messaging_service::no_wait(); - }); - }); - }); - ms.register_mutation_done([] (const rpc::client_info& cinfo, unsigned shard, storage_proxy::response_id_type response_id) { - auto& from = cinfo.retrieve_auxiliary<gms::inet_address>("baddr"); - return get_storage_proxy().invoke_on(shard, [from, response_id] (storage_proxy& sp) { - sp.got_response(response_id, from); - return netw::messaging_service::no_wait(); - }); - }); - ms.register_read_data([] (const rpc::client_info& cinfo, query::read_command cmd, compat::wrapping_partition_range pr, rpc::optional<query::digest_algorithm> oda) { - tracing::trace_state_ptr trace_state_ptr; - auto src_addr = netw::messaging_service::get_source(cinfo); - if (cmd.trace_info) { - trace_state_ptr = tracing::tracing::get_local_tracing_instance().create_session(*cmd.trace_info); - tracing::begin(trace_state_ptr); - tracing::trace(trace_state_ptr, "read_data: message received from /{}", src_addr.addr); - } - auto da = oda.value_or(query::digest_algorithm::MD5); - auto max_size = cinfo.retrieve_auxiliary<uint64_t>("max_result_size"); - return do_with(std::move(pr), get_local_shared_storage_proxy(), std::move(trace_state_ptr), [&cinfo, cmd = make_lw_shared(std::move(cmd)), src_addr = std::move(src_addr), da, max_size] (compat::wrapping_partition_range& pr, shared_ptr<storage_proxy>& p, tracing::trace_state_ptr& trace_state_ptr) mutable { - p->_stats.replica_data_reads++; - auto src_ip = src_addr.addr; - return get_schema_for_read(cmd->schema_version, std::move(src_addr)).then([cmd, da, &pr, &p, &trace_state_ptr, max_size] (schema_ptr s) { - auto pr2 = compat::unwrap(std::move(pr), *s); - if (pr2.second) { - // this function assumes singular queries but doesn't validate - throw std::runtime_error("READ_DATA called with wrapping range"); - } - query::result_request qrr; - switch (da) { - case query::digest_algorithm::none: - qrr = query::result_request::only_result; - break; - case query::digest_algorithm::MD5: - qrr = query::result_request::result_and_digest; - break; - } - return p->query_singular_local(std::move(s), cmd, std::move(pr2.first), qrr, trace_state_ptr, max_size); - }).finally([&trace_state_ptr, src_ip] () mutable { - tracing::trace(trace_state_ptr, "read_data handling is done, sending a response to /{}", src_ip); - }); - }); - }); - ms.register_read_mutation_data([] (const rpc::client_info& cinfo, query::read_command cmd, compat::wrapping_partition_range pr) { - tracing::trace_state_ptr trace_state_ptr; - auto src_addr = netw::messaging_service::get_source(cinfo); - if (cmd.trace_info) { - trace_state_ptr = tracing::tracing::get_local_tracing_instance().create_session(*cmd.trace_info); - tracing::begin(trace_state_ptr); -
tracing::trace(trace_state_ptr, "read_mutation_data: message received from /{}", src_addr.addr); - } - auto max_size = cinfo.retrieve_auxiliary<uint64_t>("max_result_size"); - return do_with(std::move(pr), - get_local_shared_storage_proxy(), - std::move(trace_state_ptr), - compat::one_or_two_partition_ranges({}), - [&cinfo, cmd = make_lw_shared(std::move(cmd)), src_addr = std::move(src_addr), max_size] ( - compat::wrapping_partition_range& pr, - shared_ptr<storage_proxy>& p, - tracing::trace_state_ptr& trace_state_ptr, - compat::one_or_two_partition_ranges& unwrapped) mutable { - p->_stats.replica_mutation_data_reads++; - auto src_ip = src_addr.addr; - return get_schema_for_read(cmd->schema_version, std::move(src_addr)).then([cmd, &pr, &p, &trace_state_ptr, max_size, &unwrapped] (schema_ptr s) mutable { - unwrapped = compat::unwrap(std::move(pr), *s); - return p->query_mutations_locally(std::move(s), std::move(cmd), unwrapped, trace_state_ptr, max_size); - }).finally([&trace_state_ptr, src_ip] () mutable { - tracing::trace(trace_state_ptr, "read_mutation_data handling is done, sending a response to /{}", src_ip); - }); - }); - }); - ms.register_read_digest([] (const rpc::client_info& cinfo, query::read_command cmd, compat::wrapping_partition_range pr) { - tracing::trace_state_ptr trace_state_ptr; - auto src_addr = netw::messaging_service::get_source(cinfo); - if (cmd.trace_info) { - trace_state_ptr = tracing::tracing::get_local_tracing_instance().create_session(*cmd.trace_info); - tracing::begin(trace_state_ptr); - tracing::trace(trace_state_ptr, "read_digest: message received from /{}", src_addr.addr); - } - auto max_size = cinfo.retrieve_auxiliary<uint64_t>("max_result_size"); - return do_with(std::move(pr), get_local_shared_storage_proxy(), std::move(trace_state_ptr), [&cinfo, cmd = make_lw_shared(std::move(cmd)), src_addr = std::move(src_addr), max_size] (compat::wrapping_partition_range& pr, shared_ptr<storage_proxy>& p, tracing::trace_state_ptr& trace_state_ptr) mutable { - p->_stats.replica_digest_reads++; - auto src_ip = src_addr.addr; - return get_schema_for_read(cmd->schema_version, std::move(src_addr)).then([cmd, &pr, &p, &trace_state_ptr, max_size] (schema_ptr s) { - auto pr2 = compat::unwrap(std::move(pr), *s); - if (pr2.second) { - // this function assumes singular queries but doesn't validate - throw std::runtime_error("READ_DIGEST called with wrapping range"); - } - return p->query_singular_local_digest(std::move(s), cmd, std::move(pr2.first), trace_state_ptr, max_size); - }).finally([&trace_state_ptr, src_ip] () mutable { - tracing::trace(trace_state_ptr, "read_digest handling is done, sending a response to /{}", src_ip); - }); - }); - }); - ms.register_truncate([](sstring ksname, sstring cfname) { - return do_with(utils::make_joinpoint([] { return db_clock::now();}), - [ksname, cfname](auto& tsf) { - return get_storage_proxy().invoke_on_all([ksname, cfname, &tsf](storage_proxy& sp) { - return sp._db.local().truncate(ksname, cfname, [&tsf] { return tsf.value(); }); - }); - }); - }); - - ms.register_get_schema_version([] (unsigned shard, table_schema_version v) { - return get_storage_proxy().invoke_on(shard, [v] (auto&& sp) { - slogger.debug("Schema version request for {}", v); - return local_schema_registry().get_frozen(v); - }); - }); -} - -void storage_proxy::uninit_messaging_service() { - auto& ms = netw::get_local_messaging_service(); - ms.unregister_mutation(); - ms.unregister_mutation_done(); - ms.unregister_read_data(); - ms.unregister_read_mutation_data(); - ms.unregister_read_digest(); - ms.unregister_truncate();
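Every verb registered in init_messaging_service() above is torn down here one-for-one; a verb left registered after stop() could fire into a destroyed storage_proxy. A minimal sketch of that register/unregister pairing, using a hypothetical toy verb table rather than Scylla's messaging API (all names below are illustrative):

#include <functional>
#include <string>
#include <unordered_map>

// Toy stand-in for the messaging service's verb table (illustrative only).
struct verb_table {
    using handler_fn = std::function<void(const std::string& payload)>;
    std::unordered_map<std::string, handler_fn> handlers;

    void register_verb(const std::string& name, handler_fn h) {
        handlers[name] = std::move(h);
    }
    void unregister_verb(const std::string& name) {
        handlers.erase(name); // in-flight invocations must be drained by the caller
    }
};

// Mirrors the init_messaging_service()/uninit_messaging_service() pairing.
void init(verb_table& vt) {
    vt.register_verb("MUTATION", [] (const std::string&) { /* apply the mutation */ });
}
void uninit(verb_table& vt) {
    vt.unregister_verb("MUTATION");
}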
-} - -// Merges reconcilable_results from different shards into one -// Drops partitions which exceed the limit. -class mutation_result_merger { - schema_ptr _schema; - lw_shared_ptr<query::read_command> _cmd; - unsigned _row_count = 0; - unsigned _partition_count = 0; - bool _short_read_allowed; - // we get a batch of partitions each time, each with a key - // partition batches should be maintained in key order - // batches that share a key should be merged and sorted in decorated_key - // order - struct partitions_batch { - std::vector<partition> partitions; - query::short_read short_read; - }; - std::multimap<unsigned, partitions_batch> _partitions; - query::result_memory_accounter _memory_accounter; - stdx::optional<unsigned> _stop_after_key; -public: - explicit mutation_result_merger(schema_ptr schema, lw_shared_ptr<query::read_command> cmd) - : _schema(std::move(schema)) - , _cmd(std::move(cmd)) - , _short_read_allowed(_cmd->slice.options.contains(query::partition_slice::option::allow_short_read)) { - } - query::result_memory_accounter& memory() { - return _memory_accounter; - } - const query::result_memory_accounter& memory() const { - return _memory_accounter; - } - void add_result(unsigned key, foreign_ptr<lw_shared_ptr<reconcilable_result>> partial_result) { - if (_stop_after_key && key > *_stop_after_key) { - // A short result was added that goes before this one. - return; - } - std::vector<partition> partitions; - partitions.reserve(partial_result->partitions().size()); - // Following three lines to simplify patch; can remove later - for (const partition& p : partial_result->partitions()) { - partitions.push_back(p); - _row_count += p._row_count; - _partition_count += p._row_count > 0; - } - _memory_accounter.update(partial_result->memory_usage()); - if (partial_result->is_short_read()) { - _stop_after_key = key; - } - _partitions.emplace(key, partitions_batch { std::move(partitions), partial_result->is_short_read() }); - } - reconcilable_result get() && { - auto unsorted = std::unordered_set<unsigned>(); - struct partitions_and_last_key { - std::vector<partition> partitions; - stdx::optional<dht::decorated_key> last; // set if we had a short read - }; - auto merged = std::map<unsigned, partitions_and_last_key>(); - auto short_read = query::short_read(this->short_read()); - // merge batches with equal keys, and note if we need to sort afterwards - for (auto&& key_value : _partitions) { - auto&& key = key_value.first; - if (_stop_after_key && key > *_stop_after_key) { - break; - } - auto&& batch = key_value.second; - auto&& dest = merged[key]; - if (dest.partitions.empty()) { - dest.partitions = std::move(batch.partitions); - } else { - unsorted.insert(key); - std::move(batch.partitions.begin(), batch.partitions.end(), std::back_inserter(dest.partitions)); - } - // In case of a short read we need to remove all partitions from the - // batch that come after the last partition of the short read - // result. - if (batch.short_read) { - // Nobody sends a short read with no data.
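The lines that follow implement the short-read cut-off: remember the smallest "last partition key" over all short-read batches merged under a key, then drop everything sorted after it. A condensed sketch of that rule, with plain ints standing in for decorated keys and std::optional in place of stdx::optional:

#include <algorithm>
#include <optional>
#include <vector>

// Record the earliest short-read boundary seen so far, then truncate the
// sorted partition list just past it (the real code defers the erase to the
// per-key sort pass).
inline void apply_short_read_boundary(std::vector<int>& sorted_partitions,
                                      std::optional<int>& last_key,
                                      int batch_last_key) {
    if (!last_key || batch_last_key < *last_key) {
        last_key = batch_last_key;
    }
    auto it = std::upper_bound(sorted_partitions.begin(), sorted_partitions.end(), *last_key);
    sorted_partitions.erase(it, sorted_partitions.end());
}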
- const auto& last = dest.partitions.back().mut().decorated_key(*_schema); - if (!dest.last || last.less_compare(*_schema, *dest.last)) { - dest.last = last; - } - short_read = query::short_read::yes; - } - } - - // Sort batches that arrived with the same keys - for (auto key : unsorted) { - struct comparator { - const schema& s; - dht::decorated_key::less_comparator dkcmp; - - bool operator()(const partition& a, const partition& b) const { - return dkcmp(a.mut().decorated_key(s), b.mut().decorated_key(s)); - } - bool operator()(const dht::decorated_key& a, const partition& b) const { - return dkcmp(a, b.mut().decorated_key(s)); - } - bool operator()(const partition& a, const dht::decorated_key& b) const { - return dkcmp(a.mut().decorated_key(s), b); - } - }; - auto cmp = comparator { *_schema, dht::decorated_key::less_comparator(_schema) }; - - auto&& batch = merged[key]; - boost::sort(batch.partitions, cmp); - if (batch.last) { - // This batch was built from a result that was a short read. - // We need to remove all partitions that are after that short - // read. - auto it = boost::range::upper_bound(batch.partitions, std::move(*batch.last), cmp); - batch.partitions.erase(it, batch.partitions.end()); - } - } - - auto final = std::vector<partition>(); - final.reserve(_partition_count); - for (auto&& batch : merged | boost::adaptors::map_values) { - std::move(batch.partitions.begin(), batch.partitions.end(), std::back_inserter(final)); - } - - if (short_read) { - // Short read row and partition counts may be incorrect, recalculate. - _row_count = 0; - _partition_count = 0; - for (const auto& p : final) { - _row_count += p.row_count(); - _partition_count += p.row_count() > 0; - } - - if (_row_count >= _cmd->row_limit || _partition_count > _cmd->partition_limit) { - // Even though there was a short read contributing to the final - // result we got limited by the total row limit or partition limit. - // Note that we cannot unset the short read flag with a trivial check - // in case _partition_count == _cmd->partition_limit, since the short - // read may have caused the last partition to contain fewer rows - // than asked for. - short_read = query::short_read::no; - } - } - - // Trim back partition count and row count in case we overshot. - // Should be rare for dense tables.
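The while loop that follows trims whole partitions off the back of the result until both limits hold. Condensed below, under the simplifying assumption that every partition is non-empty (the real code also re-compacts the final partition so the row count lands exactly on the limit):

#include <cstddef>
#include <cstdint>
#include <vector>

// Drop trailing partitions while the partition limit is exceeded, or while the
// row limit is still met even without the last partition.
inline void trim_to_limits(std::vector<uint32_t>& rows_per_partition,
                           uint64_t& row_count,
                           uint64_t row_limit, std::size_t partition_limit) {
    while (rows_per_partition.size() > partition_limit
           || (!rows_per_partition.empty()
               && row_count - rows_per_partition.back() >= row_limit)) {
        row_count -= rows_per_partition.back();
        rows_per_partition.pop_back();
    }
}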
- while ((_partition_count > _cmd->partition_limit) - || (_partition_count && (_row_count - final.back().row_count() >= _cmd->row_limit))) { - _row_count -= final.back().row_count(); - _partition_count -= final.back().row_count() > 0; - final.pop_back(); - } - if (_row_count > _cmd->row_limit) { - auto mut = final.back().mut().unfreeze(_schema); - static const auto all = std::vector<query::clustering_range>({query::clustering_range::make_open_ended_both_sides()}); - auto is_reversed = _cmd->slice.options.contains(query::partition_slice::option::reversed); - auto final_rows = _cmd->row_limit - (_row_count - final.back().row_count()); - _row_count -= final.back().row_count(); - auto rc = mut.partition().compact_for_query(*_schema, _cmd->timestamp, all, is_reversed, final_rows); - final.back() = partition(rc, freeze(mut)); - _row_count += rc; - } - - return reconcilable_result(_row_count, std::move(final), short_read, std::move(_memory_accounter).done()); - } - bool short_read() const { - return bool(_stop_after_key) || (_short_read_allowed && _row_count > 0 && _memory_accounter.check()); - } - unsigned partition_count() const { - return _partition_count; - } - unsigned row_count() const { - return _row_count; - } -}; - -future<foreign_ptr<lw_shared_ptr<reconcilable_result>>, cache_temperature> -storage_proxy::query_mutations_locally(schema_ptr s, lw_shared_ptr<query::read_command> cmd, const dht::partition_range& pr, - tracing::trace_state_ptr trace_state, uint64_t max_size) { - if (pr.is_singular()) { - unsigned shard = _db.local().shard_of(pr.start()->value().token()); - return _db.invoke_on(shard, [max_size, cmd, &pr, gs=global_schema_ptr(s), gt = tracing::global_trace_state_ptr(std::move(trace_state))] (database& db) mutable { - return db.get_result_memory_limiter().new_mutation_read(max_size).then([&] (query::result_memory_accounter ma) { - return db.query_mutations(gs, *cmd, pr, std::move(ma), gt).then([] (reconcilable_result&& result, cache_temperature ht) { - return make_ready_future<foreign_ptr<lw_shared_ptr<reconcilable_result>>, cache_temperature>(make_foreign(make_lw_shared(std::move(result))), ht); - }); - }); - }); - } else { - return query_nonsingular_mutations_locally(std::move(s), std::move(cmd), {pr}, std::move(trace_state), max_size); - } -} - -future<foreign_ptr<lw_shared_ptr<reconcilable_result>>, cache_temperature> -storage_proxy::query_mutations_locally(schema_ptr s, lw_shared_ptr<query::read_command> cmd, const compat::one_or_two_partition_ranges& pr, - tracing::trace_state_ptr trace_state, uint64_t max_size) { - if (!pr.second) { - return query_mutations_locally(std::move(s), std::move(cmd), pr.first, std::move(trace_state), max_size); - } else { - return query_nonsingular_mutations_locally(std::move(s), std::move(cmd), pr, std::move(trace_state), max_size); - } -} - -} - -namespace { - -struct element_and_shard { - unsigned element; // element in a partition range vector - unsigned shard; -}; - -bool operator==(element_and_shard a, element_and_shard b) { - return a.element == b.element && a.shard == b.shard; -} - -} - -namespace std { - -template <> -struct hash<element_and_shard> { - size_t operator()(element_and_shard es) const { - return es.element * 31 + es.shard; - } -}; - -} - -namespace service { - -struct partition_range_and_sort_key { - query::partition_range pr; - unsigned sort_key_shard_order; // for the same source partition range, we sort in shard order -}; - -future<foreign_ptr<lw_shared_ptr<reconcilable_result>>, cache_temperature> -storage_proxy::query_nonsingular_mutations_locally(schema_ptr s, lw_shared_ptr<query::read_command> cmd, const dht::partition_range_vector& prs, - tracing::trace_state_ptr trace_state, uint64_t max_size) { - // no one permitted us to modify *cmd, so make a copy - auto shard_cmd = make_lw_shared(*cmd); - return
do_with(cmd, - shard_cmd, - 0u, - false, - static_cast<unsigned>(prs.size()), - std::unordered_map<element_and_shard, partition_range_and_sort_key>{}, - mutation_result_merger{s, cmd}, - dht::ring_position_exponential_vector_sharder{prs}, - global_schema_ptr(s), - tracing::global_trace_state_ptr(std::move(trace_state)), - cache_temperature(0.0f), - [this, s, max_size] (lw_shared_ptr<query::read_command>& cmd, - lw_shared_ptr<query::read_command>& shard_cmd, - unsigned& mutation_result_merger_key, - bool& no_more_ranges, - unsigned& partition_range_count, - std::unordered_map<element_and_shard, partition_range_and_sort_key>& shards_for_this_iteration, - mutation_result_merger& mrm, - dht::ring_position_exponential_vector_sharder& rpevs, - global_schema_ptr& gs, - tracing::global_trace_state_ptr& gt, - cache_temperature& hit_rate) { - return _db.local().get_result_memory_limiter().new_mutation_read(max_size).then([&, s] (query::result_memory_accounter ma) { - mrm.memory() = std::move(ma); - return repeat_until_value([&, s] () -> future<stdx::optional<std::pair<reconcilable_result, cache_temperature>>> { - // We don't want to query a sparsely populated table sequentially, because the latency - // will go through the roof. We don't want to query a densely populated table in parallel, - // because we'll throw away most of the results. So we'll exponentially increase - // concurrency starting at 1, so we won't waste on dense tables and at most - // `log(nr_shards) + ignore_msb_bits` latency multiplier for near-empty tables. - // - // We use the ring_position_exponential_vector_sharder to give us subranges that follow - // this scheme. - shards_for_this_iteration.clear(); - // If we're reading from less than smp::count shards, then we can just append - // each shard in order without sorting. If we're reading from more, then - // we'll read from some shards at least twice, so the partitions within will be - // out-of-order wrt. other shards - auto this_iteration_subranges = rpevs.next(*s); - auto retain_shard_order = true; - no_more_ranges = true; - if (this_iteration_subranges) { - no_more_ranges = false; - retain_shard_order = this_iteration_subranges->inorder; - auto sort_key = 0u; - for (auto&& now : this_iteration_subranges->per_shard_ranges) { - shards_for_this_iteration.emplace(element_and_shard{this_iteration_subranges->element, now.shard}, partition_range_and_sort_key{now.ring_range, sort_key++}); - } - } - - auto key_base = mutation_result_merger_key; - - // prepare for next iteration - // Each iteration uses a merger key that is either i in the loop above (so in the range [0, shards_in_parallel), - // or, the element index in prs (so in the range [0, partition_range_count). Make room for sufficient keys.
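The concurrency ramp described at the top of this repeat loop can be seen in isolation below; the subrange total is an assumed example value, not something the sharder exposes this way:

#include <algorithm>
#include <cstdio>

// Query 1 subrange first, then 2, 4, 8, ... A sparse table pays at most about
// log2(total) sequential rounds; a dense table stops after the first round.
int main() {
    unsigned total = 64; // assumed number of per-shard subranges
    for (unsigned batch = 1, done = 0; done < total; batch *= 2) {
        unsigned now = std::min(batch, total - done);
        std::printf("round: query %u subrange(s) in parallel\n", now);
        done += now;
    }
    return 0;
}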
- mutation_result_merger_key += std::max(smp::count, partition_range_count); - - shard_cmd->partition_limit = cmd->partition_limit - mrm.partition_count(); - shard_cmd->row_limit = cmd->row_limit - mrm.row_count(); - - return parallel_for_each(shards_for_this_iteration, [&, key_base, retain_shard_order] (const std::pair<const element_and_shard, partition_range_and_sort_key>& elem_shard_range) { - auto&& elem = elem_shard_range.first.element; - auto&& shard = elem_shard_range.first.shard; - auto&& range = elem_shard_range.second.pr; - auto sort_key_shard_order = elem_shard_range.second.sort_key_shard_order; - return _db.invoke_on(shard, [&, range, gt, fstate = mrm.memory().state_for_another_shard()] (database& db) { - query::result_memory_accounter accounter(db.get_result_memory_limiter(), std::move(fstate)); - return db.query_mutations(gs, *shard_cmd, range, std::move(accounter), std::move(gt)).then([&hit_rate] (reconcilable_result&& rr, cache_temperature ht) { - hit_rate = ht; - return make_foreign(make_lw_shared(std::move(rr))); - }); - }).then([&, key_base, retain_shard_order, elem, sort_key_shard_order] (foreign_ptr<lw_shared_ptr<reconcilable_result>> partial_result) { - // Each outer (sequential) iteration is in result order, so we pick increasing keys. - // Within the inner (parallel) iteration, the results can be in order (if retain_shard_order), or not (if !retain_shard_order). - // If the results are unordered, we still have to order them according to which element of prs they originated from. - auto key = key_base; // for outer loop - if (retain_shard_order) { - key += sort_key_shard_order; // inner loop is ordered - } else { - key += elem; // inner loop ordered only by position within prs - } - mrm.add_result(key, std::move(partial_result)); - }); - }).then([&] () -> stdx::optional<std::pair<reconcilable_result, cache_temperature>> { - if (mrm.short_read() || mrm.partition_count() >= cmd->partition_limit || mrm.row_count() >= cmd->row_limit || no_more_ranges) { - return stdx::make_optional(std::make_pair(std::move(mrm).get(), hit_rate)); - } - return stdx::nullopt; - }); - }); - }); - }).then([] (std::pair<reconcilable_result, cache_temperature>&& result) { - return make_ready_future<foreign_ptr<lw_shared_ptr<reconcilable_result>>, cache_temperature>(make_foreign(make_lw_shared(std::move(result.first))), result.second); - }); -} - -future<> -storage_proxy::stop() { - uninit_messaging_service(); - return make_ready_future<>(); -} - -} diff --git a/scylla/service/storage_proxy.hh b/scylla/service/storage_proxy.hh deleted file mode 100644 index 98420ff..0000000 --- a/scylla/service/storage_proxy.hh +++ /dev/null @@ -1,424 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla.
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include "database.hh" -#include "query-request.hh" -#include "query-result.hh" -#include "query-result-set.hh" -#include "core/distributed.hh" -#include "db/consistency_level.hh" -#include "db/write_type.hh" -#include "utils/histogram.hh" -#include "utils/estimated_histogram.hh" -#include "tracing/trace_state.hh" -#include -#include "frozen_mutation.hh" - -namespace compat { - -class one_or_two_partition_ranges; - -} - -namespace service { - -class abstract_write_response_handler; -class abstract_read_executor; -class mutation_holder; - -class storage_proxy : public seastar::async_sharded_service<storage_proxy> /*implements StorageProxyMBean*/ { -public: - using clock_type = lowres_clock; -private: - struct rh_entry { - ::shared_ptr<abstract_write_response_handler> handler; - timer<clock_type> expire_timer; - rh_entry(::shared_ptr<abstract_write_response_handler>&& h, std::function<void()>&& cb); - }; - - using response_id_type = uint64_t; - struct unique_response_handler { - response_id_type id; - storage_proxy& p; - unique_response_handler(storage_proxy& p_, response_id_type id_); - unique_response_handler(const unique_response_handler&) = delete; - unique_response_handler& operator=(const unique_response_handler&) = delete; - unique_response_handler(unique_response_handler&& x); - ~unique_response_handler(); - response_id_type release(); - }; - - static const sstring COORDINATOR_STATS_CATEGORY; - static const sstring REPLICA_STATS_CATEGORY; - -public: - // split statistics counters - struct split_stats { - static seastar::metrics::label datacenter_label; - static seastar::metrics::label op_type_label; - private: - struct stats_counter { - uint64_t val = 0; - }; - - // counter of operations performed on a local Node - stats_counter _local; - // counters of operations performed on external Nodes aggregated per Nodes' DCs - std::unordered_map<sstring, stats_counter> _dc_stats; - // collectd registrations container - seastar::metrics::metric_groups _metrics; - // a prefix string that will be used for a collectd counters' description - sstring _short_description_prefix; - sstring _long_description_prefix; - // a statistics category, e.g. "client" or "replica" - sstring _category; - // type of operation (data/digest/mutation_data) - sstring _op_type; - - public: - /** - * @param category a statistics category, e.g. "client" or "replica" - * @param short_description_prefix a short description prefix - * @param long_description_prefix a long description prefix - */ - split_stats(const sstring& category, const sstring& short_description_prefix, const sstring& long_description_prefix, const sstring& op_type); - - /** - * Get a reference to the statistics counter corresponding to the given - * destination.
- * - * @param ep address of a destination - * - * @return a reference to the requested counter - */ - uint64_t& get_ep_stat(gms::inet_address ep); - }; - - struct stats { - utils::timed_rate_moving_average read_timeouts; - utils::timed_rate_moving_average read_unavailables; - utils::timed_rate_moving_average range_slice_timeouts; - utils::timed_rate_moving_average range_slice_unavailables; - utils::timed_rate_moving_average write_timeouts; - utils::timed_rate_moving_average write_unavailables; - - // total write attempts - split_stats writes_attempts; - split_stats writes_errors; - - // write attempts due to Read Repair logic - split_stats read_repair_write_attempts; - - uint64_t read_repair_attempts = 0; - uint64_t read_repair_repaired_blocking = 0; - uint64_t read_repair_repaired_background = 0; - uint64_t global_read_repairs_canceled_due_to_concurrent_write = 0; - - // number of mutations received as a coordinator - uint64_t received_mutations = 0; - - // number of counter updates received as a leader - uint64_t received_counter_updates = 0; - - // number of forwarded mutations - uint64_t forwarded_mutations = 0; - uint64_t forwarding_errors = 0; - - // number of read requests received as a replica - uint64_t replica_data_reads = 0; - uint64_t replica_digest_reads = 0; - uint64_t replica_mutation_data_reads = 0; - - utils::timed_rate_moving_average_and_histogram read; - utils::timed_rate_moving_average_and_histogram write; - utils::timed_rate_moving_average_and_histogram range; - utils::estimated_histogram estimated_read; - utils::estimated_histogram estimated_write; - utils::estimated_histogram estimated_range; - uint64_t writes = 0; - uint64_t background_writes = 0; // client no longer waits for the write - uint64_t background_write_bytes = 0; - uint64_t queued_write_bytes = 0; - uint64_t reads = 0; - uint64_t background_reads = 0; // client no longer waits for the read - uint64_t read_retries = 0; // read is retried with new limit - uint64_t throttled_writes = 0; // total number of writes ever delayed due to throttling - - // Data read attempts - split_stats data_read_attempts; - split_stats data_read_completed; - split_stats data_read_errors; - - // Digest read attempts - split_stats digest_read_attempts; - split_stats digest_read_completed; - split_stats digest_read_errors; - - // Mutation data read attempts - split_stats mutation_data_read_attempts; - split_stats mutation_data_read_completed; - split_stats mutation_data_read_errors; - - public: - stats(); - }; -private: - distributed<database>& _db; - response_id_type _next_response_id = 1; // 0 is reserved for unique_response_handler - std::unordered_map<response_id_type, rh_entry> _response_handlers; - // This buffer holds the ids of throttled writes in case resource consumption goes - // below the threshold and we want to unthrottle some of them. Without it, a throttled - // request with a dead or slow replica may wait for up to timeout ms before replying - // even if resource consumption drops to zero. Note that some requests here may - // already be completed by the time we try to unthrottle them (request completion does - // not remove a request from the buffer), but this is fine since request ids are unique, so we - // just skip an entry if the request no longer exists.
- circular_buffer<response_id_type> _throttled_writes; - constexpr static size_t _max_hints_in_progress = 128; // Origin multiplies by FBUtilities.getAvailableProcessors(), but we are already sharded - size_t _total_hints_in_progress = 0; - std::unordered_map<gms::inet_address, size_t> _hints_in_progress; - stats _stats; - static constexpr float CONCURRENT_SUBREQUESTS_MARGIN = 0.10; - // for read repair chance calculation - std::default_random_engine _urandom; - std::uniform_real_distribution<> _read_repair_chance = std::uniform_real_distribution<>(0,1); - seastar::metrics::metric_groups _metrics; -private: - void uninit_messaging_service(); - future<foreign_ptr<lw_shared_ptr<query::result>>> query_singular(lw_shared_ptr<query::read_command> cmd, dht::partition_range_vector&& partition_ranges, db::consistency_level cl, tracing::trace_state_ptr trace_state); - response_id_type register_response_handler(shared_ptr<abstract_write_response_handler>&& h); - void remove_response_handler(response_id_type id); - void got_response(response_id_type id, gms::inet_address from); - future<> response_wait(response_id_type id, clock_type::time_point timeout); - ::shared_ptr<abstract_write_response_handler>& get_write_response_handler(storage_proxy::response_id_type id); - response_id_type create_write_response_handler(keyspace& ks, db::consistency_level cl, db::write_type type, std::unique_ptr<mutation_holder> m, std::unordered_set<gms::inet_address> targets, - const std::vector<gms::inet_address>& pending_endpoints, std::vector<gms::inet_address>, tracing::trace_state_ptr tr_state); - response_id_type create_write_response_handler(const mutation&, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state); - response_id_type create_write_response_handler(const std::unordered_map<gms::inet_address, stdx::optional<mutation>>&, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state); - void send_to_live_endpoints(response_id_type response_id, clock_type::time_point timeout); - template<typename Range> - size_t hint_to_dead_endpoints(std::unique_ptr<mutation_holder>& mh, const Range& targets) noexcept; - void hint_to_dead_endpoints(response_id_type, db::consistency_level); - bool cannot_hint(gms::inet_address target); - size_t get_hints_in_progress_for(gms::inet_address target); - bool should_hint(gms::inet_address ep) noexcept; - bool submit_hint(std::unique_ptr<mutation_holder>& mh, gms::inet_address target); - std::vector<gms::inet_address> get_live_endpoints(keyspace& ks, const dht::token& token); - std::vector<gms::inet_address> get_live_sorted_endpoints(keyspace& ks, const dht::token& token); - db::read_repair_decision new_read_repair_decision(const schema& s); - ::shared_ptr<abstract_read_executor> get_read_executor(lw_shared_ptr<query::read_command> cmd, dht::partition_range pr, db::consistency_level cl, tracing::trace_state_ptr trace_state); - future<foreign_ptr<lw_shared_ptr<query::result>>, cache_temperature> query_singular_local(schema_ptr, lw_shared_ptr<query::read_command> cmd, const dht::partition_range& pr, - query::result_request request, - tracing::trace_state_ptr trace_state, - uint64_t max_size = query::result_memory_limiter::maximum_result_size); - future<query::result_digest, api::timestamp_type, cache_temperature> query_singular_local_digest(schema_ptr, lw_shared_ptr<query::read_command> cmd, const dht::partition_range& pr, tracing::trace_state_ptr trace_state, - uint64_t max_size = query::result_memory_limiter::maximum_result_size); - future<foreign_ptr<lw_shared_ptr<query::result>>> query_partition_key_range(lw_shared_ptr<query::read_command> cmd, dht::partition_range_vector partition_ranges, db::consistency_level cl, tracing::trace_state_ptr trace_state); - dht::partition_range_vector get_restricted_ranges(keyspace& ks, const schema& s, dht::partition_range range); - float estimate_result_rows_per_range(lw_shared_ptr<query::read_command> cmd, keyspace& ks); - static std::vector<gms::inet_address> intersection(const std::vector<gms::inet_address>& l1, const std::vector<gms::inet_address>& l2); - future<std::vector<foreign_ptr<lw_shared_ptr<query::result>>>> query_partition_key_range_concurrent(clock_type::time_point timeout, - std::vector<foreign_ptr<lw_shared_ptr<query::result>>>&& results, lw_shared_ptr<query::read_command> cmd,
db::consistency_level cl, dht::partition_range_vector::iterator&& i, - dht::partition_range_vector&& ranges, int concurrency_factor, tracing::trace_state_ptr trace_state, - uint32_t remaining_row_count, uint32_t remaining_partition_count); - - future<foreign_ptr<lw_shared_ptr<query::result>>> do_query(schema_ptr, - lw_shared_ptr<query::read_command> cmd, - dht::partition_range_vector&& partition_ranges, - db::consistency_level cl, tracing::trace_state_ptr trace_state); - template<typename Range, typename CreateWriteHandler> - future<std::vector<unique_response_handler>> mutate_prepare(const Range& mutations, db::consistency_level cl, db::write_type type, CreateWriteHandler handler); - template<typename Range> - future<std::vector<unique_response_handler>> mutate_prepare(const Range& mutations, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state); - future<> mutate_begin(std::vector<unique_response_handler> ids, db::consistency_level cl, stdx::optional<clock_type::time_point> timeout_opt = { }); - future<> mutate_end(future<> mutate_result, utils::latency_counter, tracing::trace_state_ptr trace_state); - future<> schedule_repair(std::unordered_map<dht::token, std::unordered_map<gms::inet_address, stdx::optional<mutation>>> diffs, db::consistency_level cl, tracing::trace_state_ptr trace_state); - bool need_throttle_writes() const; - void unthrottle(); - void handle_read_error(std::exception_ptr eptr, bool range); - template<typename Range> - future<> mutate_internal(Range mutations, db::consistency_level cl, bool counter_write, tracing::trace_state_ptr tr_state, stdx::optional<clock_type::time_point> timeout_opt = { }); - future<foreign_ptr<lw_shared_ptr<reconcilable_result>>, cache_temperature> query_nonsingular_mutations_locally( - schema_ptr s, lw_shared_ptr<query::read_command> cmd, const dht::partition_range_vector& pr, tracing::trace_state_ptr trace_state, uint64_t max_size); - - struct frozen_mutation_and_schema { - frozen_mutation fm; - schema_ptr s; - }; - future<> mutate_counters_on_leader(std::vector<frozen_mutation_and_schema> mutations, db::consistency_level cl, clock_type::time_point timeout, - tracing::trace_state_ptr trace_state); - future<> mutate_counter_on_leader_and_replicate(const schema_ptr& s, frozen_mutation m, db::consistency_level cl, clock_type::time_point timeout, - tracing::trace_state_ptr trace_state); - - gms::inet_address find_leader_for_counter_update(const mutation& m, db::consistency_level cl); - - future<> do_mutate(std::vector<mutation> mutations, db::consistency_level cl, tracing::trace_state_ptr tr_state, bool); - friend class mutate_executor; -public: - storage_proxy(distributed<database>& db); - ~storage_proxy(); - distributed<database>& get_db() { - return _db; - } - - void init_messaging_service(); - - // Applies mutation on this node. - // Resolves with timed_out_error when timeout is reached. - future<> mutate_locally(const mutation& m, clock_type::time_point timeout = clock_type::time_point::max()); - // Applies mutation on this node. - // Resolves with timed_out_error when timeout is reached. - future<> mutate_locally(const schema_ptr&, const frozen_mutation& m, clock_type::time_point timeout = clock_type::time_point::max()); - // Applies mutations on this node. - // Resolves with timed_out_error when timeout is reached. - future<> mutate_locally(std::vector<mutation> mutation, clock_type::time_point timeout = clock_type::time_point::max()); - - future<> mutate_streaming_mutation(const schema_ptr&, utils::UUID plan_id, const frozen_mutation& m, bool fragmented); - - /** - * Use this method to have these Mutations applied - * across all replicas. This method will take care - * of the possibility of a replica being down and hint - * the data across to some other replica.
- * - * @param mutations the mutations to be applied across the replicas - * @param consistency_level the consistency level for the operation - * @param tr_state trace state handle - */ - future<> mutate(std::vector<mutation> mutations, db::consistency_level cl, tracing::trace_state_ptr tr_state, bool raw_counters = false); - - future<> replicate_counter_from_leader(mutation m, db::consistency_level cl, tracing::trace_state_ptr tr_state, - clock_type::time_point timeout); - - template<typename Range> - future<> mutate_counters(Range&& mutations, db::consistency_level cl, tracing::trace_state_ptr tr_state); - - future<> mutate_with_triggers(std::vector<mutation> mutations, db::consistency_level cl, - bool should_mutate_atomically, tracing::trace_state_ptr tr_state, bool raw_counters = false); - - /** - * See mutate. Adds additional steps before and after writing a batch. - * Before writing the batch (but after doing availability check against the FD for the row replicas): - * write the entire batch to a batchlog elsewhere in the cluster. - * After: remove the batchlog entry (after writing hints for the batch rows, if necessary). - * - * @param mutations the Mutations to be applied across the replicas - * @param consistency_level the consistency level for the operation - * @param tr_state trace state handle - */ - future<> mutate_atomically(std::vector<mutation> mutations, db::consistency_level cl, tracing::trace_state_ptr tr_state); - - // Send a mutation to one specific remote target. - // Inspired by Cassandra's StorageProxy.sendToHintedEndpoints but without - // hinted handoff support, and just one target. See also - // send_to_live_endpoints() - another take on the same original function. - future<> send_to_endpoint(mutation m, gms::inet_address target, db::write_type type); - - /** - * Performs the truncate operation, which effectively deletes all data from - * the column family cfname - * @param keyspace - * @param cfname - */ - future<> truncate_blocking(sstring keyspace, sstring cfname); - - /* - * Executes data query on the whole cluster. - * - * Partitions for each range will be ordered according to decorated_key ordering. Results for - * each range from "partition_ranges" may appear in any order. - * - * IMPORTANT: Not all fibers started by this method have to be done by the time it returns so no - * parameter can be changed after being passed to this method.
- */ - future<foreign_ptr<lw_shared_ptr<query::result>>> query(schema_ptr, - lw_shared_ptr<query::read_command> cmd, - dht::partition_range_vector&& partition_ranges, - db::consistency_level cl, - tracing::trace_state_ptr trace_state); - - future<foreign_ptr<lw_shared_ptr<reconcilable_result>>, cache_temperature> query_mutations_locally( - schema_ptr, lw_shared_ptr<query::read_command> cmd, const dht::partition_range&, - tracing::trace_state_ptr trace_state = nullptr, - uint64_t max_size = query::result_memory_limiter::maximum_result_size); - - - future<foreign_ptr<lw_shared_ptr<reconcilable_result>>, cache_temperature> query_mutations_locally( - schema_ptr, lw_shared_ptr<query::read_command> cmd, const compat::one_or_two_partition_ranges&, - tracing::trace_state_ptr trace_state = nullptr, - uint64_t max_size = query::result_memory_limiter::maximum_result_size); - - future<foreign_ptr<lw_shared_ptr<reconcilable_result>>, cache_temperature> query_mutations_locally( - schema_ptr s, lw_shared_ptr<query::read_command> cmd, const dht::partition_range_vector& pr, - tracing::trace_state_ptr trace_state = nullptr, - uint64_t max_size = query::result_memory_limiter::maximum_result_size); - - - future<> stop(); - - const stats& get_stats() const { - return _stats; - } - - friend class abstract_read_executor; - friend class abstract_write_response_handler; -}; - -extern distributed<storage_proxy> _the_storage_proxy; - -inline distributed<storage_proxy>& get_storage_proxy() { - return _the_storage_proxy; -} - -inline storage_proxy& get_local_storage_proxy() { - return _the_storage_proxy.local(); -} - -inline shared_ptr<storage_proxy> get_local_shared_storage_proxy() { - return _the_storage_proxy.local_shared(); -} - -dht::partition_range_vector get_restricted_ranges(locator::token_metadata&, - const schema&, dht::partition_range); - -} diff --git a/scylla/service/storage_service.cc b/scylla/service/storage_service.cc deleted file mode 100644 index 3da8c57..0000000 --- a/scylla/service/storage_service.cc +++ /dev/null @@ -1,3557 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - * - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */ - -#include "storage_service.hh" -#include "core/distributed.hh" -#include "locator/snitch_base.hh" -#include "db/system_keyspace.hh" -#include "utils/UUID.hh" -#include "gms/inet_address.hh" -#include "log.hh" -#include "service/migration_manager.hh" -#include "to_string.hh" -#include "gms/gossiper.hh" -#include "gms/failure_detector.hh" -#include -#include -#include -#include "locator/local_strategy.hh" -#include "version.hh" -#include "unimplemented.hh" -#include "streaming/stream_plan.hh" -#include "streaming/stream_state.hh" -#include "dht/range_streamer.hh" -#include -#include -#include "service/load_broadcaster.hh" -#include "thrift/server.hh" -#include "transport/server.hh" -#include "redis_server.hh" -#include -#include "db/batchlog_manager.hh" -#include "db/commitlog/commitlog.hh" -#include "auth/auth.hh" -#include -#include -#include "utils/exceptions.hh" -#include "message/messaging_service.hh" -#include "supervisor.hh" - -using token = dht::token; -using UUID = utils::UUID; -using inet_address = gms::inet_address; - -namespace service { - -static logging::logger slogger("storage_service"); - -static const sstring RANGE_TOMBSTONES_FEATURE = "RANGE_TOMBSTONES"; -static const sstring LARGE_PARTITIONS_FEATURE = "LARGE_PARTITIONS"; -static const sstring MATERIALIZED_VIEWS_FEATURE = "MATERIALIZED_VIEWS"; -static const sstring COUNTERS_FEATURE = "COUNTERS"; -static const sstring INDEXES_FEATURE = "INDEXES"; - -distributed<storage_service> _the_storage_service; - -int get_generation_number() { - using namespace std::chrono; - auto now = high_resolution_clock::now().time_since_epoch(); - int generation_number = duration_cast<seconds>(now).count(); - return generation_number; -} - -storage_service::storage_service(distributed<database>& db) - : _db(db) { - sstable_read_error.connect([this] { isolate_on_error(); }); - sstable_write_error.connect([this] { isolate_on_error(); }); - general_disk_error.connect([this] { isolate_on_error(); }); - commit_error.connect([this] { isolate_on_commit_error(); }); -} - -void -storage_service::isolate_on_error() { - do_isolate_on_error(disk_error::regular); -} - -void -storage_service::isolate_on_commit_error() { - do_isolate_on_error(disk_error::commit); -} - -bool storage_service::is_auto_bootstrap() { - return _db.local().get_config().auto_bootstrap(); -} - -sstring storage_service::get_config_supported_features() { - // Add features supported by this local node. When a new feature is - // introduced in scylla, update it here, e.g., - // return sstring("FEATURE1,FEATURE2") - std::vector<sstring> features = { - RANGE_TOMBSTONES_FEATURE, - LARGE_PARTITIONS_FEATURE, - COUNTERS_FEATURE, - }; - if (service::get_local_storage_service()._db.local().get_config().experimental()) { - features.push_back(MATERIALIZED_VIEWS_FEATURE); - features.push_back(INDEXES_FEATURE); - } - return join(",", features); -} - -std::set<gms::inet_address> get_seeds() { - // FIXME: DatabaseDescriptor.getSeeds() - auto& gossiper = gms::get_local_gossiper(); - return gossiper.get_seeds(); -} - -std::unordered_set<dht::token> get_replace_tokens() { - std::unordered_set<dht::token> ret; - std::unordered_set<sstring> tokens; - auto tokens_string = get_local_storage_service().db().local().get_config().replace_token(); - try { - boost::split(tokens, tokens_string, boost::is_any_of(sstring(","))); - } catch (...)
{ - throw std::runtime_error(sprint("Unable to parse replace_token=%s", tokens_string)); - } - tokens.erase(""); - for (auto token_string : tokens) { - auto token = dht::global_partitioner().from_sstring(token_string); - ret.insert(token); - } - return ret; -} - -std::experimental::optional<utils::UUID> get_replace_node() { - auto replace_node = get_local_storage_service().db().local().get_config().replace_node(); - if (replace_node.empty()) { - return std::experimental::nullopt; - } - try { - return utils::UUID(replace_node); - } catch (...) { - auto msg = sprint("Unable to parse %s as host-id", replace_node); - slogger.error("{}", msg); - throw std::runtime_error(msg); - } -} - -bool get_property_join_ring() { - return get_local_storage_service().db().local().get_config().join_ring(); -} - -bool get_property_rangemovement() { - return get_local_storage_service().db().local().get_config().consistent_rangemovement(); -} - -bool get_property_load_ring_state() { - return get_local_storage_service().db().local().get_config().load_ring_state(); -} - -bool storage_service::should_bootstrap() { - return is_auto_bootstrap() && !db::system_keyspace::bootstrap_complete() && !get_seeds().count(get_broadcast_address()); -} - -// Runs inside seastar::async context -void storage_service::prepare_to_join(std::vector<gms::inet_address> loaded_endpoints) { - if (_joined) { - return; - } - - std::map<gms::application_state, gms::versioned_value> app_states; - if (db::system_keyspace::was_decommissioned()) { - if (db().local().get_config().override_decommission()) { - slogger.warn("This node was decommissioned, but overriding by operator request."); - db::system_keyspace::set_bootstrap_state(db::system_keyspace::bootstrap_state::COMPLETED).get(); - } else { - auto msg = sstring("This node was decommissioned and will not rejoin the ring unless override_decommission=true has been set, " - "or all existing data is removed and the node is bootstrapped again"); - slogger.error(msg.c_str()); - throw std::runtime_error(msg.c_str()); - } - } - if (db().local().is_replacing() && !get_property_join_ring()) { - throw std::runtime_error("Cannot set both join_ring=false and attempt to replace a node"); - } - if (get_replace_tokens().size() > 0 || get_replace_node()) { - throw std::runtime_error("Replace method removed; use replace_address instead"); - } - if (db().local().is_replacing()) { - if (db::system_keyspace::bootstrap_complete()) { - throw std::runtime_error("Cannot replace address with a node that is already bootstrapped"); - } - if (!is_auto_bootstrap()) { - throw std::runtime_error("Trying to replace_address with auto_bootstrap disabled will not work, check your configuration"); - } - _bootstrap_tokens = prepare_replacement_info().get0(); - app_states.emplace(gms::application_state::TOKENS, value_factory.tokens(_bootstrap_tokens)); - app_states.emplace(gms::application_state::STATUS, value_factory.hibernate(true)); - } else if (should_bootstrap()) { - check_for_endpoint_collision().get(); - } else { - auto& gossiper = gms::get_local_gossiper(); - auto seeds = gms::get_local_gossiper().get_seeds(); - auto my_ep = get_broadcast_address(); - auto peer_features = db::system_keyspace::load_peer_features().get0(); - slogger.info("load_peer_features: peer_features size={}", peer_features.size()); - for (auto& x : peer_features) { - slogger.info("load_peer_features: peer={}, supported_features={}", x.first, x.second); - } - auto local_features = get_config_supported_features(); - - if (seeds.count(my_ep)) { - // This node is a seed node - if (peer_features.empty()) { - // This is a completely
new seed node, skip the check - slogger.info("Checking remote features skipped, since this node is a new seed node which knows nothing about the cluster"); - } else { - // This is a existing seed node - if (seeds.size() == 1) { - // This node is the only seed node, check features with system table - slogger.info("Checking remote features with system table, since this node is the only seed node"); - gossiper.check_knows_remote_features(local_features, peer_features); - } else { - // More than one seed node in the seed list, do shadow round with other seed nodes - bool ok; - try { - slogger.info("Checking remote features with gossip"); - gossiper.do_shadow_round().get(); - ok = true; - } catch (...) { - gossiper.finish_shadow_round(); - ok = false; - } - - if (ok) { - gossiper.check_knows_remote_features(local_features); - gossiper.reset_endpoint_state_map(); - for (auto ep : loaded_endpoints) { - gossiper.add_saved_endpoint(ep); - } - } else { - // Check features with system table - slogger.info("Checking remote features with gossip failed, fallback to check with system table"); - gossiper.check_knows_remote_features(local_features, peer_features); - } - } - } - } else { - // This node is a non-seed node - // Do shadow round to check if this node knows all the features - // advertised by all other nodes, otherwise this node is too old - // (missing features) to join the cluser. - slogger.info("Checking remote features with gossip"); - gossiper.do_shadow_round().get(); - gossiper.check_knows_remote_features(local_features); - gossiper.reset_endpoint_state_map(); - for (auto ep : loaded_endpoints) { - gossiper.add_saved_endpoint(ep); - } - } - } - - // have to start the gossip service before we can see any info on other nodes. this is necessary - // for bootstrap to get the load info it needs. - // (we won't be part of the storage ring though until we add a counterId to our state, below.) - // Seed the host ID-to-endpoint map with our own ID. 
-    auto local_host_id = db::system_keyspace::get_local_host_id().get0();
-    get_storage_service().invoke_on_all([local_host_id] (auto& ss) {
-        ss._local_host_id = local_host_id;
-    }).get();
-    auto features = get_config_supported_features();
-    _token_metadata.update_host_id(local_host_id, get_broadcast_address());
-    auto broadcast_rpc_address = utils::fb_utilities::get_broadcast_rpc_address();
-    app_states.emplace(gms::application_state::NET_VERSION, value_factory.network_version());
-    app_states.emplace(gms::application_state::HOST_ID, value_factory.host_id(local_host_id));
-    app_states.emplace(gms::application_state::RPC_ADDRESS, value_factory.rpcaddress(broadcast_rpc_address));
-    app_states.emplace(gms::application_state::RELEASE_VERSION, value_factory.release_version());
-    app_states.emplace(gms::application_state::SUPPORTED_FEATURES, value_factory.supported_features(features));
-    app_states.emplace(gms::application_state::CACHE_HITRATES, value_factory.cache_hitrates(""));
-    app_states.emplace(gms::application_state::SCHEMA_TABLES_VERSION, versioned_value(db::schema_tables::version));
-    slogger.info("Starting up server gossip");
-
-    auto& gossiper = gms::get_local_gossiper();
-    gossiper.register_(this->shared_from_this());
-    auto generation_number = db::system_keyspace::increment_and_get_generation().get0();
-    gossiper.start_gossiping(generation_number, app_states).get();
-
-    // gossip snitch infos (local DC and rack)
-    gossip_snitch_info().get();
-
-    auto& proxy = service::get_storage_proxy();
-    // gossip Schema.emptyVersion forcing immediate check for schema updates (see MigrationManager#maybeScheduleSchemaPull)
-    update_schema_version_and_announce(proxy).get(); // Ensure we know our own actual Schema UUID in preparation for updates
-#if 0
-    if (!MessagingService.instance().isListening())
-        MessagingService.instance().listen(FBUtilities.getLocalAddress());
-    LoadBroadcaster.instance.startBroadcasting();
-
-    HintedHandOffManager.instance.start();
-    BatchlogManager.instance.start();
-#endif
-}
-
-// Runs inside seastar::async context
-void storage_service::join_token_ring(int delay) {
-    // This function only gets called on shard 0, but we want to set _joined
-    // on all shards, so this variable can be later read locally.
-    get_storage_service().invoke_on_all([] (auto&& ss) {
-        ss._joined = true;
-    }).get();
-    // We bootstrap if we haven't successfully bootstrapped before, as long as we are not a seed.
-    // If we are a seed, or if the user manually sets auto_bootstrap to false,
-    // we'll skip streaming data from other nodes and jump directly into the ring.
-    //
-    // The seed check allows us to skip the RING_DELAY sleep for the single-node cluster case,
-    // which is useful for both new users and testing.
-    //
-    // We attempted to replace this with a schema-presence check, but you need a meaningful sleep
-    // to get schema info from gossip which defeats the purpose. See CASSANDRA-4427 for the gory details.
-    std::unordered_set<inet_address> current;
-    slogger.debug("Bootstrap variables: {} {} {} {}",
-        is_auto_bootstrap(),
-        db::system_keyspace::bootstrap_in_progress(),
-        db::system_keyspace::bootstrap_complete(),
-        get_seeds().count(get_broadcast_address()));
-    if (is_auto_bootstrap() && !db::system_keyspace::bootstrap_complete() && get_seeds().count(get_broadcast_address())) {
-        slogger.info("This node will not auto bootstrap because it is configured to be a seed node.");
-    }
-    if (should_bootstrap()) {
-        if (db::system_keyspace::bootstrap_in_progress()) {
-            slogger.warn("Detected previous bootstrap failure; retrying");
-        } else {
-            db::system_keyspace::set_bootstrap_state(db::system_keyspace::bootstrap_state::IN_PROGRESS).get();
-        }
-        set_mode(mode::JOINING, "waiting for ring information", true);
-        // first sleep the delay to make sure we see all our peers
-        for (int i = 0; i < delay; i += 1000) {
-            // if we see schema, we can proceed to the next check directly
-            if (_db.local().get_version() != database::empty_version) {
-                slogger.debug("got schema: {}", _db.local().get_version());
-                break;
-            }
-            sleep(std::chrono::seconds(1)).get();
-        }
-        // if our schema hasn't matched yet, keep sleeping until it does
-        // (post CASSANDRA-1391 we don't expect this to be necessary very often, but it doesn't hurt to be careful)
-        while (!get_local_migration_manager().is_ready_for_bootstrap()) {
-            set_mode(mode::JOINING, "waiting for schema information to complete", true);
-            sleep(std::chrono::seconds(1)).get();
-        }
-        set_mode(mode::JOINING, "schema complete, ready to bootstrap", true);
-        set_mode(mode::JOINING, "waiting for pending range calculation", true);
-        update_pending_ranges().get();
-        set_mode(mode::JOINING, "calculation complete, ready to bootstrap", true);
-        slogger.debug("... got ring + schema info");
-
-        auto t = gms::gossiper::clk::now();
-        while (get_property_rangemovement() &&
-               (!_token_metadata.get_bootstrap_tokens().empty() ||
-                !_token_metadata.get_leaving_endpoints().empty() ||
-                !_token_metadata.get_moving_endpoints().empty())) {
-            auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(gms::gossiper::clk::now() - t).count();
-            slogger.info("Checking bootstrapping/leaving/moving nodes: tokens {}, leaving {}, moving {}, sleep 1 second and check again ({} seconds elapsed)",
-                _token_metadata.get_bootstrap_tokens().size(),
-                _token_metadata.get_leaving_endpoints().size(),
-                _token_metadata.get_moving_endpoints().size(),
-                elapsed);
-
-            sleep(std::chrono::seconds(1)).get();
-
-            if (gms::gossiper::clk::now() > t + std::chrono::seconds(60)) {
-                throw std::runtime_error("Other bootstrapping/leaving/moving nodes detected, cannot bootstrap while consistent_rangemovement is true");
-            }
-
-            // Check the schema and pending range again
-            while (!get_local_migration_manager().is_ready_for_bootstrap()) {
-                set_mode(mode::JOINING, "waiting for schema information to complete", true);
-                sleep(std::chrono::seconds(1)).get();
-            }
-            update_pending_ranges().get();
-        }
-        slogger.info("Checking bootstrapping/leaving/moving nodes: ok");
-
-        if (!db().local().is_replacing()) {
-            if (_token_metadata.is_member(get_broadcast_address())) {
-                throw std::runtime_error("This node is already a member of the token ring; bootstrap aborted. (If replacing a dead node, remove the old one from the ring first.)");
-            }
-            set_mode(mode::JOINING, "getting bootstrap token", true);
-            _bootstrap_tokens = boot_strapper::get_bootstrap_tokens(_token_metadata, _db.local());
-        } else {
-            auto replace_addr = db().local().get_replace_address();
-            if (replace_addr && *replace_addr != get_broadcast_address()) {
-                // Sleep additionally to make sure that the server actually is not alive
-                // and giving it more time to gossip if alive.
-                sleep(service::load_broadcaster::BROADCAST_INTERVAL).get();
-
-                // check for operator errors...
-                for (auto token : _bootstrap_tokens) {
-                    auto existing = _token_metadata.get_endpoint(token);
-                    if (existing) {
-                        auto& gossiper = gms::get_local_gossiper();
-                        auto eps = gossiper.get_endpoint_state_for_endpoint(*existing);
-                        if (eps && eps->get_update_timestamp() > gms::gossiper::clk::now() - std::chrono::milliseconds(delay)) {
-                            throw std::runtime_error("Cannot replace a live node...");
-                        }
-                        current.insert(*existing);
-                    } else {
-                        throw std::runtime_error(sprint("Cannot replace token %s which does not exist!", token));
-                    }
-                }
-            } else {
-                sleep(get_ring_delay()).get();
-            }
-            std::stringstream ss;
-            ss << _bootstrap_tokens;
-            set_mode(mode::JOINING, sprint("Replacing a node with token(s): %s", ss.str()), true);
-        }
-        bootstrap(_bootstrap_tokens);
-        // bootstrap will block until finished
-        if (_is_bootstrap_mode) {
-            auto err = sprint("We are not supposed to be in bootstrap mode any more");
-            slogger.warn(err.c_str());
-            throw std::runtime_error(err);
-        }
-    } else {
-        size_t num_tokens = _db.local().get_config().num_tokens();
-        _bootstrap_tokens = db::system_keyspace::get_saved_tokens().get0();
-        if (_bootstrap_tokens.empty()) {
-            auto initial_tokens = _db.local().get_initial_tokens();
-            if (initial_tokens.size() < 1) {
-                _bootstrap_tokens = boot_strapper::get_random_tokens(_token_metadata, num_tokens);
-                if (num_tokens == 1) {
-                    slogger.warn("Generated random token {}. Random tokens will result in an unbalanced ring; see http://wiki.apache.org/cassandra/Operations", _bootstrap_tokens);
-                } else {
-                    slogger.info("Generated random tokens. tokens are {}", _bootstrap_tokens);
-                }
-            } else {
-                for (auto token_string : initial_tokens) {
-                    auto token = dht::global_partitioner().from_sstring(token_string);
-                    _bootstrap_tokens.insert(token);
-                }
-                slogger.info("Saved tokens not found. Using configuration value: {}", _bootstrap_tokens);
-            }
-        } else {
-            if (_bootstrap_tokens.size() != num_tokens) {
-                throw std::runtime_error(sprint("Cannot change the number of tokens from %ld to %ld", _bootstrap_tokens.size(), num_tokens));
-            } else {
-                slogger.info("Using saved tokens {}", _bootstrap_tokens);
-            }
-        }
-    }
-#if 0
-    // if we don't have system_traces keyspace at this point, then create it manually
-    if (Schema.instance.getKSMetaData(TraceKeyspace.NAME) == null)
-        MigrationManager.announceNewKeyspace(TraceKeyspace.definition(), 0, false);
-#endif
-
-    if (!_is_survey_mode) {
-        // We have to create the system_auth and system_traces keyspaces and
-        // their tables before Node moves to the NORMAL state so that other
-        // Nodes joining the newly created cluster and serializing on this event
-        // "see" these new objects and don't try to create them.
-        //
-        // Otherwise there is a high chance to hit the issue #420.
-        auth::auth::setup().get();
-        supervisor::notify("starting tracing");
-        tracing::tracing::start_tracing().get();
-
-        // start participating in the ring.
-        db::system_keyspace::set_bootstrap_state(db::system_keyspace::bootstrap_state::COMPLETED).get();
-        set_tokens(_bootstrap_tokens);
-        // remove the existing info about the replaced node.
-        if (!current.empty()) {
-            auto& gossiper = gms::get_local_gossiper();
-            for (auto existing : current) {
-                gossiper.replaced_endpoint(existing);
-            }
-        }
-        if (_token_metadata.sorted_tokens().empty()) {
-            auto err = sprint("join_token_ring: Sorted token in token_metadata is empty");
-            slogger.error(err.c_str());
-            throw std::runtime_error(err);
-        }
-    } else {
-        slogger.info("Startup complete, but write survey mode is active, not becoming an active ring member. Use JMX (StorageService->joinRing()) to finalize ring joining.");
-    }
-}
-
-future<> storage_service::join_ring() {
-    return run_with_api_lock(sstring("join_ring"), [] (storage_service& ss) {
-        return seastar::async([&ss] {
-            if (!ss._joined) {
-                slogger.info("Joining ring by operator request");
-                ss.join_token_ring(0);
-            } else if (ss._is_survey_mode) {
-                auto tokens = db::system_keyspace::get_saved_tokens().get0();
-                ss.set_tokens(std::move(tokens));
-                db::system_keyspace::set_bootstrap_state(db::system_keyspace::bootstrap_state::COMPLETED).get();
-                ss._is_survey_mode = false;
-                slogger.info("Leaving write survey mode and joining ring at operator request");
-                if (ss._token_metadata.sorted_tokens().empty()) {
-                    auto err = sprint("join_ring: Sorted token in token_metadata is empty");
-                    slogger.error(err.c_str());
-                    throw std::runtime_error(err);
-                }
-                auth::auth::setup().get();
-            }
-        });
-    });
-}
-
-bool storage_service::is_joined() {
-    // Every time we set _joined, we do it on all shards, so we can read its
-    // value locally.
-    return _joined && !_is_survey_mode;
-}
-
-// Runs inside seastar::async context
-void storage_service::bootstrap(std::unordered_set<token> tokens) {
-    _is_bootstrap_mode = true;
-    // DON'T use set_token, that makes us part of the ring locally which is incorrect until we are done bootstrapping
-    db::system_keyspace::update_tokens(tokens).get();
-    auto& gossiper = gms::get_local_gossiper();
-    if (!db().local().is_replacing()) {
-        // if not an existing token then bootstrap
-        gossiper.add_local_application_state(gms::application_state::TOKENS, value_factory.tokens(tokens)).get();
-        gossiper.add_local_application_state(gms::application_state::STATUS, value_factory.bootstrapping(tokens)).get();
-        set_mode(mode::JOINING, sprint("sleeping %s ms for pending range setup", get_ring_delay().count()), true);
-        sleep(get_ring_delay()).get();
-    } else {
-        // Don't set any state for the node which is bootstrapping the existing token...
-        _token_metadata.update_normal_tokens(tokens, get_broadcast_address());
-        auto replace_addr = db().local().get_replace_address();
-        if (replace_addr) {
-            slogger.debug("Removing replaced endpoint {} from system.peers", *replace_addr);
-            db::system_keyspace::remove_endpoint(*replace_addr).get();
-        }
-    }
-    if (!gossiper.seen_any_seed()) {
-        throw std::runtime_error("Unable to contact any seeds!");
-    }
-    set_mode(mode::JOINING, "Starting to bootstrap...", true);
-    dht::boot_strapper bs(_db, get_broadcast_address(), tokens, _token_metadata);
-    bs.bootstrap().get(); // handles token update
-    slogger.info("Bootstrap completed! for the tokens {}", tokens);
-}
-
-sstring
-storage_service::get_rpc_address(const inet_address& endpoint) const {
-    if (endpoint != get_broadcast_address()) {
-        auto v = gms::get_local_gossiper().get_endpoint_state_for_endpoint(endpoint)->get_application_state(gms::application_state::RPC_ADDRESS);
-        if (v) {
-            return v.value().value;
-        }
-    }
-    return boost::lexical_cast<std::string>(endpoint);
-}
-
-std::unordered_map<dht::token_range, std::vector<inet_address>>
-storage_service::get_range_to_address_map(const sstring& keyspace) const {
-    return get_range_to_address_map(keyspace, _token_metadata.sorted_tokens());
-}
-
-std::unordered_map<dht::token_range, std::vector<inet_address>>
-storage_service::get_range_to_address_map_in_local_dc(
-        const sstring& keyspace) const {
-    std::function<bool(const inet_address&)> filter = [this](const inet_address& address) {
-        return is_local_dc(address);
-    };
-
-    auto orig_map = get_range_to_address_map(keyspace, get_tokens_in_local_dc());
-    std::unordered_map<dht::token_range, std::vector<inet_address>> filtered_map;
-    for (auto entry : orig_map) {
-        auto& addresses = filtered_map[entry.first];
-        addresses.reserve(entry.second.size());
-        std::copy_if(entry.second.begin(), entry.second.end(), std::back_inserter(addresses), filter);
-    }
-
-    return filtered_map;
-}
-
-std::vector<token>
-storage_service::get_tokens_in_local_dc() const {
-    std::vector<token> filtered_tokens;
-    for (auto token : _token_metadata.sorted_tokens()) {
-        auto endpoint = _token_metadata.get_endpoint(token);
-        if (is_local_dc(*endpoint))
-            filtered_tokens.push_back(token);
-    }
-    return filtered_tokens;
-}
-
-bool
-storage_service::is_local_dc(const inet_address& targetHost) const {
-    auto remote_dc = locator::i_endpoint_snitch::get_local_snitch_ptr()->get_datacenter(targetHost);
-    auto local_dc = locator::i_endpoint_snitch::get_local_snitch_ptr()->get_datacenter(get_broadcast_address());
-    return remote_dc == local_dc;
-}
-
-std::unordered_map<dht::token_range, std::vector<inet_address>>
-storage_service::get_range_to_address_map(const sstring& keyspace,
-        const std::vector<token>& sorted_tokens) const {
-    // some people just want to get a visual representation of things. Allow null and set it to the first
-    // non-system keyspace.
-    if (keyspace == "" && _db.local().get_non_system_keyspaces().empty()) {
-        throw std::runtime_error("No keyspace provided and no non-system keyspace exists");
-    }
-    const sstring& ks = (keyspace == "") ? _db.local().get_non_system_keyspaces()[0] : keyspace;
-    return construct_range_to_endpoint_map(ks, get_all_ranges(sorted_tokens));
-}
-
-void storage_service::handle_state_bootstrap(inet_address endpoint) {
-    slogger.debug("endpoint={} handle_state_bootstrap", endpoint);
-    // explicitly check for TOKENS, because a bootstrapping node might be bootstrapping in legacy mode; that is, not using vnodes and no token specified
-    auto tokens = get_tokens_for(endpoint);
-
-    slogger.debug("Node {} state bootstrapping, token {}", endpoint, tokens);
-
-    // if this node is present in token metadata, either we have missed intermediate states
-    // or the node had crashed. Print warning if needed, clear obsolete stuff and
-    // continue.
-    if (_token_metadata.is_member(endpoint)) {
-        // If isLeaving is false, we have missed both LEAVING and LEFT. However, if
-        // isLeaving is true, we have only missed LEFT. Waiting time between completing
-        // leave operation and rebootstrapping is relatively short, so the latter is quite
-        // common (not enough time for gossip to spread). Therefore we report only the
-        // former in the log.
-        if (!_token_metadata.is_leaving(endpoint)) {
-            slogger.info("Node {} state jump to bootstrap", endpoint);
-        }
-        _token_metadata.remove_endpoint(endpoint);
-    }
-
-    _token_metadata.add_bootstrap_tokens(tokens, endpoint);
-    update_pending_ranges().get();
-
-    auto& gossiper = gms::get_local_gossiper();
-    if (gossiper.uses_host_id(endpoint)) {
-        _token_metadata.update_host_id(gossiper.get_host_id(endpoint), endpoint);
-    }
-}
-
-void storage_service::handle_state_normal(inet_address endpoint) {
-    slogger.debug("endpoint={} handle_state_normal", endpoint);
-    auto tokens = get_tokens_for(endpoint);
-    auto& gossiper = gms::get_local_gossiper();
-
-    std::unordered_set<token> tokens_to_update_in_metadata;
-    std::unordered_set<token> tokens_to_update_in_system_keyspace;
-    std::unordered_set<token> local_tokens_to_remove;
-    std::unordered_set<inet_address> endpoints_to_remove;
-
-    slogger.debug("Node {} state normal, token {}", endpoint, tokens);
-
-    if (_token_metadata.is_member(endpoint)) {
-        slogger.info("Node {} state jump to normal", endpoint);
-    }
-    update_peer_info(endpoint);
-
-    // Order Matters, TM.updateHostID() should be called before TM.updateNormalToken(), (see CASSANDRA-4300).
-    if (gossiper.uses_host_id(endpoint)) {
-        auto host_id = gossiper.get_host_id(endpoint);
-        auto existing = _token_metadata.get_endpoint_for_host_id(host_id);
-        if (db().local().is_replacing() &&
-            db().local().get_replace_address() &&
-            gossiper.get_endpoint_state_for_endpoint(db().local().get_replace_address().value()) &&
-            (host_id == gossiper.get_host_id(db().local().get_replace_address().value()))) {
-            slogger.warn("Not updating token metadata for {} because I am replacing it", endpoint);
-        } else {
-            if (existing && *existing != endpoint) {
-                if (*existing == get_broadcast_address()) {
-                    slogger.warn("Not updating host ID {} for {} because it's mine", host_id, endpoint);
-                    _token_metadata.remove_endpoint(endpoint);
-                    endpoints_to_remove.insert(endpoint);
-                } else if (gossiper.compare_endpoint_startup(endpoint, *existing) > 0) {
-                    slogger.warn("Host ID collision for {} between {} and {}; {} is the new owner", host_id, *existing, endpoint, endpoint);
-                    _token_metadata.remove_endpoint(*existing);
-                    endpoints_to_remove.insert(*existing);
-                    _token_metadata.update_host_id(host_id, endpoint);
-                } else {
-                    slogger.warn("Host ID collision for {} between {} and {}; ignored {}", host_id, *existing, endpoint, endpoint);
-                    _token_metadata.remove_endpoint(endpoint);
-                    endpoints_to_remove.insert(endpoint);
-                }
-            } else {
-                _token_metadata.update_host_id(host_id, endpoint);
-            }
-        }
-    }
-
-    for (auto t : tokens) {
-        // we don't want to update if this node is responsible for the token and it has a later startup time than endpoint.
-        auto current_owner = _token_metadata.get_endpoint(t);
-        if (!current_owner) {
-            slogger.debug("handle_state_normal: New node {} at token {}", endpoint, t);
-            tokens_to_update_in_metadata.insert(t);
-            tokens_to_update_in_system_keyspace.insert(t);
-        } else if (endpoint == *current_owner) {
-            slogger.debug("handle_state_normal: endpoint={} == current_owner={} token {}", endpoint, *current_owner, t);
-            // set state back to normal, since the node may have tried to leave, but failed and is now back up
-            tokens_to_update_in_metadata.insert(t);
-            tokens_to_update_in_system_keyspace.insert(t);
-        } else if (gossiper.compare_endpoint_startup(endpoint, *current_owner) > 0) {
-            slogger.debug("handle_state_normal: endpoint={} > current_owner={}, token {}", endpoint, *current_owner, t);
-            tokens_to_update_in_metadata.insert(t);
-            tokens_to_update_in_system_keyspace.insert(t);
-            // currentOwner is no longer current, endpoint is. Keep track of these moves, because when
-            // a host no longer has any tokens, we'll want to remove it.
-            std::multimap<inet_address, token> ep_to_token_copy = get_token_metadata().get_endpoint_to_token_map_for_reading();
-            auto rg = ep_to_token_copy.equal_range(*current_owner);
-            for (auto it = rg.first; it != rg.second;) {
-                if (it->second == t) {
-                    slogger.info("handle_state_normal: remove endpoint={} token={}", *current_owner, t);
-                    it = ep_to_token_copy.erase(it);
-                } else {
-                    it++;
-                }
-            }
-            if (ep_to_token_copy.count(*current_owner) < 1) {
-                slogger.info("handle_state_normal: endpoints_to_remove endpoint={}", *current_owner);
-                endpoints_to_remove.insert(*current_owner);
-            }
-            slogger.info("handle_state_normal: Nodes {} and {} have the same token {}. {} is the new owner", endpoint, *current_owner, t, endpoint);
-        } else {
-            slogger.info("handle_state_normal: Nodes {} and {} have the same token {}. Ignoring {}", endpoint, *current_owner, t, endpoint);
-        }
-    }
-
-    bool is_moving = _token_metadata.is_moving(endpoint); // capture because updateNormalTokens clears moving status
-
-    // Update pending ranges after update of normal tokens immediately to avoid
-    // a race where natural endpoint was updated to contain node A, but A was
-    // not yet removed from pending endpoints
-    _token_metadata.update_normal_tokens(tokens_to_update_in_metadata, endpoint);
-    do_update_pending_ranges();
-
-    for (auto ep : endpoints_to_remove) {
-        remove_endpoint(ep);
-        auto replace_addr = db().local().get_replace_address();
-        if (db().local().is_replacing() && replace_addr && *replace_addr == ep) {
-            gossiper.replacement_quarantine(ep); // quarantine locally longer than normally; see CASSANDRA-8260
-        }
-    }
-    slogger.debug("handle_state_normal: endpoint={} tokens_to_update_in_system_keyspace = {}", endpoint, tokens_to_update_in_system_keyspace);
-    if (!tokens_to_update_in_system_keyspace.empty()) {
-        db::system_keyspace::update_tokens(endpoint, tokens_to_update_in_system_keyspace).then_wrapped([endpoint] (auto&& f) {
-            try {
-                f.get();
-            } catch (...) {
{ - slogger.error("handle_state_normal: fail to update tokens for {}: {}", endpoint, std::current_exception()); - } - return make_ready_future<>(); - }).get(); - } - if (!local_tokens_to_remove.empty()) { - db::system_keyspace::update_local_tokens(std::unordered_set(), local_tokens_to_remove).discard_result().get(); - } - - if (is_moving || _operation_mode == mode::MOVING) { - _token_metadata.remove_from_moving(endpoint); - get_storage_service().invoke_on_all([endpoint] (auto&& ss) { - for (auto&& subscriber : ss._lifecycle_subscribers) { - try { - subscriber->on_move(endpoint); - } catch (...) { - slogger.warn("Move notification failed {}: {}", endpoint, std::current_exception()); - } - } - }).get(); - } else { - get_storage_service().invoke_on_all([endpoint] (auto&& ss) { - for (auto&& subscriber : ss._lifecycle_subscribers) { - try { - subscriber->on_join_cluster(endpoint); - } catch (...) { - slogger.warn("Join cluster notification failed {}: {}", endpoint, std::current_exception()); - } - } - }).get(); - } - - update_pending_ranges().get(); - if (slogger.is_enabled(logging::log_level::debug)) { - auto ver = _token_metadata.get_ring_version(); - for (auto& x : _token_metadata.get_token_to_endpoint()) { - slogger.debug("handle_state_normal: token_metadata.ring_version={}, token={} -> endpoint={}", ver, x.first, x.second); - } - } -} - -void storage_service::handle_state_leaving(inet_address endpoint) { - slogger.debug("endpoint={} handle_state_leaving", endpoint); - - auto tokens = get_tokens_for(endpoint); - - slogger.debug("Node {} state leaving, tokens {}", endpoint, tokens); - - // If the node is previously unknown or tokens do not match, update tokenmetadata to - // have this node as 'normal' (it must have been using this token before the - // leave). This way we'll get pending ranges right. - if (!_token_metadata.is_member(endpoint)) { - slogger.info("Node {} state jump to leaving", endpoint); - _token_metadata.update_normal_tokens(tokens, endpoint); - } else { - auto tokens_ = _token_metadata.get_tokens(endpoint); - std::set tmp(tokens.begin(), tokens.end()); - if (!std::includes(tokens_.begin(), tokens_.end(), tmp.begin(), tmp.end())) { - slogger.warn("Node {} 'leaving' token mismatch. 
Long network partition?", endpoint); - slogger.debug("tokens_={}, tokens={}", tokens_, tmp); - _token_metadata.update_normal_tokens(tokens, endpoint); - } - } - - // at this point the endpoint is certainly a member with this token, so let's proceed - // normally - _token_metadata.add_leaving_endpoint(endpoint); - update_pending_ranges().get(); -} - -void storage_service::handle_state_left(inet_address endpoint, std::vector pieces) { - slogger.debug("endpoint={} handle_state_left", endpoint); - if (pieces.size() < 2) { - slogger.warn("Fail to handle_state_left endpoint={} pieces={}", endpoint, pieces); - return; - } - auto tokens = get_tokens_for(endpoint); - slogger.debug("Node {} state left, tokens {}", endpoint, tokens); - excise(tokens, endpoint, extract_expire_time(pieces)); -} - -void storage_service::handle_state_moving(inet_address endpoint, std::vector pieces) { - slogger.debug("endpoint={} handle_state_moving", endpoint); - if (pieces.size() < 2) { - slogger.warn("Fail to handle_state_moving endpoint={} pieces={}", endpoint, pieces); - return; - } - auto token = dht::global_partitioner().from_sstring(pieces[1]); - slogger.debug("Node {} state moving, new token {}", endpoint, token); - _token_metadata.add_moving_endpoint(token, endpoint); - update_pending_ranges().get(); -} - -void storage_service::handle_state_removing(inet_address endpoint, std::vector pieces) { - slogger.debug("endpoint={} handle_state_removing", endpoint); - if (pieces.empty()) { - slogger.warn("Fail to handle_state_removing endpoint={} pieces={}", endpoint, pieces); - return; - } - if (endpoint == get_broadcast_address()) { - slogger.info("Received removenode gossip about myself. Is this node rejoining after an explicit removenode?"); - try { - drain().get(); - } catch (...) 
{ - slogger.error("Fail to drain: {}", std::current_exception()); - throw; - } - return; - } - if (_token_metadata.is_member(endpoint)) { - auto state = pieces[0]; - auto remove_tokens = _token_metadata.get_tokens(endpoint); - if (sstring(gms::versioned_value::REMOVED_TOKEN) == state) { - std::unordered_set tmp(remove_tokens.begin(), remove_tokens.end()); - excise(std::move(tmp), endpoint, extract_expire_time(pieces)); - } else if (sstring(gms::versioned_value::REMOVING_TOKEN) == state) { - auto& gossiper = gms::get_local_gossiper(); - slogger.debug("Tokens {} removed manually (endpoint was {})", remove_tokens, endpoint); - // Note that the endpoint is being removed - _token_metadata.add_leaving_endpoint(endpoint); - update_pending_ranges().get(); - // find the endpoint coordinating this removal that we need to notify when we're done - auto state = gossiper.get_endpoint_state_for_endpoint(endpoint); - if (!state) { - auto err = sprint("Can not find endpoint_state for endpoint=%s", endpoint); - slogger.warn(err.c_str()); - throw std::runtime_error(err); - } - auto value = state->get_application_state(application_state::REMOVAL_COORDINATOR); - if (!value) { - auto err = sprint("Can not find application_state for endpoint=%s", endpoint); - slogger.warn(err.c_str()); - throw std::runtime_error(err); - } - std::vector coordinator; - boost::split(coordinator, value->value, boost::is_any_of(sstring(versioned_value::DELIMITER_STR))); - if (coordinator.size() != 2) { - auto err = sprint("Can not split REMOVAL_COORDINATOR for endpoint=%s, value=%s", endpoint, value->value); - slogger.warn(err.c_str()); - throw std::runtime_error(err); - } - UUID host_id(coordinator[1]); - // grab any data we are now responsible for and notify responsible node - auto ep = _token_metadata.get_endpoint_for_host_id(host_id); - if (!ep) { - auto err = sprint("Can not find host_id=%s", host_id); - slogger.warn(err.c_str()); - throw std::runtime_error(err); - } - restore_replica_count(endpoint, ep.value()).get(); - } - } else { // now that the gossiper has told us about this nonexistent member, notify the gossiper to remove it - if (sstring(gms::versioned_value::REMOVED_TOKEN) == pieces[0]) { - add_expire_time_if_found(endpoint, extract_expire_time(pieces)); - } - remove_endpoint(endpoint); - } -} - -void storage_service::on_join(gms::inet_address endpoint, gms::endpoint_state ep_state) { - slogger.debug("endpoint={} on_join", endpoint); - for (const auto& e : ep_state.get_application_state_map()) { - on_change(endpoint, e.first, e.second); - } - get_local_migration_manager().schedule_schema_pull(endpoint, ep_state).handle_exception([endpoint] (auto ep) { - slogger.warn("Fail to pull schema from {}: {}", endpoint, ep); - }); -} - -void storage_service::on_alive(gms::inet_address endpoint, gms::endpoint_state state) { - slogger.debug("endpoint={} on_alive", endpoint); - get_local_migration_manager().schedule_schema_pull(endpoint, state).handle_exception([endpoint] (auto ep) { - slogger.warn("Fail to pull schema from {}: {}", endpoint, ep); - }); - if (_token_metadata.is_member(endpoint)) { -#if 0 - HintedHandOffManager.instance.scheduleHintDelivery(endpoint, true); -#endif - get_storage_service().invoke_on_all([endpoint] (auto&& ss) { - for (auto&& subscriber : ss._lifecycle_subscribers) { - try { - subscriber->on_up(endpoint); - } catch (...) 
{ - slogger.warn("Up notification failed {}: {}", endpoint, std::current_exception()); - } - } - }).get(); - } -} - -void storage_service::before_change(gms::inet_address endpoint, gms::endpoint_state current_state, gms::application_state new_state_key, const gms::versioned_value& new_value) { - slogger.debug("endpoint={} before_change: new app_state={}, new versioned_value={}", endpoint, new_state_key, new_value); -} - -void storage_service::on_change(inet_address endpoint, application_state state, const versioned_value& value) { - slogger.debug("endpoint={} on_change: app_state={}, versioned_value={}", endpoint, state, value); - if (state == application_state::STATUS) { - std::vector pieces; - boost::split(pieces, value.value, boost::is_any_of(sstring(versioned_value::DELIMITER_STR))); - if (pieces.empty()) { - slogger.warn("Fail to split status in on_change: endpoint={}, app_state={}, value={}", endpoint, state, value); - } - sstring move_name = pieces[0]; - if (move_name == sstring(versioned_value::STATUS_BOOTSTRAPPING)) { - handle_state_bootstrap(endpoint); - } else if (move_name == sstring(versioned_value::STATUS_NORMAL) || - move_name == sstring(versioned_value::SHUTDOWN)) { - handle_state_normal(endpoint); - } else if (move_name == sstring(versioned_value::REMOVING_TOKEN) || - move_name == sstring(versioned_value::REMOVED_TOKEN)) { - handle_state_removing(endpoint, pieces); - } else if (move_name == sstring(versioned_value::STATUS_LEAVING)) { - handle_state_leaving(endpoint); - } else if (move_name == sstring(versioned_value::STATUS_LEFT)) { - handle_state_left(endpoint, pieces); - } else if (move_name == sstring(versioned_value::STATUS_MOVING)) { - handle_state_moving(endpoint, pieces); - } - } else { - auto& gossiper = gms::get_local_gossiper(); - auto ep_state = gossiper.get_endpoint_state_for_endpoint(endpoint); - if (!ep_state || gossiper.is_dead_state(*ep_state)) { - slogger.debug("Ignoring state change for dead or unknown endpoint: {}", endpoint); - return; - } - if (get_token_metadata().is_member(endpoint)) { - do_update_system_peers_table(endpoint, state, value); - if (state == application_state::SCHEMA) { - get_local_migration_manager().schedule_schema_pull(endpoint, *ep_state).handle_exception([endpoint] (auto ep) { - slogger.warn("Failed to pull schema from {}: {}", endpoint, ep); - }); - } - } - } - replicate_to_all_cores().get(); -} - - -void storage_service::on_remove(gms::inet_address endpoint) { - slogger.debug("endpoint={} on_remove", endpoint); - _token_metadata.remove_endpoint(endpoint); - update_pending_ranges().get(); -} - -void storage_service::on_dead(gms::inet_address endpoint, gms::endpoint_state state) { - slogger.debug("endpoint={} on_dead", endpoint); - netw::get_local_messaging_service().remove_rpc_client(netw::msg_addr{endpoint, 0}); - get_storage_service().invoke_on_all([endpoint] (auto&& ss) { - for (auto&& subscriber : ss._lifecycle_subscribers) { - try { - subscriber->on_down(endpoint); - } catch (...) 
{ - slogger.warn("Down notification failed {}: {}", endpoint, std::current_exception()); - } - } - }).get(); -} - -void storage_service::on_restart(gms::inet_address endpoint, gms::endpoint_state state) { - slogger.debug("endpoint={} on_restart", endpoint); - // If we have restarted before the node was even marked down, we need to reset the connection pool - if (state.is_alive()) { - on_dead(endpoint, state); - } -} - -// Runs inside seastar::async context -template -static void update_table(gms::inet_address endpoint, sstring col, T value) { - db::system_keyspace::update_peer_info(endpoint, col, value).then_wrapped([col, endpoint] (auto&& f) { - try { - f.get(); - } catch (...) { - slogger.error("fail to update {} for {}: {}", col, endpoint, std::current_exception()); - } - return make_ready_future<>(); - }).get(); -} - -// Runs inside seastar::async context -void storage_service::do_update_system_peers_table(gms::inet_address endpoint, const application_state& state, const versioned_value& value) { - slogger.debug("Update system.peers table: endpoint={}, app_state={}, versioned_value={}", endpoint, state, value); - if (state == application_state::RELEASE_VERSION) { - update_table(endpoint, "release_version", value.value); - } else if (state == application_state::DC) { - update_table(endpoint, "data_center", value.value); - } else if (state == application_state::RACK) { - update_table(endpoint, "rack", value.value); - } else if (state == application_state::RPC_ADDRESS) { - auto col = sstring("rpc_address"); - inet_address ep; - try { - ep = gms::inet_address(value.value); - } catch (...) { - slogger.error("fail to update {} for {}: invalid rcpaddr {}", col, endpoint, value.value); - return; - } - update_table(endpoint, col, ep.addr()); - } else if (state == application_state::SCHEMA) { - update_table(endpoint, "schema_version", utils::UUID(value.value)); - } else if (state == application_state::HOST_ID) { - update_table(endpoint, "host_id", utils::UUID(value.value)); - } else if (state == application_state::SUPPORTED_FEATURES) { - update_table(endpoint, "supported_features", value.value); - } -} - -// Runs inside seastar::async context -void storage_service::update_peer_info(gms::inet_address endpoint) { - using namespace gms; - auto& gossiper = gms::get_local_gossiper(); - auto ep_state = gossiper.get_endpoint_state_for_endpoint(endpoint); - if (!ep_state) { - return; - } - for (auto& entry : ep_state->get_application_state_map()) { - auto& app_state = entry.first; - auto& value = entry.second; - do_update_system_peers_table(endpoint, app_state, value); - } -} - -sstring storage_service::get_application_state_value(inet_address endpoint, application_state appstate) { - auto& gossiper = gms::get_local_gossiper(); - auto eps = gossiper.get_endpoint_state_for_endpoint(endpoint); - if (!eps) { - return {}; - } - auto v = eps->get_application_state(appstate); - if (!v) { - return {}; - } - return v->value; -} - -std::unordered_set storage_service::get_tokens_for(inet_address endpoint) { - auto tokens_string = get_application_state_value(endpoint, application_state::TOKENS); - slogger.trace("endpoint={}, tokens_string={}", endpoint, tokens_string); - if (tokens_string.size() == 0) { - return {}; // boost::split produces one element for emty string - } - std::vector tokens; - std::unordered_set ret; - boost::split(tokens, tokens_string, boost::is_any_of(";")); - for (auto str : tokens) { - auto t = dht::global_partitioner().from_sstring(str); - slogger.trace("endpoint={}, token_str={} 
token={}", endpoint, str, t); - ret.emplace(std::move(t)); - } - return ret; -} - -// Runs inside seastar::async context -void storage_service::set_tokens(std::unordered_set tokens) { - slogger.debug("Setting tokens to {}", tokens); - db::system_keyspace::update_tokens(tokens).get(); - _token_metadata.update_normal_tokens(tokens, get_broadcast_address()); - auto local_tokens = get_local_tokens().get0(); - set_gossip_tokens(local_tokens); - set_mode(mode::NORMAL, "node is now in normal status", true); - replicate_to_all_cores().get(); -} - -void storage_service::set_gossip_tokens(const std::unordered_set& local_tokens) { - auto& gossiper = gms::get_local_gossiper(); - gossiper.add_local_application_state(gms::application_state::TOKENS, value_factory.tokens(local_tokens)).get(); - gossiper.add_local_application_state(gms::application_state::STATUS, value_factory.normal(local_tokens)).get(); -} - -void storage_service::register_subscriber(endpoint_lifecycle_subscriber* subscriber) -{ - _lifecycle_subscribers.emplace_back(subscriber); -} - -void storage_service::unregister_subscriber(endpoint_lifecycle_subscriber* subscriber) -{ - _lifecycle_subscribers.erase(std::remove(_lifecycle_subscribers.begin(), _lifecycle_subscribers.end(), subscriber), _lifecycle_subscribers.end()); -} - -static stdx::optional> drain_in_progress; - -future<> storage_service::stop_transport() { - return run_with_no_api_lock([] (storage_service& ss) { - return seastar::async([&ss] { - slogger.info("Stop transport: starts"); - - gms::stop_gossiping().get(); - slogger.info("Stop transport: stop_gossiping done"); - - ss.shutdown_client_servers().get(); - slogger.info("Stop transport: shutdown rpc and cql server done"); - - ss.do_stop_ms().get(); - slogger.info("Stop transport: shutdown messaging_service done"); - - ss.do_stop_stream_manager().get(); - slogger.info("Stop transport: shutdown stream_manager done"); - - auth::auth::shutdown().get(); - slogger.info("Stop transport: auth shutdown"); - - slogger.info("Stop transport: done"); - }); - }); -} - -future<> storage_service::drain_on_shutdown() { - return run_with_no_api_lock([] (storage_service& ss) { - if (drain_in_progress) { - return std::move(*drain_in_progress); - } - return seastar::async([&ss] { - slogger.info("Drain on shutdown: starts"); - - ss.stop_transport().get(); - slogger.info("Drain on shutdown: stop_transport done"); - - tracing::tracing::tracing_instance().invoke_on_all([] (auto& tr) { - return tr.shutdown(); - }).get(); - - tracing::tracing::tracing_instance().stop().get(); - slogger.info("Drain on shutdown: tracing is stopped"); - - ss.flush_column_families(); - slogger.info("Drain on shutdown: flush column_families done"); - - ss.db().invoke_on_all([] (auto& db) { - return db.commitlog()->shutdown(); - }).get(); - slogger.info("Drain on shutdown: shutdown commitlog done"); - - // NOTE: We currently don't destory migration_manager nor - // storage_service in scylla, so when we reach here - // migration_manager should to be still alive. Be careful, when - // scylla starts to destroy migration_manager in the shutdown - // process. 
-            service::get_local_migration_manager().unregister_listener(&ss);
-
-            slogger.info("Drain on shutdown: done");
-        });
-    });
-#if 0
-    // daemon threads, like our executors', continue to run while shutdown hooks are invoked
-    drainOnShutdown = new Thread(new WrappedRunnable()
-    {
-        @Override
-        public void runMayThrow() throws InterruptedException
-        {
-            ExecutorService counterMutationStage = StageManager.getStage(Stage.COUNTER_MUTATION);
-            ExecutorService mutationStage = StageManager.getStage(Stage.MUTATION);
-            if (mutationStage.isShutdown() && counterMutationStage.isShutdown())
-                return; // drained already
-
-            if (daemon != null)
-                shutdownClientServers();
-            ScheduledExecutors.optionalTasks.shutdown();
-            Gossiper.instance.stop();
-
-            // In-progress writes originating here could generate hints to be written, so shut down MessagingService
-            // before mutation stage, so we can get all the hints saved before shutting down
-            MessagingService.instance().shutdown();
-            counterMutationStage.shutdown();
-            mutationStage.shutdown();
-            counterMutationStage.awaitTermination(3600, TimeUnit.SECONDS);
-            mutationStage.awaitTermination(3600, TimeUnit.SECONDS);
-            StorageProxy.instance.verifyNoHintsInProgress();
-
-            List<Future<?>> flushes = new ArrayList<>();
-            for (Keyspace keyspace : Keyspace.all())
-            {
-                KSMetaData ksm = Schema.instance.getKSMetaData(keyspace.getName());
-                if (!ksm.durableWrites)
-                {
-                    for (ColumnFamilyStore cfs : keyspace.getColumnFamilyStores())
-                        flushes.add(cfs.forceFlush());
-                }
-            }
-            try
-            {
-                FBUtilities.waitOnFutures(flushes);
-            }
-            catch (Throwable t)
-            {
-                JVMStabilityInspector.inspectThrowable(t);
-                // don't let this stop us from shutting down the commitlog and other thread pools
-                slogger.warn("Caught exception while waiting for memtable flushes during shutdown hook", t);
-            }
-
-            CommitLog.instance.shutdownBlocking();
-
-            // wait for miscellaneous tasks like sstable and commitlog segment deletion
-            ScheduledExecutors.nonPeriodicTasks.shutdown();
-            if (!ScheduledExecutors.nonPeriodicTasks.awaitTermination(1, TimeUnit.MINUTES))
-                slogger.warn("Miscellaneous task executor still busy after one minute; proceeding with shutdown");
-        }
-    }, "StorageServiceShutdownHook");
-    Runtime.getRuntime().addShutdownHook(drainOnShutdown);
-#endif
-}
-
-future<> storage_service::init_server(int delay) {
-    return seastar::async([this, delay] {
-        get_storage_service().invoke_on_all([] (auto& ss) {
-            ss.init_messaging_service();
-        }).get();
-        auto& gossiper = gms::get_local_gossiper();
-#if 0
-        slogger.info("Cassandra version: {}", FBUtilities.getReleaseVersionString());
-        slogger.info("Thrift API version: {}", cassandraConstants.VERSION);
-        slogger.info("CQL supported versions: {} (default: {})", StringUtils.join(ClientState.getCQLSupportedVersion(), ","), ClientState.DEFAULT_CQL_VERSION);
-#endif
-        _initialized = true;
-
-        // Register storage_service to migration_manager so we can update
-        // pending ranges when keyspace is changed
-        service::get_local_migration_manager().register_listener(this);
-#if 0
-        try
-        {
-            // Ensure StorageProxy is initialized on start-up; see CASSANDRA-3797.
- Class.forName("org.apache.cassandra.service.StorageProxy"); - // also IndexSummaryManager, which is otherwise unreferenced - Class.forName("org.apache.cassandra.io.sstable.IndexSummaryManager"); - } - catch (ClassNotFoundException e) - { - throw new AssertionError(e); - } -#endif - - std::vector loaded_endpoints; - if (get_property_load_ring_state()) { - slogger.info("Loading persisted ring state"); - auto loaded_tokens = db::system_keyspace::load_tokens().get0(); - auto loaded_host_ids = db::system_keyspace::load_host_ids().get0(); - - for (auto& x : loaded_tokens) { - slogger.debug("Loaded tokens: endpoint={}, tokens={}", x.first, x.second); - } - - for (auto& x : loaded_host_ids) { - slogger.debug("Loaded host_id: endpoint={}, uuid={}", x.first, x.second); - } - - for (auto x : loaded_tokens) { - auto ep = x.first; - auto tokens = x.second; - if (ep == get_broadcast_address()) { - // entry has been mistakenly added, delete it - db::system_keyspace::remove_endpoint(ep).get(); - } else { - _token_metadata.update_normal_tokens(tokens, ep); - if (loaded_host_ids.count(ep)) { - _token_metadata.update_host_id(loaded_host_ids.at(ep), ep); - } - loaded_endpoints.push_back(ep); - gossiper.add_saved_endpoint(ep); - } - } - } - - prepare_to_join(std::move(loaded_endpoints)); -#if 0 - // Has to be called after the host id has potentially changed in prepareToJoin(). - for (ColumnFamilyStore cfs : ColumnFamilyStore.all()) - if (cfs.metadata.isCounter()) - cfs.initCounterCache(); -#endif - - if (get_property_join_ring()) { - join_token_ring(delay); - } else { - auto tokens = db::system_keyspace::get_saved_tokens().get0(); - if (!tokens.empty()) { - _token_metadata.update_normal_tokens(tokens, get_broadcast_address()); - // order is important here, the gossiper can fire in between adding these two states. It's ok to send TOKENS without STATUS, but *not* vice versa. - gossiper.add_local_application_state(gms::application_state::TOKENS, value_factory.tokens(tokens)).get(); - gossiper.add_local_application_state(gms::application_state::STATUS, value_factory.hibernate(true)).get(); - } - slogger.info("Not joining ring as requested. 
-        }
-
-        get_storage_service().invoke_on_all([] (auto& ss) {
-            ss._range_tombstones_feature = gms::feature(RANGE_TOMBSTONES_FEATURE);
-            ss._large_partitions_feature = gms::feature(LARGE_PARTITIONS_FEATURE);
-            ss._counters_feature = gms::feature(COUNTERS_FEATURE);
-
-            if (ss._db.local().get_config().experimental()) {
-                ss._materialized_views_feature = gms::feature(MATERIALIZED_VIEWS_FEATURE);
-                ss._indexes_feature = gms::feature(INDEXES_FEATURE);
-            }
-        }).get();
-    });
-}
-
-// should run under _replicate_task lock
-future<> storage_service::replicate_tm_only() {
-    _shadow_token_metadata = _token_metadata;
-
-    return get_storage_service().invoke_on_all([this](storage_service& local_ss){
-        if (engine().cpu_id() != 0) {
-            local_ss._token_metadata = _shadow_token_metadata;
-        }
-    });
-}
-
-// should run under _replicate_task and gossiper::timer_callback locks
-future<> storage_service::replicate_tm_and_ep_map(shared_ptr<gms::gossiper> g0) {
-    // sanity: check that gossiper is fully initialized like we expect it to be
-    return get_storage_service().invoke_on_all([](storage_service& local_ss) {
-        if (!gms::get_gossiper().local_is_initialized()) {
-            auto err = sprint("replicate_to_all_cores is called before gossiper is fully initialized");
-            slogger.warn(err.c_str());
-            throw std::runtime_error(err);
-        }
-    }).then([this, g0] {
-        _shadow_token_metadata = _token_metadata;
-        g0->shadow_endpoint_state_map = g0->endpoint_state_map;
-
-        return get_storage_service().invoke_on_all([g0, this](storage_service& local_ss) {
-            if (engine().cpu_id() != 0) {
-                gms::get_local_gossiper().endpoint_state_map = g0->shadow_endpoint_state_map;
-                local_ss._token_metadata = _shadow_token_metadata;
-            }
-        });
-    });
-}
-
-future<> storage_service::replicate_to_all_cores() {
-    // sanity checks: this function is supposed to be run on shard 0 only and
-    // when gossiper has already been initialized.
-    if (engine().cpu_id() != 0) {
-        auto err = sprint("replicate_to_all_cores is not run on cpu zero");
-        slogger.warn(err.c_str());
-        throw std::runtime_error(err);
-    }
-
-    if (!gms::get_gossiper().local_is_initialized()) {
-        auto err = sprint("replicate_to_all_cores is called before gossiper on shard0 is initialized");
-        slogger.warn(err.c_str());
-        throw std::runtime_error(err);
-    }
-
-    // FIXME: There is no back pressure. If the remote cores are slow, and
-    // replication is called often, it will queue tasks to the semaphore
-    // without end.
-    return _replicate_task.wait().then([this] {
-
-        auto g0 = gms::get_local_gossiper().shared_from_this();
-
-        return g0->timer_callback_lock().then([this, g0] {
-            bool endpoint_map_changed = g0->shadow_endpoint_state_map != g0->endpoint_state_map;
-
-            if (endpoint_map_changed) {
-                return replicate_tm_and_ep_map(g0).finally([g0] {
-                    g0->timer_callback_unlock();
-                });
-            } else {
-                g0->timer_callback_unlock();
-                return replicate_tm_only();
-            }
-        });
-    }).then_wrapped([this, ss0 = this->shared_from_this()](auto&& f){
-        try {
-            _replicate_task.signal();
-            f.get();
-        } catch (...) {
{ - slogger.error("Fail to replicate _token_metadata"); - } - return make_ready_future<>(); - }); -} - -future<> storage_service::gossip_snitch_info() { - auto& snitch = locator::i_endpoint_snitch::get_local_snitch_ptr(); - auto addr = get_broadcast_address(); - auto dc = snitch->get_datacenter(addr); - auto rack = snitch->get_rack(addr); - auto& gossiper = gms::get_local_gossiper(); - return gossiper.add_local_application_state(gms::application_state::DC, value_factory.datacenter(dc)).then([this, &gossiper, rack] { - return gossiper.add_local_application_state(gms::application_state::RACK, value_factory.rack(rack)); - }); -} - -future<> storage_service::stop() { - uninit_messaging_service(); - return make_ready_future<>(); -} - -future<> storage_service::check_for_endpoint_collision() { - slogger.debug("Starting shadow gossip round to check for endpoint collision"); -#if 0 - if (!MessagingService.instance().isListening()) - MessagingService.instance().listen(FBUtilities.getLocalAddress()); -#endif - return seastar::async([this] { - auto& gossiper = gms::get_local_gossiper(); - auto t = gms::gossiper::clk::now(); - bool found_bootstrapping_node = false; - do { - slogger.info("Checking remote features with gossip"); - gossiper.do_shadow_round().get(); - gossiper.check_knows_remote_features(get_config_supported_features()); - auto addr = get_broadcast_address(); - if (!gossiper.is_safe_for_bootstrap(addr)) { - throw std::runtime_error(sprint("A node with address %s already exists, cancelling join. " - "Use replace_address if you want to replace this node.", addr)); - } - if (dht::range_streamer::use_strict_consistency()) { - found_bootstrapping_node = false; - for (auto& x : gossiper.get_endpoint_states()) { - auto state = gossiper.get_gossip_status(x.second); - if (state.empty()) { - continue; - } - slogger.debug("Checking bootstrapping/leaving/moving nodes: node={}, status={} (check_for_endpoint_collision)", x.first, state); - if (state == sstring(versioned_value::STATUS_BOOTSTRAPPING) || - state == sstring(versioned_value::STATUS_LEAVING) || - state == sstring(versioned_value::STATUS_MOVING)) { - if (gms::gossiper::clk::now() > t + std::chrono::seconds(60)) { - throw std::runtime_error("Other bootstrapping/leaving/moving nodes detected, cannot bootstrap while consistent_rangemovement is true (check_for_endpoint_collision)"); - } else { - gossiper.goto_shadow_round(); - gossiper.reset_endpoint_state_map(); - found_bootstrapping_node = true; - auto elapsed = std::chrono::duration_cast(gms::gossiper::clk::now() - t).count(); - slogger.info("Checking bootstrapping/leaving/moving nodes: node={}, status={}, sleep 1 second and check again ({} seconds elapsed) (check_for_endpoint_collision)", x.first, state, elapsed); - sleep(std::chrono::seconds(1)).get(); - break; - } - } - } - } - } while (found_bootstrapping_node); - slogger.info("Checking bootstrapping/leaving/moving nodes: ok (check_for_endpoint_collision)"); - gossiper.reset_endpoint_state_map(); - }); -} - -// Runs inside seastar::async context -void storage_service::remove_endpoint(inet_address endpoint) { - auto& gossiper = gms::get_local_gossiper(); - gossiper.remove_endpoint(endpoint); - db::system_keyspace::remove_endpoint(endpoint).then_wrapped([endpoint] (auto&& f) { - try { - f.get(); - } catch (...) 
{ - slogger.error("fail to remove endpoint={}: {}", endpoint, std::current_exception()); - } - return make_ready_future<>(); - }).get(); -} - -future> storage_service::prepare_replacement_info() { - if (!db().local().get_replace_address()) { - throw std::runtime_error(sprint("replace_address is empty")); - } - auto replace_address = db().local().get_replace_address().value(); - slogger.info("Gathering node replacement information for {}", replace_address); - - // if (!MessagingService.instance().isListening()) - // MessagingService.instance().listen(FBUtilities.getLocalAddress()); - auto seeds = gms::get_local_gossiper().get_seeds(); - if (seeds.size() == 1 && seeds.count(replace_address)) { - throw std::runtime_error(sprint("Cannot replace_address %s because no seed node is up", replace_address)); - } - - // make magic happen - slogger.info("Checking remote features with gossip"); - return gms::get_local_gossiper().do_shadow_round().then([this, replace_address] { - auto& gossiper = gms::get_local_gossiper(); - gossiper.check_knows_remote_features(get_config_supported_features()); - // now that we've gossiped at least once, we should be able to find the node we're replacing - auto state = gossiper.get_endpoint_state_for_endpoint(replace_address); - if (!state) { - throw std::runtime_error(sprint("Cannot replace_address %s because it doesn't exist in gossip", replace_address)); - } - auto host_id = gossiper.get_host_id(replace_address); - auto eps = gossiper.get_endpoint_state_for_endpoint(replace_address); - if (!eps) { - throw std::runtime_error(sprint("Cannot replace_address %s because can not find gossip endpoint state", replace_address)); - } - auto value = eps->get_application_state(application_state::TOKENS); - if (!value) { - throw std::runtime_error(sprint("Could not find tokens for %s to replace", replace_address)); - } - auto tokens = get_tokens_for(replace_address); - // use the replacee's host Id as our own so we receive hints, etc - return db::system_keyspace::set_local_host_id(host_id).discard_result().then([replace_address, tokens = std::move(tokens)] { - gms::get_local_gossiper().reset_endpoint_state_map(); // clean up since we have what we need - return make_ready_future>(std::move(tokens)); - }); - }); -} - -future> storage_service::get_ownership() { - return run_with_no_api_lock([] (storage_service& ss) { - auto token_map = dht::global_partitioner().describe_ownership(ss._token_metadata.sorted_tokens()); - // describeOwnership returns tokens in an unspecified order, let's re-order them - std::map ownership; - for (auto entry : token_map) { - gms::inet_address endpoint = ss._token_metadata.get_endpoint(entry.first).value(); - auto token_ownership = entry.second; - ownership[endpoint] += token_ownership; - } - return ownership; - }); -} - -future> storage_service::effective_ownership(sstring keyspace_name) { - return run_with_no_api_lock([keyspace_name] (storage_service& ss) mutable { - if (keyspace_name != "") { - //find throws no such keyspace if it is missing - const keyspace& ks = ss._db.local().find_keyspace(keyspace_name); - // This is ugly, but it follows origin - auto&& rs = ks.get_replication_strategy(); // clang complains about typeid(ks.get_replication_strategy()); - if (typeid(rs) == typeid(locator::local_strategy)) { - throw std::runtime_error("Ownership values for keyspaces with LocalStrategy are meaningless"); - } - } else { - auto non_system_keyspaces = ss._db.local().get_non_system_keyspaces(); - - //system_traces is a non-system keyspace however it needs 
-            size_t special_table_count = 0;
-            if (std::find(non_system_keyspaces.begin(), non_system_keyspaces.end(), "system_traces") !=
-                    non_system_keyspaces.end()) {
-                special_table_count += 1;
-            }
-            if (non_system_keyspaces.size() > special_table_count) {
-                throw std::runtime_error("Non-system keyspaces don't have the same replication settings, effective ownership information is meaningless");
-            }
-            keyspace_name = "system_traces";
-        }
-        auto token_ownership = dht::global_partitioner().describe_ownership(ss._token_metadata.sorted_tokens());
-
-        std::map<gms::inet_address, float> final_ownership;
-
-        // calculate ownership per dc
-        for (auto endpoints : ss._token_metadata.get_topology().get_datacenter_endpoints()) {
-            // calculate the ownership with replication and add the endpoint to the final ownership map
-            for (const gms::inet_address& endpoint : endpoints.second) {
-                float ownership = 0.0f;
-                for (range<token> r : ss.get_ranges_for_endpoint(keyspace_name, endpoint)) {
-                    // get_ranges_for_endpoint will unwrap the first range.
-                    // With t0 t1 t2 t3, the first range (t3,t0] will be split
-                    // as (min,t0] and (t3,max]. Skipping the range (t3,max]
-                    // we will get the correct ownership number as if the first
-                    // range were not split.
-                    if (!r.end()) {
-                        continue;
-                    }
-                    auto end_token = r.end()->value();
-                    if (token_ownership.find(end_token) != token_ownership.end()) {
-                        ownership += token_ownership[end_token];
-                    }
-                }
-                final_ownership[endpoint] = ownership;
-            }
-        }
-        return final_ownership;
-    });
-}
-
-static const std::map<storage_service::mode, sstring> mode_names = {
-    {storage_service::mode::STARTING, "STARTING"},
-    {storage_service::mode::NORMAL, "NORMAL"},
-    {storage_service::mode::JOINING, "JOINING"},
-    {storage_service::mode::LEAVING, "LEAVING"},
-    {storage_service::mode::DECOMMISSIONED, "DECOMMISSIONED"},
-    {storage_service::mode::MOVING, "MOVING"},
-    {storage_service::mode::DRAINING, "DRAINING"},
-    {storage_service::mode::DRAINED, "DRAINED"},
-};
-
-std::ostream& operator<<(std::ostream& os, const storage_service::mode& m) {
-    os << mode_names.at(m);
-    return os;
-}
-
-void storage_service::set_mode(mode m, bool log) {
-    set_mode(m, "", log);
-}
-
-void storage_service::set_mode(mode m, sstring msg, bool log) {
-    _operation_mode = m;
-    if (log) {
-        slogger.info("{}: {}", m, msg);
-    } else {
-        slogger.debug("{}: {}", m, msg);
-    }
-}
-
-future<std::unordered_set<token>> storage_service::get_local_tokens() {
-    return db::system_keyspace::get_saved_tokens().then([] (auto&& tokens) {
-        // should not be called before initServer sets this
-        if (tokens.empty()) {
-            auto err = sprint("get_local_tokens: tokens is empty");
-            slogger.error(err.c_str());
-            throw std::runtime_error(err);
-        }
-        return tokens;
-    });
-}
-
-sstring storage_service::get_release_version() {
-    return version::release();
-}
-
-sstring storage_service::get_schema_version() {
-    return _db.local().get_version().to_sstring();
-}
-
-static constexpr auto UNREACHABLE = "UNREACHABLE";
-
-future<std::unordered_map<sstring, std::vector<sstring>>> storage_service::describe_schema_versions() {
-    auto live_hosts = gms::get_local_gossiper().get_live_members();
-    std::unordered_map<sstring, std::vector<sstring>> results;
-    return map_reduce(std::move(live_hosts), [] (auto host) {
-        auto f0 = netw::get_messaging_service().local().send_schema_check(netw::msg_addr{ host, 0 });
-        return std::move(f0).then_wrapped([host] (auto f) {
-            if (f.failed()) {
-                return std::pair<gms::inet_address, stdx::optional<utils::UUID>>(host, stdx::nullopt);
-            }
-            return std::pair<gms::inet_address, stdx::optional<utils::UUID>>(host, f.get0());
-        });
-    }, std::move(results), [] (auto results, auto host_and_version) {
-        auto version = host_and_version.second ? host_and_version.second->to_sstring() : UNREACHABLE;
-
-future<sstring> storage_service::get_operation_mode() {
-    return run_with_no_api_lock([] (storage_service& ss) {
-        auto mode = ss._operation_mode;
-        return make_ready_future<sstring>(sprint("%s", mode));
-    });
-}
-
-future<bool> storage_service::is_starting() {
-    return run_with_no_api_lock([] (storage_service& ss) {
-        auto mode = ss._operation_mode;
-        return mode == storage_service::mode::STARTING;
-    });
-}
-
-future<bool> storage_service::is_gossip_running() {
-    return run_with_no_api_lock([] (storage_service& ss) {
-        return gms::get_local_gossiper().is_enabled();
-    });
-}
-
-future<> storage_service::start_gossiping() {
-    return run_with_api_lock(sstring("start_gossiping"), [] (storage_service& ss) {
-        return seastar::async([&ss] {
-            if (!ss._initialized) {
-                slogger.warn("Starting gossip by operator request");
-                ss.set_gossip_tokens(ss.get_local_tokens().get0());
-                gms::get_local_gossiper().force_newer_generation();
-                gms::get_local_gossiper().start_gossiping(get_generation_number()).then([&ss] {
-                    ss._initialized = true;
-                }).get();
-            }
-        });
-    });
-}
-
-future<> storage_service::stop_gossiping() {
-    return run_with_api_lock(sstring("stop_gossiping"), [] (storage_service& ss) {
-        if (ss._initialized) {
-            slogger.warn("Stopping gossip by operator request");
-            return gms::stop_gossiping().then([&ss] {
-                ss._initialized = false;
-            });
-        }
-        return make_ready_future<>();
-    });
-}
-
-future<> storage_service::do_stop_ms() {
-    if (_ms_stopped) {
-        return make_ready_future<>();
-    }
-    _ms_stopped = true;
-    return netw::get_messaging_service().invoke_on_all([] (auto& ms) {
-        return ms.stop();
-    }).then([] {
-        slogger.info("messaging_service stopped");
-    });
-}
-
-future<> storage_service::do_stop_stream_manager() {
-    if (_stream_manager_stopped) {
-        return make_ready_future<>();
-    }
-    _stream_manager_stopped = true;
-    return streaming::get_stream_manager().invoke_on_all([] (auto& sm) {
-        return sm.stop();
-    }).then([] {
-        slogger.info("stream_manager stopped");
-    });
-}
-
-future<> check_snapshot_not_exist(database& db, sstring ks_name, sstring name) {
-    auto& ks = db.find_keyspace(ks_name);
-    return parallel_for_each(ks.metadata()->cf_meta_data(), [&db, ks_name = std::move(ks_name), name = std::move(name)] (auto& pair) {
-        auto& cf = db.find_column_family(pair.second);
-        return cf.snapshot_exists(name).then([ks_name = std::move(ks_name), name] (bool exists) {
-            if (exists) {
-                throw std::runtime_error(sprint("Keyspace %s: snapshot %s already exists.", ks_name, name));
-            }
-        });
-    });
-}
-
-future<> storage_service::take_snapshot(sstring tag, std::vector<sstring> keyspace_names) {
-    if (tag.empty()) {
-        throw std::runtime_error("You must supply a snapshot name.");
-    }
-
-    if (keyspace_names.size() == 0) {
-        boost::copy(_db.local().get_keyspaces() | boost::adaptors::map_keys, std::back_inserter(keyspace_names));
-    }
-
-    return smp::submit_to(0, [] {
-        auto mode = get_local_storage_service()._operation_mode;
-        if (mode == storage_service::mode::JOINING) {
-            throw std::runtime_error("Cannot snapshot until bootstrap completes");
-        }
-    }).then([tag = std::move(tag), keyspace_names = std::move(keyspace_names), this] {
-        return parallel_for_each(keyspace_names, [tag, this] (auto& ks_name) {
-            return check_snapshot_not_exist(_db.local(), ks_name, tag);
-        }).then([this, tag, keyspace_names] {
-            return _db.invoke_on_all([tag = std::move(tag), keyspace_names] (database& db) {
-                return parallel_for_each(keyspace_names, [&db, tag = std::move(tag)] (auto& ks_name) {
-                    auto& ks = db.find_keyspace(ks_name);
-                    return parallel_for_each(ks.metadata()->cf_meta_data(), [&db, tag = std::move(tag)] (auto& pair) {
-                        auto& cf = db.find_column_family(pair.second);
-                        return cf.snapshot(tag);
-                    });
-                });
-            });
-        });
-    });
-}
-
-future<> storage_service::take_column_family_snapshot(sstring ks_name, sstring cf_name, sstring tag) {
-    if (ks_name.empty()) {
-        throw std::runtime_error("You must supply a keyspace name");
-    }
-    if (cf_name.empty()) {
-        throw std::runtime_error("You must supply a table name");
-    }
-    if (cf_name.find(".") != sstring::npos) {
-        throw std::invalid_argument("Cannot take a snapshot of a secondary index by itself. Run snapshot on the table that owns the index.");
-    }
-
-    if (tag.empty()) {
-        throw std::runtime_error("You must supply a snapshot name.");
-    }
-
-    return smp::submit_to(0, [] {
-        auto mode = get_local_storage_service()._operation_mode;
-        if (mode == storage_service::mode::JOINING) {
-            throw std::runtime_error("Cannot snapshot until bootstrap completes");
-        }
-    }).then([this, ks_name = std::move(ks_name), cf_name = std::move(cf_name), tag = std::move(tag)] {
-        return check_snapshot_not_exist(_db.local(), ks_name, tag).then([this, ks_name, cf_name, tag] {
-            return _db.invoke_on_all([ks_name, cf_name, tag] (database& db) {
-                auto& cf = db.find_column_family(ks_name, cf_name);
-                return cf.snapshot(tag);
-            });
-        });
-    });
-}
-
-future<> storage_service::clear_snapshot(sstring tag, std::vector<sstring> keyspace_names) {
-    return _db.local().clear_snapshot(tag, keyspace_names);
-}
-
-future<std::unordered_map<sstring, std::vector<storage_service::snapshot_details>>>
-storage_service::get_snapshot_details() {
-    using cf_snapshot_map = std::unordered_map<utils::UUID, column_family::snapshot_details>;
-    using snapshot_map = std::unordered_map<sstring, cf_snapshot_map>;
-
-    class snapshot_reducer {
-    private:
-        snapshot_map _result;
-    public:
-        future<> operator()(const snapshot_map& value) {
-            for (auto&& vp: value) {
-                if (_result.count(vp.first) == 0) {
-                    _result.emplace(vp.first, std::move(vp.second));
-                    continue;
-                }
-
-                auto& rp = _result.at(vp.first);
-                for (auto&& cf: vp.second) {
-                    if (rp.count(cf.first) == 0) {
-                        rp.emplace(cf.first, std::move(cf.second));
-                        continue;
-                    }
-                    auto& rcf = rp.at(cf.first);
-                    rcf.live = cf.second.live;
-                    rcf.total = cf.second.total;
-                }
-            }
-            return make_ready_future<>();
-        }
-        snapshot_map get() && {
-            return std::move(_result);
-        }
-    };
-
-    return _db.map_reduce(snapshot_reducer(), [] (database& db) {
-        auto local_snapshots = make_lw_shared<snapshot_map>();
-        return parallel_for_each(db.get_column_families(), [local_snapshots] (auto& cf_pair) {
-            return cf_pair.second->get_snapshot_details().then([uuid = cf_pair.first, local_snapshots] (auto map) {
-                for (auto&& snap_map: map) {
-                    if (local_snapshots->count(snap_map.first) == 0) {
-                        local_snapshots->emplace(snap_map.first, cf_snapshot_map());
-                    }
-                    local_snapshots->at(snap_map.first).emplace(uuid, snap_map.second);
-                }
-                return make_ready_future<>();
-            });
-        }).then([local_snapshots] {
-            return make_ready_future<snapshot_map>(std::move(*local_snapshots));
-        });
-    }).then([this] (snapshot_map&& map) {
-        std::unordered_map<sstring, std::vector<snapshot_details>> result;
-        for (auto&& pair: map) {
-            std::vector<snapshot_details> details;
-
-            for (auto&& snap_map: pair.second) {
-                auto& cf = _db.local().find_column_family(snap_map.first);
-                details.push_back({ snap_map.second.live, snap_map.second.total, cf.schema()->cf_name(), cf.schema()->ks_name() });
-            }
-            result.emplace(pair.first, std::move(details));
-        }
-
-        return make_ready_future<std::unordered_map<sstring, std::vector<snapshot_details>>>(std::move(result));
-    });
-}
-
-future<int64_t> storage_service::true_snapshots_size() {
-    return _db.map_reduce(adder<int64_t>(), [] (database& db) {
-        return do_with(int64_t(0), [&db] (auto& local_total) {
-            return parallel_for_each(db.get_column_families(), [&local_total] (auto& cf_pair) {
-                return cf_pair.second->get_snapshot_details().then([&local_total] (auto map) {
-                    for (auto&& snap_map: map) {
-                        local_total += snap_map.second.live;
-                    }
-                    return make_ready_future<>();
-                });
-            }).then([&local_total] {
-                return make_ready_future<int64_t>(local_total);
-            });
-        });
-    });
-}
-
-future<> storage_service::start_rpc_server() {
-    return run_with_api_lock(sstring("start_rpc_server"), [] (storage_service& ss) {
-        if (ss._thrift_server) {
-            return make_ready_future<>();
-        }
-
-        auto tserver = make_shared<distributed<thrift_server>>();
-        ss._thrift_server = tserver;
-
-        auto& cfg = ss._db.local().get_config();
-        auto port = cfg.rpc_port();
-        auto addr = cfg.rpc_address();
-        auto keepalive = cfg.rpc_keepalive();
-        return seastar::net::dns::resolve_name(addr).then([&ss, tserver, addr, port, keepalive] (seastar::net::inet_address ip) {
-            return tserver->start(std::ref(ss._db), std::ref(cql3::get_query_processor())).then([tserver, port, addr, ip, keepalive] {
-                // #293 - do not stop anything
-                //engine().at_exit([tserver] {
-                //    return tserver->stop();
-                //});
-                return tserver->invoke_on_all(&thrift_server::listen, ipv4_addr{ip, port}, keepalive);
-            });
-        }).then([addr, port] {
-            slogger.info("Thrift server listening on {}:{} ...", addr, port);
-        });
-    });
-}
-
-future<> storage_service::do_stop_rpc_server() {
-    auto tserver = _thrift_server;
-    _thrift_server = {};
-    if (tserver) {
-        // FIXME: thrift_server::stop() doesn't kill existing connections and wait for them
-        // Note: We must capture tserver so that it will not be freed before tserver->stop
-        return tserver->stop().then([tserver] {
-            slogger.info("Thrift server stopped");
-        });
-    }
-    return make_ready_future<>();
-}
-
-future<> storage_service::stop_rpc_server() {
-    return run_with_api_lock(sstring("stop_rpc_server"), [] (storage_service& ss) {
-        return ss.do_stop_rpc_server();
-    });
-}
-
-future<bool> storage_service::is_rpc_server_running() {
-    return run_with_no_api_lock([] (storage_service& ss) {
-        return bool(ss._thrift_server);
-    });
-}
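The thrift, CQL and redis servers in this file all follow the same sharded-service lifecycle: allocate a distributed<T>, start() it on every shard, fan a setup call out with invoke_on_all(), and stop() it before shutdown. A minimal sketch of that lifecycle, with echo_service as a hypothetical stand-in for the real server classes:

    #include "core/app-template.hh"
    #include "core/distributed.hh"

    // Stand-in for thrift_server / cql_server / redis_server.
    class echo_service {
    public:
        future<> listen() { return make_ready_future<>(); } // per-shard setup hook
        future<> stop() { return make_ready_future<>(); }   // required by distributed<T>
    };

    int main(int ac, char** av) {
        app_template app;
        return app.run(ac, av, [] {
            auto server = make_shared<distributed<echo_service>>();
            return server->start().then([server] {
                return server->invoke_on_all(&echo_service::listen);
            }).then([server] {
                return server->stop(); // must complete before the reactor exits
            });
        });
    }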
"encrypted" : "unencrypted" - ); - }); - }); - - }); - }); - }); - }); -} -future<> storage_service::start_redis_transport() { - return run_with_api_lock(sstring("start_redis_transport"), [] (storage_service& ss) { - if (ss._redis_server) { - return make_ready_future<>(); - } - auto rserver = make_shared>(); - ss._redis_server = rserver; - - auto& cfg = ss._db.local().get_config(); - auto addr = cfg.redis_rpc_address(); - auto ceo = cfg.client_encryption_options(); - auto keepalive = cfg.rpc_keepalive(); - return seastar::net::dns::resolve_name(addr).then([rserver, addr, &cfg, keepalive, ceo = std::move(ceo)] (seastar::net::inet_address ip) { - return rserver->start(std::ref(service::get_redis_storage_proxy())).then([rserver, &cfg, addr, ip, ceo, keepalive]() { - auto f = make_ready_future(); - struct listen_cfg { - ipv4_addr addr; - std::shared_ptr cred; - }; - - std::vector configs({ { ipv4_addr{ip, cfg.native_redis_transport_port()} }}); - - // main should have made sure values are clean and neatish - if (ceo.at("enabled") == "true") { - auto cred = std::make_shared(); - - cred->set_dh_level(seastar::tls::dh_params::level::MEDIUM); - - if (ceo.count("priority_string")) { - cred->set_priority_string(ceo.at("priority_string")); - } - if (ceo.count("require_client_auth") && ceo.at("require_client_auth") == "true") { - cred->set_client_auth(seastar::tls::client_auth::REQUIRE); - } - - f = cred->set_x509_key_file(ceo.at("certificate"), ceo.at("keyfile"), seastar::tls::x509_crt_format::PEM); - - if (ceo.count("truststore")) { - f = f.then([cred, f = ceo.at("truststore")] { return cred->set_x509_trust_file(f, seastar::tls::x509_crt_format::PEM); }); - } - - slogger.info("Enabling encrypted CQL connections between client and server"); - - if (cfg.native_redis_transport_port_ssl.is_set() && cfg.native_redis_transport_port_ssl() != cfg.native_redis_transport_port()) { - configs.emplace_back(listen_cfg{ipv4_addr{ip, cfg.native_redis_transport_port_ssl()}, std::move(cred)}); - } else { - configs.back().cred = std::move(cred); - } - } - - return f.then([rserver, configs = std::move(configs), keepalive] { - return parallel_for_each(configs, [rserver, keepalive](const listen_cfg & cfg) { - return rserver->invoke_on_all(&cql_transport::redis_server::listen, cfg.addr, cfg.cred, keepalive).then([cfg] { - slogger.info("Starting listening for REDIS clients on {} ({})", cfg.addr, cfg.cred ? 
"encrypted" : "unencrypted"); - }); - }); - }); - }); - }); - }); -} -future<> storage_service::do_stop_native_transport() { - auto cserver = _cql_server; - _cql_server = {}; - if (cserver) { - // FIXME: cql_server::stop() doesn't kill existing connections and wait for them - // Note: We must capture cserver so that it will not be freed before cserver->stop - return cserver->stop().then([cserver] { - slogger.info("CQL server stopped"); - }); - } - return make_ready_future<>(); -} - -future<> storage_service::stop_native_transport() { - return run_with_api_lock(sstring("stop_native_transport"), [] (storage_service& ss) { - return ss.do_stop_native_transport(); - }); -} - -future<> storage_service::stop_redis_transport() { - return run_with_api_lock(sstring("stop_native_transport"), [] (storage_service& ss) { - return ss.do_stop_redis_transport(); - }); -} - -future<> storage_service::do_stop_redis_transport() { - auto rserver = _redis_server; - _redis_server = {}; - if (rserver) { - return rserver->stop().then([rserver] { - slogger.info("REDIS server stopped"); - }); - } - return make_ready_future<>(); -} - - -future storage_service::is_native_transport_running() { - return run_with_no_api_lock([] (storage_service& ss) { - return bool(ss._cql_server); - }); -} - -future<> storage_service::decommission() { - return run_with_api_lock(sstring("decommission"), [] (storage_service& ss) { - return seastar::async([&ss] { - auto& tm = ss.get_token_metadata(); - auto& db = ss.db().local(); - if (!tm.is_member(ss.get_broadcast_address())) { - throw std::runtime_error("local node is not a member of the token ring yet"); - } - - if (tm.clone_after_all_left().sorted_tokens().size() < 2) { - throw std::runtime_error("no other normal nodes in the ring; decommission would be pointless"); - } - - if (ss._operation_mode != mode::NORMAL) { - throw std::runtime_error(sprint("Node in %s state; wait for status to become normal or restart", ss._operation_mode)); - } - - ss.update_pending_ranges().get(); - - auto non_system_keyspaces = db.get_non_system_keyspaces(); - for (const auto& keyspace_name : non_system_keyspaces) { - if (tm.get_pending_ranges(keyspace_name, ss.get_broadcast_address()).size() > 0) { - throw std::runtime_error("data is currently moving to this node; unable to leave the ring"); - } - } - - slogger.info("DECOMMISSIONING: starts"); - ss.start_leaving().get(); - // FIXME: long timeout = Math.max(RING_DELAY, BatchlogManager.instance.getBatchlogTimeout()); - auto timeout = ss.get_ring_delay(); - ss.set_mode(mode::LEAVING, sprint("sleeping %s ms for batch processing and pending range setup", timeout.count()), true); - sleep(timeout).get(); - - slogger.info("DECOMMISSIONING: unbootstrap starts"); - ss.unbootstrap(); - slogger.info("DECOMMISSIONING: unbootstrap done"); - - ss.shutdown_client_servers().get(); - slogger.info("DECOMMISSIONING: shutdown rpc and cql server done"); - - db::get_batchlog_manager().invoke_on_all([] (auto& bm) { - return bm.stop(); - }).get(); - slogger.info("DECOMMISSIONING: stop batchlog_manager done"); - - gms::stop_gossiping().get(); - slogger.info("DECOMMISSIONING: stop_gossiping done"); - ss.do_stop_ms().get(); - slogger.info("DECOMMISSIONING: stop messaging_service done"); - // StageManager.shutdownNow(); - db::system_keyspace::set_bootstrap_state(db::system_keyspace::bootstrap_state::DECOMMISSIONED).get(); - slogger.info("DECOMMISSIONING: set_bootstrap_state done"); - ss.set_mode(mode::DECOMMISSIONED, true); - slogger.info("DECOMMISSIONING: done"); - // let op be 
-
-future<> storage_service::decommission() {
-    return run_with_api_lock(sstring("decommission"), [] (storage_service& ss) {
-        return seastar::async([&ss] {
-            auto& tm = ss.get_token_metadata();
-            auto& db = ss.db().local();
-            if (!tm.is_member(ss.get_broadcast_address())) {
-                throw std::runtime_error("local node is not a member of the token ring yet");
-            }
-
-            if (tm.clone_after_all_left().sorted_tokens().size() < 2) {
-                throw std::runtime_error("no other normal nodes in the ring; decommission would be pointless");
-            }
-
-            if (ss._operation_mode != mode::NORMAL) {
-                throw std::runtime_error(sprint("Node in %s state; wait for status to become normal or restart", ss._operation_mode));
-            }
-
-            ss.update_pending_ranges().get();
-
-            auto non_system_keyspaces = db.get_non_system_keyspaces();
-            for (const auto& keyspace_name : non_system_keyspaces) {
-                if (tm.get_pending_ranges(keyspace_name, ss.get_broadcast_address()).size() > 0) {
-                    throw std::runtime_error("data is currently moving to this node; unable to leave the ring");
-                }
-            }
-
-            slogger.info("DECOMMISSIONING: starts");
-            ss.start_leaving().get();
-            // FIXME: long timeout = Math.max(RING_DELAY, BatchlogManager.instance.getBatchlogTimeout());
-            auto timeout = ss.get_ring_delay();
-            ss.set_mode(mode::LEAVING, sprint("sleeping %s ms for batch processing and pending range setup", timeout.count()), true);
-            sleep(timeout).get();
-
-            slogger.info("DECOMMISSIONING: unbootstrap starts");
-            ss.unbootstrap();
-            slogger.info("DECOMMISSIONING: unbootstrap done");
-
-            ss.shutdown_client_servers().get();
-            slogger.info("DECOMMISSIONING: shutdown rpc and cql server done");
-
-            db::get_batchlog_manager().invoke_on_all([] (auto& bm) {
-                return bm.stop();
-            }).get();
-            slogger.info("DECOMMISSIONING: stop batchlog_manager done");
-
-            gms::stop_gossiping().get();
-            slogger.info("DECOMMISSIONING: stop_gossiping done");
-            ss.do_stop_ms().get();
-            slogger.info("DECOMMISSIONING: stop messaging_service done");
-            // StageManager.shutdownNow();
-            db::system_keyspace::set_bootstrap_state(db::system_keyspace::bootstrap_state::DECOMMISSIONED).get();
-            slogger.info("DECOMMISSIONING: set_bootstrap_state done");
-            ss.set_mode(mode::DECOMMISSIONED, true);
-            slogger.info("DECOMMISSIONING: done");
-            // let the operator be responsible for killing the process
-        });
-    });
-}
-
-future<> storage_service::removenode(sstring host_id_string) {
-    return run_with_api_lock(sstring("removenode"), [host_id_string] (storage_service& ss) mutable {
-        return seastar::async([&ss, host_id_string] {
-            slogger.debug("removenode: host_id = {}", host_id_string);
-            auto my_address = ss.get_broadcast_address();
-            auto& tm = ss._token_metadata;
-            auto local_host_id = tm.get_host_id(my_address);
-            auto host_id = utils::UUID(host_id_string);
-            auto endpoint_opt = tm.get_endpoint_for_host_id(host_id);
-            auto& gossiper = gms::get_local_gossiper();
-            if (!endpoint_opt) {
-                throw std::runtime_error("Host ID not found.");
-            }
-            auto endpoint = *endpoint_opt;
-
-            auto tokens = tm.get_tokens(endpoint);
-
-            slogger.debug("removenode: endpoint = {}", endpoint);
-
-            if (endpoint == my_address) {
-                throw std::runtime_error("Cannot remove self");
-            }
-
-            if (gossiper.get_live_members().count(endpoint)) {
-                throw std::runtime_error(sprint("Node %s is alive and owns this ID. Use decommission command to remove it from the ring", endpoint));
-            }
-
-            // A leaving endpoint that is dead is already being removed.
-            if (tm.is_leaving(endpoint)) {
-                slogger.warn("Node {} is already being removed, continuing removal anyway", endpoint);
-            }
-
-            if (!ss._replicating_nodes.empty()) {
-                throw std::runtime_error("This node is already processing a removal. Wait for it to complete, or use 'removenode force' if this has failed.");
-            }
-
-            auto non_system_keyspaces = ss.db().local().get_non_system_keyspaces();
-            // Find the endpoints that are going to become responsible for data
-            for (const auto& keyspace_name : non_system_keyspaces) {
-                auto& ks = ss.db().local().find_keyspace(keyspace_name);
-                // if the replication factor is 1 the data is lost so we shouldn't wait for confirmation
-                if (ks.get_replication_strategy().get_replication_factor() == 1) {
-                    slogger.warn("keyspace={} has replication factor 1, the data is probably lost", keyspace_name);
-                    continue;
-                }
-
-                // get all ranges that change ownership (that is, a node needs
-                // to take responsibility for new range)
-                std::unordered_multimap<dht::token_range, inet_address> changed_ranges =
-                    ss.get_changed_ranges_for_leaving(keyspace_name, endpoint);
-                auto& fd = gms::get_local_failure_detector();
-                for (auto& x: changed_ranges) {
-                    auto ep = x.second;
-                    if (fd.is_alive(ep)) {
-                        ss._replicating_nodes.emplace(ep);
-                    } else {
-                        slogger.warn("Endpoint {} is down and will not receive data for re-replication of {}", ep, endpoint);
-                    }
-                }
-            }
-            slogger.info("removenode: endpoint = {}, replicating_nodes = {}", endpoint, ss._replicating_nodes);
-            ss._removing_node = endpoint;
-            tm.add_leaving_endpoint(endpoint);
-            ss.update_pending_ranges().get();
-
-            // the gossiper will handle spoofing this node's state to REMOVING_TOKEN for us
-            // we add our own token so other nodes will let us know when they're done
-            gossiper.advertise_removing(endpoint, host_id, local_host_id).get();
-
-            // kick off streaming commands
-            // No need to wait for restore_replica_count to complete, since
-            // when it completes, the node will be removed from _replicating_nodes,
-            // and we wait for _replicating_nodes to become empty below
-            ss.restore_replica_count(endpoint, my_address).handle_exception([endpoint, my_address] (auto ep) {
-                slogger.info("Failed to restore_replica_count for node {} on node {}", endpoint, my_address);
-            });
-
-            // wait for ReplicationFinishedVerbHandler to signal we're done
-            while (!(ss._replicating_nodes.empty() || ss._force_remove_completion)) {
-                sleep(std::chrono::milliseconds(100)).get();
-            }
-
-            if (ss._force_remove_completion) {
-                ss._force_remove_completion = false;
-                throw std::runtime_error("nodetool removenode force is called by user");
-            }
-
-            std::unordered_set<token> tmp(tokens.begin(), tokens.end());
-            ss.excise(std::move(tmp), endpoint);
-
-            // gossiper will indicate the token has left
-            gossiper.advertise_token_removed(endpoint, host_id).get();
-
-            ss._replicating_nodes.clear();
-            ss._removing_node = std::experimental::nullopt;
-        });
-    });
-}
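The wait loop above works because removenode runs inside seastar::async(): sleep(...).get() suspends only the current seastar thread while the reactor keeps processing the REPLICATION_FINISHED messages that empty _replicating_nodes. The same polling shape in isolation, with pending/force as stand-ins for the two flags:

    #include "core/sleep.hh"
    #include <chrono>
    #include <unordered_set>

    // Must be called from inside a seastar::async() context, like removenode().
    void wait_for_confirmations(const std::unordered_set<int>& pending, const bool& force) {
        while (!(pending.empty() || force)) {
            sleep(std::chrono::milliseconds(100)).get(); // yields to the reactor
        }
    }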
-
-// Runs inside seastar::async context
-void storage_service::flush_column_families() {
-    service::get_storage_service().invoke_on_all([] (auto& ss) {
-        auto& local_db = ss.db().local();
-        auto non_system_cfs = local_db.get_column_families() | boost::adaptors::filtered([] (auto& uuid_and_cf) {
-            auto cf = uuid_and_cf.second;
-            return !is_system_keyspace(cf->schema()->ks_name());
-        });
-        // count CFs first
-        auto total_cfs = boost::distance(non_system_cfs);
-        ss._drain_progress.total_cfs = total_cfs;
-        ss._drain_progress.remaining_cfs = total_cfs;
-        // flush
-        return parallel_for_each(non_system_cfs, [&ss] (auto&& uuid_and_cf) {
-            auto cf = uuid_and_cf.second;
-            return cf->flush().then([&ss] {
-                ss._drain_progress.remaining_cfs--;
-            });
-        });
-    }).get();
-    // flush the system ones after all the rest are done, just in case flushing modifies any system state
-    // like CASSANDRA-5151. don't bother with progress tracking since system data is tiny.
-    service::get_storage_service().invoke_on_all([] (auto& ss) {
-        auto& local_db = ss.db().local();
-        auto system_cfs = local_db.get_column_families() | boost::adaptors::filtered([] (auto& uuid_and_cf) {
-            auto cf = uuid_and_cf.second;
-            return is_system_keyspace(cf->schema()->ks_name());
-        });
-        return parallel_for_each(system_cfs, [&ss] (auto&& uuid_and_cf) {
-            auto cf = uuid_and_cf.second;
-            return cf->flush();
-        });
-    }).get();
-}
-
-future<> storage_service::drain() {
-    return run_with_api_lock(sstring("drain"), [] (storage_service& ss) {
-        return seastar::async([&ss] {
-            if (ss._operation_mode == mode::DRAINED) {
-                slogger.warn("Cannot drain node (did it already happen?)");
-                return;
-            }
-            if (drain_in_progress) {
-                drain_in_progress->get();
-                ss.set_mode(mode::DRAINED, true);
-                return;
-            }
-            promise<> p;
-            drain_in_progress = p.get_future();
-
-            ss.set_mode(mode::DRAINING, "starting drain process", true);
-            ss.shutdown_client_servers().get();
-            gms::stop_gossiping().get();
-
-            ss.set_mode(mode::DRAINING, "shutting down messaging_service", false);
-            ss.do_stop_ms().get();
-
-#if 0
-            StorageProxy.instance.verifyNoHintsInProgress();
-#endif
-
-            ss.set_mode(mode::DRAINING, "flushing column families", false);
-            ss.flush_column_families();
-
-            db::get_batchlog_manager().invoke_on_all([] (auto& bm) {
-                return bm.stop();
-            }).get();
-
-            // Interrupt ongoing compactions and shut down to prevent further compaction
-            ss.db().invoke_on_all([] (auto& db) {
-                // FIXME: ongoing compaction tasks should be interrupted, not
-                // waited for, which is what compaction_manager::stop() does now.
-                return db.get_compaction_manager().stop();
-            }).get();
-
-#if 0
-            // whilst we've flushed all the CFs, which will have recycled all completed segments, we want to ensure
-            // there are no segments to replay, so we force the recycling of any remaining (should be at most one)
-            CommitLog.instance.forceRecycleAllSegments();
-#endif
-
-            ss.db().invoke_on_all([] (auto& db) {
-                return db.commitlog()->shutdown();
-            }).get();
-
-            ss.set_mode(mode::DRAINED, true);
-            p.set_value();
-        });
-    });
-}
-
-double storage_service::get_load() {
-    double bytes = 0;
-#if 0
-    for (String keyspaceName : Schema.instance.getKeyspaces())
-    {
-        Keyspace keyspace = Schema.instance.getKeyspaceInstance(keyspaceName);
-        if (keyspace == null)
-            continue;
-        for (ColumnFamilyStore cfs : keyspace.getColumnFamilyStores())
-            bytes += cfs.getLiveDiskSpaceUsed();
-    }
-#endif
-    return bytes;
-}
-
-sstring storage_service::get_load_string() {
-    return sprint("%f", get_load());
-}
-
-future<std::map<sstring, double>> storage_service::get_load_map() {
-    return run_with_no_api_lock([] (storage_service& ss) {
-        std::map<sstring, double> load_map;
-        auto& lb = ss.get_load_broadcaster();
-        if (lb) {
-            for (auto& x : lb->get_load_info()) {
-                load_map.emplace(sprint("%s", x.first), x.second);
-                slogger.debug("get_load_map endpoint={}, load={}", x.first, x.second);
-            }
-        } else {
-            slogger.debug("load_broadcaster is not set yet!");
-        }
-        load_map.emplace(sprint("%s", ss.get_broadcast_address()), ss.get_load());
-        return load_map;
-    });
-}
-
-future<> storage_service::rebuild(sstring source_dc) {
-    return run_with_api_lock(sstring("rebuild"), [source_dc] (storage_service& ss) {
-        slogger.info("rebuild from dc: {}", source_dc == "" ? "(any dc)" : source_dc);
-        auto streamer = make_lw_shared<dht::range_streamer>(ss._db, ss._token_metadata, ss.get_broadcast_address(), "Rebuild");
-        streamer->add_source_filter(std::make_unique<dht::range_streamer::failure_detector_source_filter>(gms::get_local_failure_detector()));
-        if (source_dc != "") {
-            streamer->add_source_filter(std::make_unique<dht::range_streamer::single_datacenter_filter>(source_dc));
-        }
-        for (const auto& keyspace_name : ss._db.local().get_non_system_keyspaces()) {
-            streamer->add_ranges(keyspace_name, ss.get_local_ranges(keyspace_name));
-        }
-        return streamer->fetch_async().then_wrapped([streamer] (auto&& f) {
-            try {
-                auto state = f.get0();
-            } catch (...) {
-                // This is used exclusively through JMX, so log the full trace but only throw a simple RTE
-                slogger.error("Error while rebuilding node: {}", std::current_exception());
-                throw std::runtime_error(sprint("Error while rebuilding node: %s", std::current_exception()));
-            }
-            return make_ready_future<>();
-        });
-    });
-}
-
-int32_t storage_service::get_exception_count() {
-    // FIXME
-    // We return 0 for no exceptions, it should probably be
-    // replaced by some general exception handling that would count
-    // the unhandled exceptions.
-    //return (int)StorageMetrics.exceptions.count();
-    return 0;
-}
-
-future<bool> storage_service::is_initialized() {
-    return run_with_no_api_lock([] (storage_service& ss) {
-        return ss._initialized;
-    });
-}
-
-std::unordered_multimap<dht::token_range, inet_address> storage_service::get_changed_ranges_for_leaving(sstring keyspace_name, inet_address endpoint) {
-    // First get all ranges the leaving endpoint is responsible for
-    auto ranges = get_ranges_for_endpoint(keyspace_name, endpoint);
-
-    slogger.debug("Node {} ranges [{}]", endpoint, ranges);
-
-    std::unordered_map<dht::token_range, std::vector<inet_address>> current_replica_endpoints;
-
-    // Find (for each range) all nodes that store replicas for these ranges as well
-    auto metadata = _token_metadata.clone_only_token_map(); // don't do this in the loop! #7758
-    for (auto& r : ranges) {
-        auto& ks = _db.local().find_keyspace(keyspace_name);
-        auto end_token = r.end() ? r.end()->value() : dht::maximum_token();
-        auto eps = ks.get_replication_strategy().calculate_natural_endpoints(end_token, metadata);
-        current_replica_endpoints.emplace(r, std::move(eps));
-    }
-
-    auto temp = _token_metadata.clone_after_all_left();
-
-    // endpoint might or might not be 'leaving'. If it was not leaving (that is, removenode
-    // command was used), it is still present in temp and must be removed.
-    if (temp.is_member(endpoint)) {
-        temp.remove_endpoint(endpoint);
-    }
-
-    std::unordered_multimap<dht::token_range, inet_address> changed_ranges;
-
-    // Go through the ranges and for each range check who will be
-    // storing replicas for these ranges when the leaving endpoint
-    // is gone. Whoever is present in newReplicaEndpoints list, but
-    // not in the currentReplicaEndpoints list, will be needing the
-    // range.
-    for (auto& r : ranges) {
-        auto& ks = _db.local().find_keyspace(keyspace_name);
-        auto end_token = r.end() ? r.end()->value() : dht::maximum_token();
-        auto new_replica_endpoints = ks.get_replication_strategy().calculate_natural_endpoints(end_token, temp);
-
-        auto rg = current_replica_endpoints.equal_range(r);
-        for (auto it = rg.first; it != rg.second; it++) {
-            const dht::token_range& range_ = it->first;
-            std::vector<inet_address>& current_eps = it->second;
-            slogger.debug("range={}, current_replica_endpoints={}, new_replica_endpoints={}", range_, current_eps, new_replica_endpoints);
-            for (auto ep : it->second) {
-                auto beg = new_replica_endpoints.begin();
-                auto end = new_replica_endpoints.end();
-                new_replica_endpoints.erase(std::remove(beg, end, ep), end);
-            }
-        }
-
-        if (slogger.is_enabled(logging::log_level::debug)) {
-            if (new_replica_endpoints.empty()) {
-                slogger.debug("Range {} already in all replicas", r);
-            } else {
-                slogger.debug("Range {} will be responsibility of {}", r, new_replica_endpoints);
-            }
-        }
-        for (auto& ep : new_replica_endpoints) {
-            changed_ranges.emplace(r, ep);
-        }
-    }
-
-    return changed_ranges;
-}
{ - slogger.warn("unbootstrap fails to stream : {}", std::current_exception()); - throw; - } - slogger.debug("stream acks all received."); - leave_ring(); -} - -future<> storage_service::restore_replica_count(inet_address endpoint, inet_address notify_endpoint) { - std::unordered_multimap> ranges_to_fetch; - - auto my_address = get_broadcast_address(); - - auto non_system_keyspaces = _db.local().get_non_system_keyspaces(); - for (const auto& keyspace_name : non_system_keyspaces) { - std::unordered_multimap changed_ranges = get_changed_ranges_for_leaving(keyspace_name, endpoint); - dht::token_range_vector my_new_ranges; - for (auto& x : changed_ranges) { - if (x.second == my_address) { - my_new_ranges.emplace_back(x.first); - } - } - std::unordered_multimap source_ranges = get_new_source_ranges(keyspace_name, my_new_ranges); - std::unordered_map tmp; - for (auto& x : source_ranges) { - tmp[x.first].emplace_back(x.second); - } - ranges_to_fetch.emplace(keyspace_name, std::move(tmp)); - } - auto sp = make_lw_shared("Restore replica count"); - for (auto& x: ranges_to_fetch) { - const sstring& keyspace_name = x.first; - std::unordered_map& maps = x.second; - for (auto& m : maps) { - auto source = m.first; - auto ranges = m.second; - slogger.debug("Requesting from {} ranges {}", source, ranges); - sp->request_ranges(source, keyspace_name, ranges); - } - } - return sp->execute().then_wrapped([this, sp, notify_endpoint] (auto&& f) { - try { - auto state = f.get0(); - return this->send_replication_notification(notify_endpoint); - } catch (...) { - slogger.warn("Streaming to restore replica count failed: {}", std::current_exception()); - // We still want to send the notification - return this->send_replication_notification(notify_endpoint); - } - return make_ready_future<>(); - }); -} - -// Runs inside seastar::async context -void storage_service::excise(std::unordered_set tokens, inet_address endpoint) { - slogger.info("Removing tokens {} for {}", tokens, endpoint); - // FIXME: HintedHandOffManager.instance.deleteHintsForEndpoint(endpoint); - remove_endpoint(endpoint); - _token_metadata.remove_endpoint(endpoint); - _token_metadata.remove_bootstrap_tokens(tokens); - - get_storage_service().invoke_on_all([endpoint] (auto&& ss) { - for (auto&& subscriber : ss._lifecycle_subscribers) { - try { - subscriber->on_leave_cluster(endpoint); - } catch (...) { - slogger.warn("Leave cluster notification failed {}: {}", endpoint, std::current_exception()); - } - } - }).get(); - - update_pending_ranges().get(); -} - -void storage_service::excise(std::unordered_set tokens, inet_address endpoint, int64_t expire_time) { - add_expire_time_if_found(endpoint, expire_time); - excise(tokens, endpoint); -} - -future<> storage_service::send_replication_notification(inet_address remote) { - // notify the remote token - auto done = make_shared(false); - auto local = get_broadcast_address(); - slogger.debug("Notifying {} of replication completion", remote); - return do_until( - [done, remote] { - return *done || !gms::get_local_failure_detector().is_alive(remote); - }, - [done, remote, local] { - auto& ms = netw::get_local_messaging_service(); - netw::msg_addr id{remote, 0}; - return ms.send_replication_finished(id, local).then_wrapped([id, done] (auto&& f) { - try { - f.get(); - *done = true; - } catch (...) 
{ - slogger.warn("Fail to send REPLICATION_FINISHED to {}: {}", id, std::current_exception()); - } - }); - } - ); -} - -future<> storage_service::confirm_replication(inet_address node) { - return run_with_no_api_lock([node] (storage_service& ss) { - auto removing_node = bool(ss._removing_node) ? sprint("%s", *ss._removing_node) : "NONE"; - slogger.info("Got confirm_replication from {}, removing_node {}", node, removing_node); - // replicatingNodes can be empty in the case where this node used to be a removal coordinator, - // but restarted before all 'replication finished' messages arrived. In that case, we'll - // still go ahead and acknowledge it. - if (!ss._replicating_nodes.empty()) { - ss._replicating_nodes.erase(node); - } else { - slogger.info("Received unexpected REPLICATION_FINISHED message from {}. Was this node recently a removal coordinator?", node); - } - }); -} - -// Runs inside seastar::async context -void storage_service::leave_ring() { - db::system_keyspace::set_bootstrap_state(db::system_keyspace::bootstrap_state::NEEDS_BOOTSTRAP).get(); - _token_metadata.remove_endpoint(get_broadcast_address()); - update_pending_ranges().get(); - - auto& gossiper = gms::get_local_gossiper(); - auto expire_time = gossiper.compute_expire_time().time_since_epoch().count(); - gossiper.add_local_application_state(gms::application_state::STATUS, value_factory.left(get_local_tokens().get0(), expire_time)).get(); - auto delay = std::max(get_ring_delay(), gms::gossiper::INTERVAL); - slogger.info("Announcing that I have left the ring for {}ms", delay.count()); - sleep(delay).get(); -} - -future<> -storage_service::stream_ranges(std::unordered_map> ranges_to_stream_by_keyspace) { - // First, we build a list of ranges to stream to each host, per table - std::unordered_map> sessions_to_stream_by_keyspace; - for (auto& entry : ranges_to_stream_by_keyspace) { - const auto& keyspace = entry.first; - auto& ranges_with_endpoints = entry.second; - - if (ranges_with_endpoints.empty()) { - continue; - } - - std::unordered_map ranges_per_endpoint; - for (auto& end_point_entry : ranges_with_endpoints) { - dht::token_range r = end_point_entry.first; - inet_address endpoint = end_point_entry.second; - ranges_per_endpoint[endpoint].emplace_back(r); - } - sessions_to_stream_by_keyspace.emplace(keyspace, std::move(ranges_per_endpoint)); - } - auto sp = make_lw_shared("Unbootstrap"); - for (auto& entry : sessions_to_stream_by_keyspace) { - const auto& keyspace_name = entry.first; - // TODO: we can move to avoid copy of std::vector - auto& ranges_per_endpoint = entry.second; - - for (auto& ranges_entry : ranges_per_endpoint) { - auto& ranges = ranges_entry.second; - auto new_endpoint = ranges_entry.first; - // TODO each call to transferRanges re-flushes, this is potentially a lot of waste - sp->transfer_ranges(new_endpoint, keyspace_name, ranges); - } - } - return sp->execute().discard_result().then([sp] { - slogger.info("stream_ranges successful"); - }).handle_exception([] (auto ep) { - slogger.info("stream_ranges failed: {}", ep); - return make_exception_future(std::runtime_error("stream_ranges failed")); - }); -} - -future<> storage_service::stream_hints() { - // FIXME: flush hits column family -#if 0 - // StreamPlan will not fail if there are zero files to transfer, so flush anyway (need to get any in-memory hints, as well) - ColumnFamilyStore hintsCF = Keyspace.open(SystemKeyspace.NAME).getColumnFamilyStore(SystemKeyspace.HINTS); - FBUtilities.waitOnFuture(hintsCF.forceFlush()); -#endif - - // gather all live 
-
-future<> storage_service::confirm_replication(inet_address node) {
-    return run_with_no_api_lock([node] (storage_service& ss) {
-        auto removing_node = bool(ss._removing_node) ? sprint("%s", *ss._removing_node) : "NONE";
-        slogger.info("Got confirm_replication from {}, removing_node {}", node, removing_node);
-        // replicatingNodes can be empty in the case where this node used to be a removal coordinator,
-        // but restarted before all 'replication finished' messages arrived. In that case, we'll
-        // still go ahead and acknowledge it.
-        if (!ss._replicating_nodes.empty()) {
-            ss._replicating_nodes.erase(node);
-        } else {
-            slogger.info("Received unexpected REPLICATION_FINISHED message from {}. Was this node recently a removal coordinator?", node);
-        }
-    });
-}
-
-// Runs inside seastar::async context
-void storage_service::leave_ring() {
-    db::system_keyspace::set_bootstrap_state(db::system_keyspace::bootstrap_state::NEEDS_BOOTSTRAP).get();
-    _token_metadata.remove_endpoint(get_broadcast_address());
-    update_pending_ranges().get();
-
-    auto& gossiper = gms::get_local_gossiper();
-    auto expire_time = gossiper.compute_expire_time().time_since_epoch().count();
-    gossiper.add_local_application_state(gms::application_state::STATUS, value_factory.left(get_local_tokens().get0(), expire_time)).get();
-    auto delay = std::max(get_ring_delay(), gms::gossiper::INTERVAL);
-    slogger.info("Announcing that I have left the ring for {}ms", delay.count());
-    sleep(delay).get();
-}
-
-future<>
-storage_service::stream_ranges(std::unordered_map<sstring, std::unordered_multimap<dht::token_range, inet_address>> ranges_to_stream_by_keyspace) {
-    // First, we build a list of ranges to stream to each host, per table
-    std::unordered_map<sstring, std::unordered_map<inet_address, dht::token_range_vector>> sessions_to_stream_by_keyspace;
-    for (auto& entry : ranges_to_stream_by_keyspace) {
-        const auto& keyspace = entry.first;
-        auto& ranges_with_endpoints = entry.second;
-
-        if (ranges_with_endpoints.empty()) {
-            continue;
-        }
-
-        std::unordered_map<inet_address, dht::token_range_vector> ranges_per_endpoint;
-        for (auto& end_point_entry : ranges_with_endpoints) {
-            dht::token_range r = end_point_entry.first;
-            inet_address endpoint = end_point_entry.second;
-            ranges_per_endpoint[endpoint].emplace_back(r);
-        }
-        sessions_to_stream_by_keyspace.emplace(keyspace, std::move(ranges_per_endpoint));
-    }
-    auto sp = make_lw_shared<streaming::stream_plan>("Unbootstrap");
-    for (auto& entry : sessions_to_stream_by_keyspace) {
-        const auto& keyspace_name = entry.first;
-        // TODO: we can move to avoid copy of std::vector
-        auto& ranges_per_endpoint = entry.second;
-
-        for (auto& ranges_entry : ranges_per_endpoint) {
-            auto& ranges = ranges_entry.second;
-            auto new_endpoint = ranges_entry.first;
-            // TODO each call to transferRanges re-flushes, this is potentially a lot of waste
-            sp->transfer_ranges(new_endpoint, keyspace_name, ranges);
-        }
-    }
-    return sp->execute().discard_result().then([sp] {
-        slogger.info("stream_ranges successful");
-    }).handle_exception([] (auto ep) {
-        slogger.info("stream_ranges failed: {}", ep);
-        return make_exception_future<>(std::runtime_error("stream_ranges failed"));
-    });
-}
-
-future<> storage_service::stream_hints() {
-    // FIXME: flush hints column family
-#if 0
-    // StreamPlan will not fail if there are zero files to transfer, so flush anyway (need to get any in-memory hints, as well)
-    ColumnFamilyStore hintsCF = Keyspace.open(SystemKeyspace.NAME).getColumnFamilyStore(SystemKeyspace.HINTS);
-    FBUtilities.waitOnFuture(hintsCF.forceFlush());
-#endif
-
-    // gather all live nodes in the cluster that aren't also leaving
-    auto candidates = get_local_storage_service().get_token_metadata().clone_after_all_left().get_all_endpoints();
-    auto beg = candidates.begin();
-    auto end = candidates.end();
-    auto remove_fn = [br = get_broadcast_address()] (const inet_address& ep) {
-        return ep == br || !gms::get_local_failure_detector().is_alive(ep);
-    };
-    candidates.erase(std::remove_if(beg, end, remove_fn), end);
-
-    if (candidates.empty()) {
-        slogger.warn("Unable to stream hints since no live endpoints seen");
-        throw std::runtime_error("Unable to stream hints since no live endpoints seen");
-    } else {
-        // stream to the closest peer as chosen by the snitch
-        auto& snitch = locator::i_endpoint_snitch::get_local_snitch_ptr();
-
-        snitch->sort_by_proximity(get_broadcast_address(), candidates);
-        auto hints_destination_host = candidates.front();
-
-        // stream all hints -- range list will be a singleton of "the entire ring"
-        dht::token_range_vector ranges = {dht::token_range::make_open_ended_both_sides()};
-        slogger.debug("stream_hints: ranges={}", ranges);
-
-        auto sp = make_lw_shared<streaming::stream_plan>("Hints");
-        std::vector<sstring> column_families = { db::system_keyspace::HINTS };
-        auto keyspace = db::system_keyspace::NAME;
-        sp->transfer_ranges(hints_destination_host, keyspace, ranges, column_families);
-        return sp->execute().discard_result().then([sp] {
-            slogger.info("stream_hints successful");
-        }).handle_exception([] (auto ep) {
-            slogger.info("stream_hints failed: {}", ep);
-            return make_exception_future<>(std::runtime_error("stream_hints failed"));
-        });
-    }
-}
-
-future<> storage_service::start_leaving() {
-    auto& gossiper = gms::get_local_gossiper();
-    return gossiper.add_local_application_state(application_state::STATUS, value_factory.leaving(get_local_tokens().get0())).then([this] {
-        _token_metadata.add_leaving_endpoint(get_broadcast_address());
-        return update_pending_ranges();
-    });
-}
-
-void storage_service::add_expire_time_if_found(inet_address endpoint, int64_t expire_time) {
-    if (expire_time != 0L) {
-        using clk = gms::gossiper::clk;
-        auto time = clk::time_point(clk::duration(expire_time));
-        gms::get_local_gossiper().add_expire_time_for_endpoint(endpoint, time);
-    }
-}
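Both streaming paths above drive the same API: one stream_plan, any mix of transfer_ranges()/request_ranges() calls, then a single execute(). Reduced to its skeleton (peer, keyspace and ranges are placeholders; the streaming types are the ones used throughout this file):

    future<> stream_example(gms::inet_address peer, sstring keyspace, dht::token_range_vector ranges) {
        auto sp = make_lw_shared<streaming::stream_plan>("Example");
        sp->transfer_ranges(peer, keyspace, ranges); // push our replicas of these ranges
        sp->request_ranges(peer, keyspace, ranges);  // pull the peer's replicas of them
        return sp->execute().discard_result().then([sp] {
            slogger.info("example stream plan finished");
        });
    }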
-
-// For more details, see the comments on column_family::load_new_sstables.
-// All the global operations are going to happen here, and just the reloading happens
-// in there.
-future<> storage_service::load_new_sstables(sstring ks_name, sstring cf_name) {
-    class max_element {
-        int64_t _result = 0;
-    public:
-        future<> operator()(int64_t value) {
-            _result = std::max(value, _result);
-            return make_ready_future<>();
-        }
-        int64_t get() && {
-            return _result;
-        }
-    };
-
-    if (_loading_new_sstables) {
-        throw std::runtime_error("Already loading SSTables. Try again later");
-    } else {
-        _loading_new_sstables = true;
-    }
-
-    slogger.info("Loading new SSTables for {}.{}...", ks_name, cf_name);
-
-    // First, we need to stop SSTable creation for that CF in all shards. This is a really horrible
-    // thing to do, because under normal circumstances this can make dirty memory go up to the point
-    // of explosion.
-    //
-    // Remember, however, that we are assuming this is going to be run on an empty CF. In that scenario,
-    // stopping the SSTables should have no effect, while guaranteeing we will see no data corruption
-    // *in case* this is run on a live CF.
-    //
-    // The statement above is valid at least from the Scylla side of things: it is still totally possible
-    // that someone just copies the table over existing ones. There isn't much we can do about it.
-    return _db.map_reduce(max_element(), [ks_name, cf_name] (database& db) {
-        auto& cf = db.find_column_family(ks_name, cf_name);
-        return cf.disable_sstable_write();
-    }).then([this, cf_name, ks_name] (int64_t max_seen_sstable) {
-        // Then, we will reshuffle the tables to make sure that the generation numbers don't go too high.
-        // We will do all of it on the same CPU, to make sure that we won't have two parallel shufflers
-        // stepping on each other.
-
-        class all_generations {
-            std::set<int64_t> _result;
-        public:
-            future<> operator()(std::set<int64_t> value) {
-                _result.insert(value.begin(), value.end());
-                return make_ready_future<>();
-            }
-            std::set<int64_t> get() && {
-                return _result;
-            }
-        };
-
-        // We provide to reshuffle_sstables() the generation of all existing sstables, such that it will
-        // easily know which sstables are new.
-        return _db.map_reduce(all_generations(), [ks_name, cf_name] (database& db) {
-            auto& cf = db.find_column_family(ks_name, cf_name);
-            std::set<int64_t> generations;
-            for (auto& p : *(cf.get_sstables())) {
-                generations.insert(p->generation());
-            }
-            return make_ready_future<std::set<int64_t>>(std::move(generations));
-        }).then([this, max_seen_sstable, ks_name, cf_name] (std::set<int64_t> all_generations) {
-            auto shard = std::hash<sstring>()(cf_name) % smp::count;
-            return _db.invoke_on(shard, [ks_name, cf_name, max_seen_sstable, all_generations = std::move(all_generations)] (database& db) {
-                auto& cf = db.find_column_family(ks_name, cf_name);
-                return cf.reshuffle_sstables(std::move(all_generations), max_seen_sstable + 1);
-            });
-        });
-    }).then_wrapped([this, ks_name, cf_name] (future<std::vector<sstables::entry_descriptor>> f) {
-        std::vector<sstables::entry_descriptor> new_tables;
-        std::exception_ptr eptr;
-        int64_t new_gen = -1;
-
-        try {
-            new_tables = f.get0();
-        } catch(std::exception& e) {
-            slogger.error("Loading of new tables for {}.{} failed due to {}", ks_name, cf_name, e.what());
-            eptr = std::current_exception();
-        } catch(...) {
-            slogger.error("Loading of new tables for {}.{} failed due to an unexpected reason", ks_name, cf_name);
-            eptr = std::current_exception();
-        }
-
-        if (new_tables.size() > 0) {
-            new_gen = new_tables.back().generation;
-        }
-
-        slogger.debug("Now accepting writes for sstables with generation greater than or equal to {}", new_gen);
-        return _db.invoke_on_all([ks_name, cf_name, new_gen] (database& db) {
-            auto& cf = db.find_column_family(ks_name, cf_name);
-            auto disabled = std::chrono::duration_cast<std::chrono::microseconds>(cf.enable_sstable_write(new_gen)).count();
-            slogger.info("CF {}.{} at shard {} had SSTable writes disabled for {} usec", ks_name, cf_name, engine().cpu_id(), disabled);
-            return make_ready_future<>();
-        }).then([new_tables = std::move(new_tables), eptr = std::move(eptr)] {
-            if (eptr) {
-                return make_exception_future<std::vector<sstables::entry_descriptor>>(eptr);
-            }
-            return make_ready_future<std::vector<sstables::entry_descriptor>>(std::move(new_tables));
-        });
-    }).then([this, ks_name, cf_name] (std::vector<sstables::entry_descriptor> new_tables) {
-        auto f = distributed_loader::flush_upload_dir(_db, ks_name, cf_name);
-        return f.then([new_tables = std::move(new_tables), ks_name, cf_name] (std::vector<sstables::entry_descriptor> new_tables_from_upload) mutable {
-            if (new_tables.empty() && new_tables_from_upload.empty()) {
-                slogger.info("No new SSTables were found for {}.{}", ks_name, cf_name);
-            }
-            // merge new sstables found in both the column family and upload directories, if any.
-            new_tables.insert(new_tables.end(), new_tables_from_upload.begin(), new_tables_from_upload.end());
-            return make_ready_future<std::vector<sstables::entry_descriptor>>(std::move(new_tables));
-        });
-    }).then([this, ks_name, cf_name] (std::vector<sstables::entry_descriptor> new_tables) {
-        return distributed_loader::load_new_sstables(_db, ks_name, cf_name, std::move(new_tables)).then([ks_name, cf_name] {
-            slogger.info("Done loading new SSTables for {}.{} for all shards", ks_name, cf_name);
-        });
-    }).finally([this] {
-        _loading_new_sstables = false;
-    });
-}
-
-void storage_service::set_load_broadcaster(shared_ptr<load_broadcaster> lb) {
-    _lb = lb;
-}
-
-shared_ptr<load_broadcaster>& storage_service::get_load_broadcaster() {
-    return _lb;
-}
-
-future<> storage_service::shutdown_client_servers() {
-    return do_stop_rpc_server().then([this] { return do_stop_native_transport(); });
-}
-
-std::unordered_multimap<inet_address, dht::token_range>
-storage_service::get_new_source_ranges(const sstring& keyspace_name, const dht::token_range_vector& ranges) {
-    auto my_address = get_broadcast_address();
-    auto& fd = gms::get_local_failure_detector();
-    auto& ks = _db.local().find_keyspace(keyspace_name);
-    auto& strat = ks.get_replication_strategy();
-    auto tm = _token_metadata.clone_only_token_map();
-    std::unordered_multimap<dht::token_range, inet_address> range_addresses = strat.get_range_addresses(tm);
-    std::unordered_multimap<inet_address, dht::token_range> source_ranges;
-
-    // find alive sources for our new ranges
-    for (auto r : ranges) {
-        std::unordered_set<inet_address> possible_ranges;
-        auto rg = range_addresses.equal_range(r);
-        for (auto it = rg.first; it != rg.second; it++) {
-            possible_ranges.emplace(it->second);
-        }
-        auto& snitch = locator::i_endpoint_snitch::get_local_snitch_ptr();
-        std::vector<inet_address> sources = snitch->get_sorted_list_by_proximity(my_address, possible_ranges);
-
-        if (std::find(sources.begin(), sources.end(), my_address) != sources.end()) {
-            auto err = sprint("get_new_source_ranges: sources=%s, my_address=%s", sources, my_address);
-            slogger.warn(err.c_str());
-            throw std::runtime_error(err);
-        }
-
-        for (auto& source : sources) {
-            if (fd.is_alive(source)) {
-                source_ranges.emplace(source, r);
-                break;
-            }
-        }
-    }
-    return source_ranges;
-}
-
-std::pair<std::unordered_set<dht::token_range>, std::unordered_set<dht::token_range>>
-storage_service::calculate_stream_and_fetch_ranges(const dht::token_range_vector& current, const dht::token_range_vector& updated) {
-    std::unordered_set<dht::token_range> to_stream;
-    std::unordered_set<dht::token_range> to_fetch;
-
-    for (auto r1 : current) {
-        bool intersect = false;
-        for (auto r2 : updated) {
-            if (r1.overlaps(r2, dht::token_comparator())) {
-                // adding difference ranges to stream to the ring
-                for (auto r : r1.subtract(r2, dht::token_comparator())) {
-                    to_stream.emplace(r);
-                }
-                intersect = true;
-            }
-        }
-        if (!intersect) {
-            to_stream.emplace(r1); // should seed whole old range
-        }
-    }
-
-    for (auto r2 : updated) {
-        bool intersect = false;
-        for (auto r1 : current) {
-            if (r2.overlaps(r1, dht::token_comparator())) {
-                // adding difference ranges to fetch from a ring
-                for (auto r : r2.subtract(r1, dht::token_comparator())) {
-                    to_fetch.emplace(r);
-                }
-                intersect = true;
-            }
-        }
-        if (!intersect) {
-            to_fetch.emplace(r2); // should fetch whole new range
-        }
-    }
-
-    if (slogger.is_enabled(logging::log_level::debug)) {
-        slogger.debug("current = {}", current);
-        slogger.debug("updated = {}", updated);
-        slogger.debug("to_stream = {}", to_stream);
-        slogger.debug("to_fetch = {}", to_fetch);
-    }
-
-    return std::pair<std::unordered_set<dht::token_range>, std::unordered_set<dht::token_range>>(to_stream, to_fetch);
-}
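A concrete (hypothetical) case makes the two passes above easier to check: with current = {(0,100]} and updated = {(50,150]}, the overlap is (50,100], so the node streams away what it stops owning and fetches what it newly owns:

    // current = { (0,100] }, updated = { (50,150] }
    // pass 1: r1.subtract(r2) -> (0,50]    => to_stream = { (0,50] }
    // pass 2: r2.subtract(r1) -> (100,150] => to_fetch  = { (100,150] }
    // auto res = calculate_stream_and_fetch_ranges(current, updated);
    // res.first  == { (0,50] }     // handed off to the remaining owners
    // res.second == { (100,150] }  // pulled from its current owners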
-
-void storage_service::range_relocator::calculate_to_from_streams(std::unordered_set<token> new_tokens, std::vector<sstring> keyspace_names) {
-    auto& ss = get_local_storage_service();
-
-    auto local_address = ss.get_broadcast_address();
-    auto& snitch = locator::i_endpoint_snitch::get_local_snitch_ptr();
-
-    auto token_meta_clone_all_settled = ss._token_metadata.clone_after_all_settled();
-    // clone to avoid concurrent modification in calculateNaturalEndpoints
-    auto token_meta_clone = ss._token_metadata.clone_only_token_map();
-
-    for (auto keyspace : keyspace_names) {
-        slogger.debug("Calculating ranges to stream and request for keyspace {}", keyspace);
-        for (auto new_token : new_tokens) {
-            // replication strategy of the current keyspace (aka table)
-            auto& ks = ss._db.local().find_keyspace(keyspace);
-            auto& strategy = ks.get_replication_strategy();
-            // getting the collection of ranges currently used by this keyspace
-            dht::token_range_vector current_ranges = ss.get_ranges_for_endpoint(keyspace, local_address);
-            // collection of ranges which this node will serve after the move to the new token
-            dht::token_range_vector updated_ranges = strategy.get_pending_address_ranges(token_meta_clone, new_token, local_address);
-
-            // ring ranges and endpoints associated with them;
-            // this is used to determine which nodes we should ping about range data
-            std::unordered_multimap<dht::token_range, inet_address> range_addresses = strategy.get_range_addresses(token_meta_clone);
-            std::unordered_map<dht::token_range, std::vector<inet_address>> range_addresses_map;
-            for (auto& x : range_addresses) {
-                range_addresses_map[x.first].emplace_back(x.second);
-            }
-
-            // calculated parts of the ranges to request/stream from/to nodes in the ring
-            // std::pair(to_stream, to_fetch)
-            std::pair<std::unordered_set<dht::token_range>, std::unordered_set<dht::token_range>> ranges_per_keyspace =
-                ss.calculate_stream_and_fetch_ranges(current_ranges, updated_ranges);
-            /**
-             * In this loop we are going through all ranges "to fetch" and determining
-             * nodes in the ring responsible for data we are interested in
-             */
-            std::unordered_multimap<dht::token_range, inet_address> ranges_to_fetch_with_preferred_endpoints;
-            for (dht::token_range to_fetch : ranges_per_keyspace.second) {
-                for (auto& x : range_addresses_map) {
-                    const dht::token_range& r = x.first;
-                    std::vector<inet_address>& eps = x.second;
-                    if (r.contains(to_fetch, dht::token_comparator())) {
-                        std::vector<inet_address> endpoints;
-                        if (dht::range_streamer::use_strict_consistency()) {
-                            auto end_token = to_fetch.end() ? to_fetch.end()->value() : dht::maximum_token();
-                            std::vector<inet_address> old_endpoints = eps;
-                            std::vector<inet_address> new_endpoints = strategy.calculate_natural_endpoints(end_token, token_meta_clone_all_settled);
-
-                            // Due to CASSANDRA-5953 we can have a higher RF than we have endpoints.
-                            // So we need to be careful to only be strict when endpoints == RF
-                            if (old_endpoints.size() == strategy.get_replication_factor()) {
-                                for (auto n : new_endpoints) {
-                                    auto beg = old_endpoints.begin();
-                                    auto end = old_endpoints.end();
-                                    old_endpoints.erase(std::remove(beg, end, n), end);
-                                }
-                                // No relocation required
-                                if (old_endpoints.empty()) {
-                                    continue;
-                                }
-
-                                if (old_endpoints.size() != 1) {
-                                    throw std::runtime_error(sprint("Expected 1 endpoint but found %d", old_endpoints.size()));
-                                }
-                            }
-                            endpoints.emplace_back(old_endpoints.front());
-                        } else {
-                            std::unordered_set<inet_address> eps_set(eps.begin(), eps.end());
-                            endpoints = snitch->get_sorted_list_by_proximity(local_address, eps_set);
-                        }
-
-                        // storing range and preferred endpoint set
-                        for (auto ep : endpoints) {
-                            ranges_to_fetch_with_preferred_endpoints.emplace(to_fetch, ep);
-                        }
-                    }
-                }
-
-                std::vector<inet_address> address_list;
-                auto rg = ranges_to_fetch_with_preferred_endpoints.equal_range(to_fetch);
-                for (auto it = rg.first; it != rg.second; it++) {
-                    address_list.push_back(it->second);
-                }
-
-                if (address_list.empty()) {
-                    continue;
-                }
-
-                if (dht::range_streamer::use_strict_consistency()) {
-                    if (address_list.size() > 1) {
-                        throw std::runtime_error(sprint("Multiple strict sources found for %s", to_fetch));
-                    }
-
-                    auto source_ip = address_list.front();
-                    auto& gossiper = gms::get_local_gossiper();
-                    auto state = gossiper.get_endpoint_state_for_endpoint(source_ip);
-                    if (gossiper.is_enabled() && state && !state->is_alive()) {
-                        throw std::runtime_error(sprint("A node required to move the data consistently is down (%s). If you wish to move the data from a potentially inconsistent replica, restart the node with consistent_rangemovement=false", source_ip));
-                    }
-                }
-            }
-            // calculating endpoints to stream current ranges to, if needed;
-            // in some situations a node will handle its current ranges as part of the new ranges
-            std::unordered_multimap<inet_address, dht::token_range> endpoint_ranges;
-            std::unordered_map<inet_address, dht::token_range_vector> endpoint_ranges_map;
-            for (dht::token_range to_stream : ranges_per_keyspace.first) {
-                auto end_token = to_stream.end() ? to_stream.end()->value() : dht::maximum_token();
-                std::vector<inet_address> current_endpoints = strategy.calculate_natural_endpoints(end_token, token_meta_clone);
-                std::vector<inet_address> new_endpoints = strategy.calculate_natural_endpoints(end_token, token_meta_clone_all_settled);
-                slogger.debug("Range: {} Current endpoints: {} New endpoints: {}", to_stream, current_endpoints, new_endpoints);
-                std::sort(current_endpoints.begin(), current_endpoints.end());
-                std::sort(new_endpoints.begin(), new_endpoints.end());
-
-                std::vector<inet_address> diff;
-                std::set_difference(new_endpoints.begin(), new_endpoints.end(),
-                    current_endpoints.begin(), current_endpoints.end(), std::back_inserter(diff));
-                for (auto address : diff) {
-                    slogger.debug("Range {} has new owner {}", to_stream, address);
-                    endpoint_ranges.emplace(address, to_stream);
-                }
-            }
-            for (auto& x : endpoint_ranges) {
-                endpoint_ranges_map[x.first].emplace_back(x.second);
-            }
-
-            // stream ranges
-            for (auto& x : endpoint_ranges_map) {
-                auto& address = x.first;
-                auto& ranges = x.second;
-                slogger.debug("Will stream range {} of keyspace {} to endpoint {}", ranges, keyspace, address);
-                _stream_plan.transfer_ranges(address, keyspace, ranges);
-            }
-
-            // stream requests
-            std::unordered_multimap<inet_address, dht::token_range> work =
-                dht::range_streamer::get_work_map(ranges_to_fetch_with_preferred_endpoints, keyspace);
-            std::unordered_map<inet_address, dht::token_range_vector> work_map;
-            for (auto& x : work) {
-                work_map[x.first].emplace_back(x.second);
-            }
-
-            for (auto& x : work_map) {
-                auto& address = x.first;
-                auto& ranges = x.second;
-                slogger.debug("Will request range {} of keyspace {} from endpoint {}", ranges, keyspace, address);
-                _stream_plan.request_ranges(address, keyspace, ranges);
-            }
-            if (slogger.is_enabled(logging::log_level::debug)) {
-                for (auto& x : work) {
-                    slogger.debug("Keyspace {}: work map ep = {} --> range = {}", keyspace, x.first, x.second);
-                }
-            }
-        }
-    }
-}
-
-future<> storage_service::move(token new_token) {
-    return run_with_api_lock(sstring("move"), [new_token] (storage_service& ss) mutable {
-        return seastar::async([new_token, &ss] {
-            auto tokens = ss._token_metadata.sorted_tokens();
-            if (std::find(tokens.begin(), tokens.end(), new_token) != tokens.end()) {
-                throw std::runtime_error(sprint("target token %s is already owned by another node.", new_token));
-            }
-
-            // address of the current node
-            auto local_address = ss.get_broadcast_address();
-
-            // This doesn't make any sense in a vnodes environment.
-            if (ss.get_token_metadata().get_tokens(local_address).size() > 1) {
-                slogger.error("Invalid request to move(Token); This node has more than one token and cannot be moved thusly.");
-                throw std::runtime_error("This node has more than one token and cannot be moved thusly.");
-            }
-
-            auto keyspaces_to_process = ss._db.local().get_non_system_keyspaces();
-
-            ss.update_pending_ranges().get();
-
-            // checking if data is moving to this node
-            for (auto keyspace_name : keyspaces_to_process) {
-                if (ss._token_metadata.get_pending_ranges(keyspace_name, local_address).size() > 0) {
-                    throw std::runtime_error("data is currently moving to this node; unable to leave the ring");
-                }
-            }
-
-            gms::get_local_gossiper().add_local_application_state(application_state::STATUS, ss.value_factory.moving(new_token)).get();
-            ss.set_mode(mode::MOVING, sprint("Moving %s from %s to %s.", local_address, *(ss.get_local_tokens().get0().begin()), new_token), true);
-
-            ss.set_mode(mode::MOVING, sprint("Sleeping %d ms before starting to stream/fetch ranges", ss.get_ring_delay().count()), true);
-            sleep(ss.get_ring_delay()).get();
-
-            storage_service::range_relocator relocator(std::unordered_set<token>{new_token}, keyspaces_to_process);
-
-            if (relocator.streams_needed()) {
-                ss.set_mode(mode::MOVING, "fetching new ranges and streaming old ranges", true);
-                try {
-                    relocator.stream().get();
-                } catch (...) {
-                    throw std::runtime_error(sprint("Interrupted while waiting for stream/fetch ranges to finish: %s", std::current_exception()));
-                }
-            } else {
-                ss.set_mode(mode::MOVING, "No ranges to fetch/stream", true);
-            }
-
-            ss.set_tokens(std::unordered_set<token>{new_token}); // setting new token as we have everything settled
-
-            slogger.debug("Successfully moved to new token {}", *(ss.get_local_tokens().get0().begin()));
-        });
-    });
-}
-std::vector<token_range_endpoints>
-storage_service::describe_ring(const sstring& keyspace, bool include_only_local_dc) const {
-    std::vector<token_range_endpoints> ranges;
-    //Token.TokenFactory tf = getPartitioner().getTokenFactory();
-
-    std::unordered_map<dht::token_range, std::vector<inet_address>> range_to_address_map =
-            include_only_local_dc
-            ? get_range_to_address_map_in_local_dc(keyspace)
-            : get_range_to_address_map(keyspace);
-    for (auto entry : range_to_address_map) {
-        auto range = entry.first;
-        auto addresses = entry.second;
-        token_range_endpoints tr;
-        if (range.start()) {
-            tr._start_token = dht::global_partitioner().to_sstring(range.start()->value());
-        }
-        if (range.end()) {
-            tr._end_token = dht::global_partitioner().to_sstring(range.end()->value());
-        }
-        for (auto endpoint : addresses) {
-            endpoint_details details;
-            details._host = boost::lexical_cast<std::string>(endpoint);
-            details._datacenter = locator::i_endpoint_snitch::get_local_snitch_ptr()->get_datacenter(endpoint);
-            details._rack = locator::i_endpoint_snitch::get_local_snitch_ptr()->get_rack(endpoint);
-            tr._rpc_endpoints.push_back(get_rpc_address(endpoint));
-            tr._endpoints.push_back(details._host);
-            tr._endpoint_details.push_back(details);
-        }
-        ranges.push_back(tr);
-    }
-    // Convert to wrapping ranges
-    auto left_inf = boost::find_if(ranges, [] (const token_range_endpoints& tr) {
-        return tr._start_token.empty();
-    });
-    auto right_inf = boost::find_if(ranges, [] (const token_range_endpoints& tr) {
-        return tr._end_token.empty();
-    });
-    using set = std::unordered_set<sstring>;
-    if (left_inf != right_inf
-            && left_inf != ranges.end()
-            && right_inf != ranges.end()
-            && (boost::copy_range<set>(left_inf->_endpoints)
-                == boost::copy_range<set>(right_inf->_endpoints))) {
-        left_inf->_start_token = std::move(right_inf->_start_token);
-        ranges.erase(right_inf);
-    }
-    return ranges;
-}
-
-std::unordered_map<dht::token_range, std::vector<inet_address>>
-storage_service::construct_range_to_endpoint_map(
-        const sstring& keyspace,
-        const dht::token_range_vector& ranges) const {
-    std::unordered_map<dht::token_range, std::vector<inet_address>> res;
-    for (auto r : ranges) {
-        res[r] = _db.local().find_keyspace(keyspace).get_replication_strategy().get_natural_endpoints(
-                r.end() ? r.end()->value() : dht::maximum_token());
-    }
-    return res;
-}
-
-
-std::map<token, inet_address> storage_service::get_token_to_endpoint_map() {
-    return _token_metadata.get_normal_and_bootstrapping_token_to_endpoint_map();
-}
-
-std::chrono::milliseconds storage_service::get_ring_delay() {
-    auto ring_delay = _db.local().get_config().ring_delay_ms();
-    slogger.trace("Set RING_DELAY to {}ms", ring_delay);
-    return std::chrono::milliseconds(ring_delay);
-}
-
-void storage_service::do_update_pending_ranges() {
-    if (engine().cpu_id() != 0) {
-        throw std::runtime_error("do_update_pending_ranges should be called on cpu zero");
-    }
-    // long start = System.currentTimeMillis();
-    auto keyspaces = _db.local().get_non_system_keyspaces();
-    for (auto& keyspace_name : keyspaces) {
-        auto& ks = _db.local().find_keyspace(keyspace_name);
-        auto& strategy = ks.get_replication_strategy();
-        get_local_storage_service().get_token_metadata().calculate_pending_ranges(strategy, keyspace_name);
-    }
-    // slogger.debug("finished calculation for {} keyspaces in {}ms", keyspaces.size(), System.currentTimeMillis() - start);
-}
-
-future<> storage_service::update_pending_ranges() {
-    return get_storage_service().invoke_on(0, [] (auto& ss){
-        ss._update_jobs++;
-        ss.do_update_pending_ranges();
-        // calculate_pending_ranges will modify token_metadata, we need to replicate to other cores
-        return ss.replicate_to_all_cores().finally([&ss, ss0 = ss.shared_from_this()] {
-            ss._update_jobs--;
-        });
-    });
-}
-
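describe_ring above stitches the two half-open ranges at the ring's wrap point back into one wrapping range when the same endpoints serve both. A stand-alone sketch of that merge (plain C++17; the ring_range struct is a hypothetical simplification of token_range_endpoints):

    #include <algorithm>
    #include <string>
    #include <vector>

    struct ring_range {
        std::string start_token;            // empty means no start bound (-inf)
        std::string end_token;              // empty means no end bound (+inf)
        std::vector<std::string> endpoints; // replicas, kept sorted for comparison
    };

    // Merge the unbounded-start and unbounded-end ranges into one wrapping range
    // when both have the same replica set, mirroring the deleted describe_ring logic.
    void merge_wrap_around(std::vector<ring_range>& ranges) {
        auto left = std::find_if(ranges.begin(), ranges.end(),
                [](const ring_range& r) { return r.start_token.empty(); });
        auto right = std::find_if(ranges.begin(), ranges.end(),
                [](const ring_range& r) { return r.end_token.empty(); });
        if (left != right && left != ranges.end() && right != ranges.end()
                && left->endpoints == right->endpoints) {
            left->start_token = std::move(right->start_token);
            ranges.erase(right);
        }
    }

    int main() {
        std::vector<ring_range> ranges{
            {"", "100", {"n1", "n2"}},    // (-inf, 100]
            {"100", "200", {"n2", "n3"}},
            {"200", "", {"n1", "n2"}},    // (200, +inf)
        };
        merge_wrap_around(ranges);        // first and last merge into (200, 100]
    }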
-future<> storage_service::keyspace_changed(const sstring& ks_name) {
-    // Update pending ranges since keyspace can be changed after we calculate pending ranges.
-    return update_pending_ranges().handle_exception([ks_name] (auto ep) {
-        slogger.warn("Failed to update pending ranges for ks = {}: {}", ks_name, ep);
-    });
-}
-
-void storage_service::init_messaging_service() {
-    auto& ms = netw::get_local_messaging_service();
-    ms.register_replication_finished([] (gms::inet_address from) {
-        return get_local_storage_service().confirm_replication(from);
-    });
-}
-
-void storage_service::uninit_messaging_service() {
-    auto& ms = netw::get_local_messaging_service();
-    ms.unregister_replication_finished();
-}
-
-static std::atomic<bool> isolated = { false };
-
-void storage_service::do_isolate_on_error(disk_error type)
-{
-    if (!isolated.exchange(true)) {
-        slogger.warn("Shutting down communications due to I/O errors until operator intervention");
-        // isolated protects us against multiple stops
-        service::get_local_storage_service().stop_transport();
-    }
-}
-
-future<sstring> storage_service::get_removal_status() {
-    return run_with_no_api_lock([] (storage_service& ss) {
-        if (!ss._removing_node) {
-            return make_ready_future<sstring>(sstring("No token removals in process."));
-        }
-        auto tokens = ss._token_metadata.get_tokens(*ss._removing_node);
-        if (tokens.empty()) {
-            return make_ready_future<sstring>(sstring("Node has no token"));
-        }
-        auto status = sprint("Removing token (%s). Waiting for replication confirmation from [%s].",
-                tokens.front(), join(",", ss._replicating_nodes));
-        return make_ready_future<sstring>(status);
-    });
-}
-
-future<> storage_service::force_remove_completion() {
-    return run_with_no_api_lock([] (storage_service& ss) {
-        return seastar::async([&ss] {
-            if (!ss._operation_in_progress.empty()) {
-                if (ss._operation_in_progress != sstring("removenode")) {
-                    throw std::runtime_error(sprint("Operation %s is in progress, try again", ss._operation_in_progress));
-                } else {
-                    // This flag will make removenode stop waiting for the confirmation
-                    ss._force_remove_completion = true;
-                    while (!ss._operation_in_progress.empty()) {
-                        // Wait for the removenode operation to complete
-                        slogger.info("Operation {} is in progress, wait for it to complete", ss._operation_in_progress);
-                        sleep(std::chrono::seconds(1)).get();
-                    }
-                    ss._force_remove_completion = false;
-                }
-            }
-            ss._operation_in_progress = sstring("removenode_force");
-            try {
-                if (!ss._replicating_nodes.empty() || !ss._token_metadata.get_leaving_endpoints().empty()) {
-                    auto leaving = ss._token_metadata.get_leaving_endpoints();
-                    slogger.warn("Removal not confirmed for {}, Leaving={}", join(",", ss._replicating_nodes), leaving);
-                    for (auto endpoint : leaving) {
-                        utils::UUID host_id;
-                        auto tokens = ss._token_metadata.get_tokens(endpoint);
-                        try {
-                            host_id = ss._token_metadata.get_host_id(endpoint);
-                        } catch (...) {
-                            slogger.warn("No host_id is found for endpoint {}", endpoint);
-                            continue;
-                        }
-                        gms::get_local_gossiper().advertise_token_removed(endpoint, host_id).get();
-                        std::unordered_set<token> tokens_set(tokens.begin(), tokens.end());
-                        ss.excise(tokens_set, endpoint);
-                    }
-                    ss._replicating_nodes.clear();
-                    ss._removing_node = std::experimental::nullopt;
-                } else {
-                    slogger.warn("No tokens to force removal on, call 'removenode' first");
-                }
-                ss._operation_in_progress = {};
-            } catch (...) {
-                ss._operation_in_progress = {};
-                throw;
-            }
-        });
-    });
-}
-
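do_isolate_on_error above relies on the exchange operation of std::atomic so that only the first reported I/O error triggers the transport shutdown, however many shards report errors concurrently. A minimal stand-alone sketch of that one-shot guard (plain C++17; isolate_once is a hypothetical name):

    #include <atomic>
    #include <iostream>

    static std::atomic<bool> isolated{false};

    // Only the first caller observes exchange(true) returning false,
    // so the shutdown action runs exactly once.
    void isolate_once() {
        if (!isolated.exchange(true)) {
            std::cout << "shutting down transports once\n";
        }
    }

    int main() {
        isolate_once(); // performs the shutdown
        isolate_once(); // no-op: guard already set
    }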
-/**
- * Takes an ordered list of adjacent tokens and divides them into the specified number of ranges.
- */
-static std::vector<std::pair<dht::token_range, uint64_t>>
-calculate_splits(std::vector<dht::token> tokens, uint32_t split_count, column_family& cf) {
-    auto sstables = cf.get_sstables();
-    const double step = static_cast<double>(tokens.size() - 1) / split_count;
-    auto prev_token_idx = 0;
-    std::vector<std::pair<dht::token_range, uint64_t>> splits;
-    splits.reserve(split_count);
-    for (uint32_t i = 1; i <= split_count; ++i) {
-        auto index = static_cast<size_t>(std::round(i * step));
-        dht::token_range range({{ std::move(tokens[prev_token_idx]), false }}, {{ tokens[index], true }});
-        // always return an estimate > 0 (see CASSANDRA-7322)
-        uint64_t estimated_keys_for_range = 0;
-        for (auto&& sst : *sstables) {
-            estimated_keys_for_range += sst->estimated_keys_for_range(range);
-        }
-        splits.emplace_back(std::move(range), std::max(static_cast<uint64_t>(cf.schema()->min_index_interval()), estimated_keys_for_range));
-        prev_token_idx = index;
-    }
-    return splits;
-};
-
-std::vector<std::pair<dht::token_range, uint64_t>>
-storage_service::get_splits(const sstring& ks_name, const sstring& cf_name, range<dht::token> range, uint32_t keys_per_split) {
-    using range_type = dht::token_range;
-    auto& cf = _db.local().find_column_family(ks_name, cf_name);
-    auto schema = cf.schema();
-    auto sstables = cf.get_sstables();
-    uint64_t total_row_count_estimate = 0;
-    std::vector<dht::token> tokens;
-    std::vector<range_type> unwrapped;
-    if (range.is_wrap_around(dht::token_comparator())) {
-        auto uwr = range.unwrap();
-        unwrapped.emplace_back(std::move(uwr.second));
-        unwrapped.emplace_back(std::move(uwr.first));
-    } else {
-        unwrapped.emplace_back(std::move(range));
-    }
-    tokens.push_back(std::move(unwrapped[0].start().value_or(range_type::bound(dht::minimum_token()))).value());
-    for (auto&& r : unwrapped) {
-        std::vector<dht::token> range_tokens;
-        for (auto &&sst : *sstables) {
-            total_row_count_estimate += sst->estimated_keys_for_range(r);
-            auto keys = sst->get_key_samples(*cf.schema(), r);
-            std::transform(keys.begin(), keys.end(), std::back_inserter(range_tokens), [](auto&& k) { return std::move(k.token()); });
-        }
-        std::sort(range_tokens.begin(), range_tokens.end());
-        std::move(range_tokens.begin(), range_tokens.end(), std::back_inserter(tokens));
-    }
-    tokens.push_back(std::move(unwrapped[unwrapped.size() - 1].end().value_or(range_type::bound(dht::maximum_token()))).value());
-
-    // split_count should be much smaller than number of key samples, to avoid huge sampling error
-    constexpr uint32_t min_samples_per_split = 4;
-    uint64_t max_split_count = tokens.size() / min_samples_per_split + 1;
-    uint32_t split_count = std::max(uint32_t(1), static_cast<uint32_t>(std::min(max_split_count, total_row_count_estimate / keys_per_split)));
-
-    return calculate_splits(std::move(tokens), split_count, cf);
-};
-
-dht::token_range_vector
-storage_service::get_ranges_for_endpoint(const sstring& name, const gms::inet_address& ep) const {
-    return _db.local().find_keyspace(name).get_replication_strategy().get_ranges(ep);
-}
-
-dht::token_range_vector
-storage_service::get_all_ranges(const std::vector<token>& sorted_tokens) const {
-    if (sorted_tokens.empty())
-        return dht::token_range_vector();
-    int size = sorted_tokens.size();
-    dht::token_range_vector ranges;
-    ranges.push_back(dht::token_range::make_ending_with(range_bound<token>(sorted_tokens[0], true)));
-    for (int i = 1; i < size; ++i) {
-        dht::token_range r(range<token>::bound(sorted_tokens[i - 1], false), range<token>::bound(sorted_tokens[i], true));
-        ranges.push_back(r);
-    }
-    ranges.push_back(dht::token_range::make_starting_with(range_bound<token>(sorted_tokens[size-1], false)));
-
-    return ranges;
-}
-
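get_all_ranges above turns N sorted tokens into N+1 intervals: an open-ended range below the first token, (prev, cur] ranges between neighbours, and an open-ended range above the last token. A stand-alone sketch with optional bounds standing in for the dht range types (plain C++17; all names hypothetical):

    #include <cstdint>
    #include <iostream>
    #include <optional>
    #include <string>
    #include <utility>
    #include <vector>

    using token = int64_t;
    // Pair of (start, end); nullopt start means -inf, nullopt end means +inf.
    // Start bounds are exclusive, end bounds inclusive, as in the deleted code.
    using token_range = std::pair<std::optional<token>, std::optional<token>>;

    std::vector<token_range> all_ranges(const std::vector<token>& sorted_tokens) {
        std::vector<token_range> ranges;
        if (sorted_tokens.empty()) {
            return ranges;
        }
        ranges.push_back({std::nullopt, sorted_tokens.front()});        // (-inf, first]
        for (std::size_t i = 1; i < sorted_tokens.size(); ++i) {
            ranges.push_back({sorted_tokens[i - 1], sorted_tokens[i]}); // (prev, cur]
        }
        ranges.push_back({sorted_tokens.back(), std::nullopt});         // (last, +inf)
        return ranges;
    }

    int main() {
        for (auto& [start, end] : all_ranges({10, 20, 30})) {
            std::cout << (start ? std::to_string(*start) : "-inf") << " .. "
                      << (end ? std::to_string(*end) : "+inf") << "\n";
        }
    }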
-std::vector<gms::inet_address>
-storage_service::get_natural_endpoints(const sstring& keyspace,
-        const sstring& cf, const sstring& key) const {
-    sstables::key_view key_view = sstables::key_view(bytes_view(reinterpret_cast<const int8_t*>(key.c_str()), key.size()));
-    dht::token token = dht::global_partitioner().get_token(key_view);
-    return get_natural_endpoints(keyspace, token);
-}
-
-std::vector<gms::inet_address>
-storage_service::get_natural_endpoints(const sstring& keyspace, const token& pos) const {
-    return _db.local().find_keyspace(keyspace).get_replication_strategy().get_natural_endpoints(pos);
-}
-
-} // namespace service
-
diff --git a/scylla/service/storage_service.hh b/scylla/service/storage_service.hh
deleted file mode 100644
index 05cf389..0000000
--- a/scylla/service/storage_service.hh
+++ /dev/null
@@ -1,2254 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Modified by ScyllaDB
- * Copyright (C) 2015 ScyllaDB
- *
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "gms/i_endpoint_state_change_subscriber.hh"
-#include "service/endpoint_lifecycle_subscriber.hh"
-#include "locator/token_metadata.hh"
-#include "gms/gossiper.hh"
-#include "utils/UUID_gen.hh"
-#include "core/distributed.hh"
-#include "dht/i_partitioner.hh"
-#include "dht/boot_strapper.hh"
-#include "dht/token_range_endpoints.hh"
-#include "core/sleep.hh"
-#include "gms/application_state.hh"
-#include "db/system_keyspace.hh"
-#include "core/semaphore.hh"
-#include "utils/fb_utilities.hh"
-#include "database.hh"
-#include "streaming/stream_state.hh"
-#include "streaming/stream_plan.hh"
-#include
-#include "disk-error-handler.hh"
-#include "gms/feature.hh"
-
-namespace cql_transport {
-    class cql_server;
-    class redis_server;
-}
-class thrift_server;
-
-namespace service {
-
-class load_broadcaster;
-class storage_service;
-
-extern distributed<storage_service> _the_storage_service;
-inline distributed<storage_service>& get_storage_service() {
-    return _the_storage_service;
-}
-inline storage_service& get_local_storage_service() {
-    return _the_storage_service.local();
-}
-
-int get_generation_number();
-
-enum class disk_error { regular, commit };
-
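get_natural_endpoints above maps a key to a token via the partitioner and asks the replication strategy for that token's replicas. The heart of that lookup is a successor search on the token ring; a stand-alone sketch in the spirit of a simple (SimpleStrategy-like) placement, not the deleted code's actual strategy (plain C++17; std::hash stands in for the real partitioner, names hypothetical):

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    using token = uint64_t;

    // Walk clockwise from the key's token and take the first rf token owners,
    // wrapping around the ring (duplicate-owner handling for vnodes omitted).
    std::vector<std::string> natural_endpoints(const std::map<token, std::string>& ring,
                                               const std::string& key, std::size_t rf) {
        std::vector<std::string> replicas;
        if (ring.empty()) {
            return replicas;
        }
        token t = std::hash<std::string>{}(key); // hypothetical partitioner
        auto it = ring.upper_bound(t);
        while (replicas.size() < rf && replicas.size() < ring.size()) {
            if (it == ring.end()) {
                it = ring.begin(); // wrap around the ring
            }
            replicas.push_back(it->second);
            ++it;
        }
        return replicas;
    }

    int main() {
        std::map<token, std::string> ring{{100, "n1"}, {200, "n2"}, {300, "n3"}};
        for (auto& ep : natural_endpoints(ring, "user:42", 2)) {
            std::cout << ep << "\n";
        }
    }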
-/**
- * This abstraction contains the token/identifier of this node
- * on the identifier space. This token gets gossiped around.
- * This class will also maintain histograms of the load information
- * of other nodes in the cluster.
- */
-class storage_service : public service::migration_listener, public gms::i_endpoint_state_change_subscriber, public seastar::async_sharded_service<storage_service> {
-public:
-    struct snapshot_details {
-        int64_t live;
-        int64_t total;
-        sstring cf;
-        sstring ks;
-    };
-private:
-    using token = dht::token;
-    using token_range_endpoints = dht::token_range_endpoints;
-    using endpoint_details = dht::endpoint_details;
-    using boot_strapper = dht::boot_strapper;
-    using token_metadata = locator::token_metadata;
-    using application_state = gms::application_state;
-    using inet_address = gms::inet_address;
-    using versioned_value = gms::versioned_value;
-#if 0
-    private static final Logger logger = LoggerFactory.getLogger(StorageService.class);
-
-    /* JMX notification serial number counter */
-    private final AtomicLong notificationSerialNumber = new AtomicLong();
-#endif
-    distributed<database>& _db;
-    int _update_jobs{0};
-    // Note that this is obviously only valid for the current shard. Users of
-    // this facility should elect a shard to be the coordinator based on any
-    // given objective criteria
-    //
-    // It shouldn't be impossible to actively serialize two callers if the need
-    // ever arises.
-    bool _loading_new_sstables = false;
-    shared_ptr<load_broadcaster> _lb;
-    shared_ptr<distributed<cql_transport::cql_server>> _cql_server;
-    shared_ptr<distributed<thrift_server>> _thrift_server;
-    shared_ptr<distributed<cql_transport::redis_server>> _redis_server;
-    sstring _operation_in_progress;
-    bool _force_remove_completion = false;
-    bool _ms_stopped = false;
-    bool _stream_manager_stopped = false;
-public:
-    storage_service(distributed<database>& db);
-    void isolate_on_error();
-    void isolate_on_commit_error();
-
-    // Needed by distributed<>
-    future<> stop();
-    void init_messaging_service();
-    void uninit_messaging_service();
-
-private:
-    void do_update_pending_ranges();
-
-public:
-    future<> keyspace_changed(const sstring& ks_name);
-    future<> update_pending_ranges();
-
-    const locator::token_metadata& get_token_metadata() const {
-        return _token_metadata;
-    }
-
-    locator::token_metadata& get_token_metadata() {
-        return _token_metadata;
-    }
-
-    future<> gossip_snitch_info();
-
-    void set_load_broadcaster(shared_ptr<load_broadcaster> lb);
-    shared_ptr<load_broadcaster>& get_load_broadcaster();
-
-    distributed<database>& db() {
-        return _db;
-    }
-
-private:
-    bool is_auto_bootstrap();
-    inet_address get_broadcast_address() const {
-        return utils::fb_utilities::get_broadcast_address();
-    }
-    /* This abstraction maintains the token/endpoint metadata information */
-    token_metadata _token_metadata;
-    token_metadata _shadow_token_metadata;
-public:
-    std::chrono::milliseconds get_ring_delay();
-    gms::versioned_value::factory value_factory;
-#if 0
-    public volatile VersionedValue.VersionedValueFactory valueFactory = new VersionedValue.VersionedValueFactory(getPartitioner());
-
-    private Thread drainOnShutdown = null;
-
-    public static final StorageService instance = new StorageService();
-
-    public static IPartitioner getPartitioner()
-    {
-        return DatabaseDescriptor.getPartitioner();
-    }
-#endif
-public:
-    dht::token_range_vector get_local_ranges(const sstring& keyspace_name) {
-        return get_ranges_for_endpoint(keyspace_name, get_broadcast_address());
-    }
-#if 0
-    public Collection<Range<Token>> getPrimaryRanges(String keyspace)
-    {
-        return getPrimaryRangesForEndpoint(keyspace, FBUtilities.getBroadcastAddress());
-    }
-
-    public Collection<Range<Token>> getPrimaryRangesWithinDC(String keyspace)
-    {
-        return getPrimaryRangeForEndpointWithinDC(keyspace, FBUtilities.getBroadcastAddress());
-    }
-
-    private CassandraDaemon
daemon; -#endif -private: - - std::unordered_set _replicating_nodes; - - std::experimental::optional _removing_node; - - /* Are we starting this node in bootstrap mode? */ - bool _is_bootstrap_mode; - - /* we bootstrap but do NOT join the ring unless told to do so */ - // FIXME: System.getProperty("cassandra.write_survey", "false") - bool _is_survey_mode = false; - - bool _initialized; - - bool _joined = false; - -public: - enum class mode { STARTING, NORMAL, JOINING, LEAVING, DECOMMISSIONED, MOVING, DRAINING, DRAINED }; -private: - mode _operation_mode = mode::STARTING; - friend std::ostream& operator<<(std::ostream& os, const mode& mode); -#if 0 - /* the probability for tracing any particular request, 0 disables tracing and 1 enables for all */ - private double traceProbability = 0.0; -#endif - /* Used for tracking drain progress */ -public: - struct drain_progress { - int32_t total_cfs; - int32_t remaining_cfs; - - drain_progress& operator+=(const drain_progress& other) { - total_cfs += other.total_cfs; - remaining_cfs += other.remaining_cfs; - return *this; - } - }; -private: - drain_progress _drain_progress{}; -#if 0 - - private static final AtomicInteger nextRepairCommand = new AtomicInteger(); -#endif - - - std::vector _lifecycle_subscribers; - -#if 0 - private static final BackgroundActivityMonitor bgMonitor = new BackgroundActivityMonitor(); - - private final ObjectName jmxObjectName; - -#endif -private: - std::unordered_set _bootstrap_tokens; - - gms::feature _range_tombstones_feature; - gms::feature _large_partitions_feature; - gms::feature _materialized_views_feature; - gms::feature _counters_feature; - gms::feature _indexes_feature; - -public: - void enable_all_features() { - _range_tombstones_feature.enable(); - _large_partitions_feature.enable(); - _materialized_views_feature.enable(); - _counters_feature.enable(); - _indexes_feature.enable(); - } - - void finish_bootstrapping() { - _is_bootstrap_mode = false; - } - - /** This method updates the local token on disk */ - void set_tokens(std::unordered_set tokens); - void set_gossip_tokens(const std::unordered_set& local_tokens); -#if 0 - - public void registerDaemon(CassandraDaemon daemon) - { - this.daemon = daemon; - } -#endif - - void register_subscriber(endpoint_lifecycle_subscriber* subscriber); - - void unregister_subscriber(endpoint_lifecycle_subscriber* subscriber); - - // should only be called via JMX - future<> stop_gossiping(); - - // should only be called via JMX - future<> start_gossiping(); - - // should only be called via JMX - future is_gossip_running(); - - // should only be called via JMX - future<> start_rpc_server(); - - future<> stop_rpc_server(); - - future is_rpc_server_running(); - - future<> start_native_transport(); - future<> start_redis_transport(); - - future<> stop_native_transport(); - future<> stop_redis_transport(); - - future is_native_transport_running(); - -private: - future<> do_stop_rpc_server(); - future<> do_stop_native_transport(); - future<> do_stop_redis_transport(); - future<> do_stop_ms(); - future<> do_stop_stream_manager(); -#if 0 - public void stopTransports() - { - if (isInitialized()) - { - logger.error("Stopping gossiper"); - stopGossiping(); - } - if (isRPCServerRunning()) - { - logger.error("Stopping RPC server"); - stopRPCServer(); - } - if (isNativeTransportRunning()) - { - logger.error("Stopping native transport"); - stopNativeTransport(); - } - } -#endif -private: - future<> shutdown_client_servers(); -#if 0 - public void stopClient() - { - 
Gossiper.instance.unregister(this); - Gossiper.instance.stop(); - MessagingService.instance().shutdown(); - // give it a second so that task accepted before the MessagingService shutdown gets submitted to the stage (to avoid RejectedExecutionException) - Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS); - StageManager.shutdownNow(); - } -#endif -public: - future is_initialized(); -#if 0 - - public void stopDaemon() - { - if (daemon == null) - throw new IllegalStateException("No configured daemon"); - daemon.deactivate(); - } -#endif -public: - future> prepare_replacement_info(); - - future<> check_for_endpoint_collision(); -#if 0 - - // for testing only - public void unsafeInitialize() throws ConfigurationException - { - _initialized = true; - Gossiper.instance.register(this); - Gossiper.instance.start((int) (System.currentTimeMillis() / 1000)); // needed for node-ring gathering. - Gossiper.instance.addLocalApplicationState(ApplicationState.NET_VERSION, valueFactory.networkVersion()); - if (!MessagingService.instance().isListening()) - MessagingService.instance().listen(FBUtilities.getLocalAddress()); - } -#endif -public: - future<> init_server() { - return init_server(get_ring_delay().count()); - } - - future<> init_server(int delay); - - future<> drain_on_shutdown(); - - future<> stop_transport(); - - void flush_column_families(); -#if 0 - /** - * In the event of forceful termination we need to remove the shutdown hook to prevent hanging (OOM for instance) - */ - public void removeShutdownHook() - { - if (drainOnShutdown != null) - Runtime.getRuntime().removeShutdownHook(drainOnShutdown); - } -#endif -private: - bool should_bootstrap(); - void prepare_to_join(std::vector loaded_endpoints); - void join_token_ring(int delay); -public: - future<> join_ring(); - bool is_joined(); - - future<> rebuild(sstring source_dc); - -#if 0 - public void setStreamThroughputMbPerSec(int value) - { - DatabaseDescriptor.setStreamThroughputOutboundMegabitsPerSec(value); - logger.info("setstreamthroughput: throttle set to {}", value); - } - - public int getStreamThroughputMbPerSec() - { - return DatabaseDescriptor.getStreamThroughputOutboundMegabitsPerSec(); - } - - public int getCompactionThroughputMbPerSec() - { - return DatabaseDescriptor.getCompactionThroughputMbPerSec(); - } - - public void setCompactionThroughputMbPerSec(int value) - { - DatabaseDescriptor.setCompactionThroughputMbPerSec(value); - } - - public boolean isIncrementalBackupsEnabled() - { - return DatabaseDescriptor.isIncrementalBackupsEnabled(); - } - - public void setIncrementalBackupsEnabled(boolean value) - { - DatabaseDescriptor.setIncrementalBackupsEnabled(value); - } -#endif - -private: - void set_mode(mode m, bool log); - void set_mode(mode m, sstring msg, bool log); -public: - void bootstrap(std::unordered_set tokens); - - bool is_bootstrap_mode() { - return _is_bootstrap_mode; - } - -#if 0 - - public TokenMetadata getTokenMetadata() - { - return _token_metadata; - } - - /** - * Increment about the known Compaction severity of the events in this node - */ - public void reportSeverity(double incr) - { - bgMonitor.incrCompactionSeverity(incr); - } - - public void reportManualSeverity(double incr) - { - bgMonitor.incrManualSeverity(incr); - } - - public double getSeverity(InetAddress endpoint) - { - return bgMonitor.getSeverity(endpoint); - } - - /** - * for a keyspace, return the ranges and corresponding listen addresses. 
- * @param keyspace - * @return the endpoint map - */ - public Map, List> getRangeToEndpointMap(String keyspace) - { - /* All the ranges for the tokens */ - Map, List> map = new HashMap<>(); - for (Map.Entry,List> entry : getRangeToAddressMap(keyspace).entrySet()) - { - map.put(entry.getKey().asList(), stringify(entry.getValue())); - } - return map; - } -#endif - /** - * Return the rpc address associated with an endpoint as a string. - * @param endpoint The endpoint to get rpc address for - * @return the rpc address - */ - sstring get_rpc_address(const inet_address& endpoint) const; -#if 0 - /** - * for a keyspace, return the ranges and corresponding RPC addresses for a given keyspace. - * @param keyspace - * @return the endpoint map - */ - public Map, List> getRangeToRpcaddressMap(String keyspace) - { - /* All the ranges for the tokens */ - Map, List> map = new HashMap<>(); - for (Map.Entry, List> entry : getRangeToAddressMap(keyspace).entrySet()) - { - List rpcaddrs = new ArrayList<>(entry.getValue().size()); - for (InetAddress endpoint: entry.getValue()) - { - rpcaddrs.add(getRpcaddress(endpoint)); - } - map.put(entry.getKey().asList(), rpcaddrs); - } - return map; - } - - public Map, List> getPendingRangeToEndpointMap(String keyspace) - { - // some people just want to get a visual representation of things. Allow null and set it to the first - // non-system keyspace. - if (keyspace == null) - keyspace = Schema.instance.getNonSystemKeyspaces().get(0); - - Map, List> map = new HashMap<>(); - for (Map.Entry, Collection> entry : _token_metadata.getPendingRanges(keyspace).entrySet()) - { - List l = new ArrayList<>(entry.getValue()); - map.put(entry.getKey().asList(), stringify(l)); - } - return map; - } -#endif - std::unordered_map> get_range_to_address_map(const sstring& keyspace) const; - - std::unordered_map> get_range_to_address_map_in_local_dc( - const sstring& keyspace) const; - - std::vector get_tokens_in_local_dc() const; - - bool is_local_dc(const inet_address& targetHost) const; - - std::unordered_map> get_range_to_address_map(const sstring& keyspace, - const std::vector& sorted_tokens) const; - - /** - * The same as {@code describeRing(String)} but converts TokenRange to the String for JMX compatibility - * - * @param keyspace The keyspace to fetch information about - * - * @return a List of TokenRange(s) converted to String for the given keyspace - */ - - /* - * describeRingJMX will be implemented in the API - * It is left here just as a marker that there is no need to implement it - * here - */ - //std::vector describeRingJMX(const sstring& keyspace) const { - -#if 0 - - /** - * The same as {@code describeRing(String)} but considers only the part of the ring formed by nodes in the local DC. - */ - public List describeLocalRing(String keyspace) throws InvalidRequestException - { - return describeRing(keyspace, true); - } -#endif - std::vector describe_ring(const sstring& keyspace, bool include_only_local_dc = false) const; - - /** - * Retrieve a map of tokens to endpoints, including the bootstrapping ones. 
- * - * @return a map of tokens to endpoints in ascending order - */ - std::map get_token_to_endpoint_map(); - -#if 0 - - public String getLocalHostId() - { - return getTokenMetadata().getHostId(FBUtilities.getBroadcastAddress()).toString(); - } - - public Map getHostIdMap() - { - Map mapOut = new HashMap<>(); - for (Map.Entry entry : getTokenMetadata().getEndpointToHostIdMapForReading().entrySet()) - mapOut.put(entry.getKey().getHostAddress(), entry.getValue().toString()); - return mapOut; - } -#endif - /** - * Construct the range to endpoint mapping based on the true view - * of the world. - * @param ranges - * @return mapping of ranges to the replicas responsible for them. - */ - std::unordered_map> construct_range_to_endpoint_map( - const sstring& keyspace, - const dht::token_range_vector& ranges) const; -public: - virtual void on_join(gms::inet_address endpoint, gms::endpoint_state ep_state) override; - virtual void before_change(gms::inet_address endpoint, gms::endpoint_state current_state, gms::application_state new_state_key, const gms::versioned_value& new_value) override; - /* - * Handle the reception of a new particular ApplicationState for a particular endpoint. Note that the value of the - * ApplicationState has not necessarily "changed" since the last known value, if we already received the same update - * from somewhere else. - * - * onChange only ever sees one ApplicationState piece change at a time (even if many ApplicationState updates were - * received at the same time), so we perform a kind of state machine here. We are concerned with two events: knowing - * the token associated with an endpoint, and knowing its operation mode. Nodes can start in either bootstrap or - * normal mode, and from bootstrap mode can change mode to normal. A node in bootstrap mode needs to have - * pendingranges set in TokenMetadata; a node in normal mode should instead be part of the token ring. - * - * Normal progression of ApplicationState.STATUS values for a node should be like this: - * STATUS_BOOTSTRAPPING,token - * if bootstrapping. stays this way until all files are received. - * STATUS_NORMAL,token - * ready to serve reads and writes. - * STATUS_LEAVING,token - * get ready to leave the cluster as part of a decommission - * STATUS_LEFT,token - * set after decommission is completed. - * - * Other STATUS values that may be seen (possibly anywhere in the normal progression): - * STATUS_MOVING,newtoken - * set if node is currently moving to a new token in the ring - * REMOVING_TOKEN,deadtoken - * set if the node is dead and is being removed by its REMOVAL_COORDINATOR - * REMOVED_TOKEN,deadtoken - * set if the node is dead and has been removed by its REMOVAL_COORDINATOR - * - * Note: Any time a node state changes from STATUS_NORMAL, it will not be visible to new nodes. So it follows that - * you should never bootstrap a new node during a removenode, decommission or move. 
- */ - virtual void on_change(inet_address endpoint, application_state state, const versioned_value& value) override; - virtual void on_alive(gms::inet_address endpoint, gms::endpoint_state state) override; - virtual void on_dead(gms::inet_address endpoint, gms::endpoint_state state) override; - virtual void on_remove(gms::inet_address endpoint) override; - virtual void on_restart(gms::inet_address endpoint, gms::endpoint_state state) override; - -public: - // For migration_listener - virtual void on_create_keyspace(const sstring& ks_name) override { keyspace_changed(ks_name).get(); } - virtual void on_create_column_family(const sstring& ks_name, const sstring& cf_name) override {} - virtual void on_create_user_type(const sstring& ks_name, const sstring& type_name) override {} - virtual void on_create_function(const sstring& ks_name, const sstring& function_name) override {} - virtual void on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {} - virtual void on_create_view(const sstring& ks_name, const sstring& view_name) override {} - - virtual void on_update_keyspace(const sstring& ks_name) override { keyspace_changed(ks_name).get(); } - virtual void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool) override {} - virtual void on_update_user_type(const sstring& ks_name, const sstring& type_name) override {} - virtual void on_update_function(const sstring& ks_name, const sstring& function_name) override {} - virtual void on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {} - virtual void on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) override {} - - virtual void on_drop_keyspace(const sstring& ks_name) override { keyspace_changed(ks_name).get(); } - virtual void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override {} - virtual void on_drop_user_type(const sstring& ks_name, const sstring& type_name) override {} - virtual void on_drop_function(const sstring& ks_name, const sstring& function_name) override {} - virtual void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {} - virtual void on_drop_view(const sstring& ks_name, const sstring& view_name) override {} -private: - void update_peer_info(inet_address endpoint); - void do_update_system_peers_table(gms::inet_address endpoint, const application_state& state, const versioned_value& value); - sstring get_application_state_value(inet_address endpoint, application_state appstate); - std::unordered_set get_tokens_for(inet_address endpoint); - future<> replicate_to_all_cores(); - semaphore _replicate_task{1}; -private: - /** - * Replicates token_metadata contents on shard0 instance to other shards. - * - * Should be called with a _replicate_task semaphore taken. - * Should run on shard 0 only. - * - * @return a ready future when replication is complete. - */ - future<> replicate_tm_only(); - - /** - * Replicates token_metadata and gossiper::endpoint_state_map contents on - * shard0 instances to other shards. - * - * Should be called with a _replicate_task and a gossiper::timer_callback - * semaphores taken. - * Should run on shard 0 only. - * - * @param g0 a "shared_from_this()" pointer to a gossiper instance on shard0 - * - * @return a ready future when replication is complete. 
- */ - future<> replicate_tm_and_ep_map(shared_ptr g0); - - /** - * Handle node bootstrap - * - * @param endpoint bootstrapping node - */ - void handle_state_bootstrap(inet_address endpoint); - - /** - * Handle node move to normal state. That is, node is entering token ring and participating - * in reads. - * - * @param endpoint node - */ - void handle_state_normal(inet_address endpoint); - - /** - * Handle node preparing to leave the ring - * - * @param endpoint node - */ - void handle_state_leaving(inet_address endpoint); - - /** - * Handle node leaving the ring. This will happen when a node is decommissioned - * - * @param endpoint If reason for leaving is decommission, endpoint is the leaving node. - * @param pieces STATE_LEFT,token - */ - void handle_state_left(inet_address endpoint, std::vector pieces); - - /** - * Handle node moving inside the ring. - * - * @param endpoint moving endpoint address - * @param pieces STATE_MOVING, token - */ - void handle_state_moving(inet_address endpoint, std::vector pieces); - - /** - * Handle notification that a node being actively removed from the ring via 'removenode' - * - * @param endpoint node - * @param pieces either REMOVED_TOKEN (node is gone) or REMOVING_TOKEN (replicas need to be restored) - */ - void handle_state_removing(inet_address endpoint, std::vector pieces); - -private: - void excise(std::unordered_set tokens, inet_address endpoint); - void excise(std::unordered_set tokens, inet_address endpoint, long expire_time); - - /** unlike excise we just need this endpoint gone without going through any notifications **/ - void remove_endpoint(inet_address endpoint); - - void add_expire_time_if_found(inet_address endpoint, int64_t expire_time); - - int64_t extract_expire_time(const std::vector& pieces) { - return std::stoll(pieces[2]); - } - - /** - * Finds living endpoints responsible for the given ranges - * - * @param keyspaceName the keyspace ranges belong to - * @param ranges the ranges to find sources for - * @return multimap of addresses to ranges the address is responsible for - */ - std::unordered_multimap get_new_source_ranges(const sstring& keyspaceName, const dht::token_range_vector& ranges); -public: - future<> confirm_replication(inet_address node); - -private: - - /** - * Sends a notification to a node indicating we have finished replicating data. - * - * @param remote node to send notification to - */ - future<> send_replication_notification(inet_address remote); - - /** - * Called when an endpoint is removed from the ring. This function checks - * whether this node becomes responsible for new ranges as a - * consequence and streams data if needed. - * - * This is rather ineffective, but it does not matter so much - * since this is called very seldom - * - * @param endpoint the node that left - */ - future<> restore_replica_count(inet_address endpoint, inet_address notify_endpoint); - - // needs to be modified to accept either a keyspace or ARS. 
- std::unordered_multimap get_changed_ranges_for_leaving(sstring keyspace_name, inet_address endpoint); -public: - /** raw load value */ - double get_load(); - - sstring get_load_string(); - - future> get_load_map(); - -#if 0 - public final void deliverHints(String host) throws UnknownHostException - { - HintedHandOffManager.instance.scheduleHintDelivery(host); - } -#endif -public: - future> get_local_tokens(); - -#if 0 - /* These methods belong to the MBean interface */ - - public List getTokens() - { - return getTokens(FBUtilities.getBroadcastAddress()); - } - - public List getTokens(String endpoint) throws UnknownHostException - { - return getTokens(InetAddress.getByName(endpoint)); - } - - private List getTokens(InetAddress endpoint) - { - List strTokens = new ArrayList<>(); - for (Token tok : getTokenMetadata().getTokens(endpoint)) - strTokens.add(tok.toString()); - return strTokens; - } -#endif - - sstring get_release_version(); - - sstring get_schema_version(); - - future>> describe_schema_versions(); - -#if 0 - public List getLeavingNodes() - { - return stringify(_token_metadata.getLeavingEndpoints()); - } - - public List getMovingNodes() - { - List endpoints = new ArrayList<>(); - - for (Pair node : _token_metadata.getMovingEndpoints()) - { - endpoints.add(node.right.getHostAddress()); - } - - return endpoints; - } - - public List getJoiningNodes() - { - return stringify(_token_metadata.getBootstrapTokens().valueSet()); - } - - public List getLiveNodes() - { - return stringify(Gossiper.instance.getLiveMembers()); - } - - public List getUnreachableNodes() - { - return stringify(Gossiper.instance.getUnreachableMembers()); - } - - private List stringify(Iterable endpoints) - { - List stringEndpoints = new ArrayList<>(); - for (InetAddress ep : endpoints) - { - stringEndpoints.add(ep.getHostAddress()); - } - return stringEndpoints; - } - - public int forceKeyspaceCleanup(String keyspaceName, String... columnFamilies) throws IOException, ExecutionException, InterruptedException - { - if (keyspaceName.equals(SystemKeyspace.NAME)) - throw new RuntimeException("Cleanup of the system keyspace is neither necessary nor wise"); - - CompactionManager.AllSSTableOpStatus status = CompactionManager.AllSSTableOpStatus.SUCCESSFUL; - for (ColumnFamilyStore cfStore : getValidColumnFamilies(false, false, keyspaceName, columnFamilies)) - { - CompactionManager.AllSSTableOpStatus oneStatus = cfStore.forceCleanup(); - if (oneStatus != CompactionManager.AllSSTableOpStatus.SUCCESSFUL) - status = oneStatus; - } - return status.statusCode; - } - - public int scrub(boolean disableSnapshot, boolean skipCorrupted, String keyspaceName, String... columnFamilies) throws IOException, ExecutionException, InterruptedException - { - CompactionManager.AllSSTableOpStatus status = CompactionManager.AllSSTableOpStatus.SUCCESSFUL; - for (ColumnFamilyStore cfStore : getValidColumnFamilies(false, false, keyspaceName, columnFamilies)) - { - CompactionManager.AllSSTableOpStatus oneStatus = cfStore.scrub(disableSnapshot, skipCorrupted); - if (oneStatus != CompactionManager.AllSSTableOpStatus.SUCCESSFUL) - status = oneStatus; - } - return status.statusCode; - } - - public int upgradeSSTables(String keyspaceName, boolean excludeCurrentVersion, String... 
columnFamilies) throws IOException, ExecutionException, InterruptedException - { - CompactionManager.AllSSTableOpStatus status = CompactionManager.AllSSTableOpStatus.SUCCESSFUL; - for (ColumnFamilyStore cfStore : getValidColumnFamilies(true, true, keyspaceName, columnFamilies)) - { - CompactionManager.AllSSTableOpStatus oneStatus = cfStore.sstablesRewrite(excludeCurrentVersion); - if (oneStatus != CompactionManager.AllSSTableOpStatus.SUCCESSFUL) - status = oneStatus; - } - return status.statusCode; - } - - public void forceKeyspaceCompaction(String keyspaceName, String... columnFamilies) throws IOException, ExecutionException, InterruptedException - { - for (ColumnFamilyStore cfStore : getValidColumnFamilies(true, false, keyspaceName, columnFamilies)) - { - cfStore.forceMajorCompaction(); - } - } - -#endif - /** - * Takes the snapshot for all keyspaces. A snapshot name must be specified. - * - * @param tag the tag given to the snapshot; may not be null or empty - */ - future<> take_snapshot(sstring tag) { - return take_snapshot(tag, {}); - } - - /** - * Takes the snapshot for the given keyspaces. A snapshot name must be specified. - * - * @param tag the tag given to the snapshot; may not be null or empty - * @param keyspaceNames the names of the keyspaces to snapshot; empty means "all." - */ - future<> take_snapshot(sstring tag, std::vector keyspace_names); - - /** - * Takes the snapshot of a specific column family. A snapshot name must be specified. - * - * @param keyspaceName the keyspace which holds the specified column family - * @param columnFamilyName the column family to snapshot - * @param tag the tag given to the snapshot; may not be null or empty - */ - future<> take_column_family_snapshot(sstring ks_name, sstring cf_name, sstring tag); -#if 0 - - private Keyspace getValidKeyspace(String keyspaceName) throws IOException - { - if (!Schema.instance.getKeyspaces().contains(keyspaceName)) - { - throw new IOException("Keyspace " + keyspaceName + " does not exist"); - } - return Keyspace.open(keyspaceName); - } -#endif - - /** - * Remove the snapshot with the given name from the given keyspaces. - * If no tag is specified we will remove all snapshots. - */ - future<> clear_snapshot(sstring tag, std::vector keyspace_names); - - future>> get_snapshot_details(); - - future true_snapshots_size(); -#if 0 - - /** - * @param allowIndexes Allow index CF names to be passed in - * @param autoAddIndexes Automatically add secondary indexes if a CF has them - * @param keyspaceName keyspace - * @param cfNames CFs - * @throws java.lang.IllegalArgumentException when given CF name does not exist - */ - public Iterable getValidColumnFamilies(boolean allowIndexes, boolean autoAddIndexes, String keyspaceName, String... 
cfNames) throws IOException - { - Keyspace keyspace = getValidKeyspace(keyspaceName); - Set valid = new HashSet<>(); - - if (cfNames.length == 0) - { - // all stores are interesting - for (ColumnFamilyStore cfStore : keyspace.getColumnFamilyStores()) - { - valid.add(cfStore); - if (autoAddIndexes) - { - for (SecondaryIndex si : cfStore.indexManager.getIndexes()) - { - if (si.getIndexCfs() != null) { - logger.info("adding secondary index {} to operation", si.getIndexName()); - valid.add(si.getIndexCfs()); - } - } - - } - } - return valid; - } - // filter out interesting stores - for (String cfName : cfNames) - { - //if the CF name is an index, just flush the CF that owns the index - String baseCfName = cfName; - String idxName = null; - if (cfName.contains(".")) // secondary index - { - if(!allowIndexes) - { - logger.warn("Operation not allowed on secondary Index table ({})", cfName); - continue; - } - - String[] parts = cfName.split("\\.", 2); - baseCfName = parts[0]; - idxName = parts[1]; - } - - ColumnFamilyStore cfStore = keyspace.getColumnFamilyStore(baseCfName); - if (idxName != null) - { - Collection< SecondaryIndex > indexes = cfStore.indexManager.getIndexesByNames(new HashSet<>(Arrays.asList(cfName))); - if (indexes.isEmpty()) - logger.warn(String.format("Invalid index specified: %s/%s. Proceeding with others.", baseCfName, idxName)); - else - valid.add(Iterables.get(indexes, 0).getIndexCfs()); - } - else - { - valid.add(cfStore); - if(autoAddIndexes) - { - for(SecondaryIndex si : cfStore.indexManager.getIndexes()) - { - if (si.getIndexCfs() != null) { - logger.info("adding secondary index {} to operation", si.getIndexName()); - valid.add(si.getIndexCfs()); - } - } - } - } - } - return valid; - } - - /** - * Flush all memtables for a keyspace and column families. - * @param keyspaceName - * @param columnFamilies - * @throws IOException - */ - public void forceKeyspaceFlush(String keyspaceName, String... columnFamilies) throws IOException - { - for (ColumnFamilyStore cfStore : getValidColumnFamilies(true, false, keyspaceName, columnFamilies)) - { - logger.debug("Forcing flush on keyspace {}, CF {}", keyspaceName, cfStore.name); - cfStore.forceBlockingFlush(); - } - } - - /** - * Sends JMX notification to subscribers. - * - * @param type Message type - * @param message Message itself - * @param userObject Arbitrary object to attach to notification - */ - public void sendNotification(String type, String message, Object userObject) - { - Notification jmxNotification = new Notification(type, jmxObjectName, notificationSerialNumber.incrementAndGet(), message); - jmxNotification.setUserData(userObject); - sendNotification(jmxNotification); - } - - public int repairAsync(String keyspace, Map repairSpec) - { - RepairOption option = RepairOption.parse(repairSpec, getPartitioner()); - // if ranges are not specified - if (option.getRanges().isEmpty()) - { - if (option.isPrimaryRange()) - { - // when repairing only primary range, neither dataCenters nor hosts can be set - if (option.getDataCenters().isEmpty() && option.getHosts().isEmpty()) - option.getRanges().addAll(getPrimaryRanges(keyspace)); - // except dataCenters only contain local DC (i.e. 
-local) - else if (option.getDataCenters().size() == 1 && option.getDataCenters().contains(DatabaseDescriptor.getLocalDataCenter())) - option.getRanges().addAll(getPrimaryRangesWithinDC(keyspace)); - else - throw new IllegalArgumentException("You need to run primary range repair on all nodes in the cluster."); - } - else - { - option.getRanges().addAll(getLocalRanges(keyspace)); - } - } - return forceRepairAsync(keyspace, option); - } - - @Deprecated - public int forceRepairAsync(String keyspace, - boolean isSequential, - Collection dataCenters, - Collection hosts, - boolean primaryRange, - boolean fullRepair, - String... columnFamilies) - { - return forceRepairAsync(keyspace, isSequential ? RepairParallelism.SEQUENTIAL : RepairParallelism.PARALLEL, dataCenters, hosts, primaryRange, fullRepair, columnFamilies); - } - - @Deprecated - public int forceRepairAsync(String keyspace, - RepairParallelism parallelismDegree, - Collection dataCenters, - Collection hosts, - boolean primaryRange, - boolean fullRepair, - String... columnFamilies) - { - if (FBUtilities.isWindows() && parallelismDegree != RepairParallelism.PARALLEL) - { - logger.warn("Snapshot-based repair is not yet supported on Windows. Reverting to parallel repair."); - parallelismDegree = RepairParallelism.PARALLEL; - } - - RepairOption options = new RepairOption(parallelismDegree, primaryRange, !fullRepair, false, 1, Collections.>emptyList()); - if (dataCenters != null) - { - options.getDataCenters().addAll(dataCenters); - } - if (hosts != null) - { - options.getHosts().addAll(hosts); - } - if (columnFamilies != null) - { - for (String columnFamily : columnFamilies) - { - options.getColumnFamilies().add(columnFamily); - } - } - return forceRepairAsync(keyspace, options); - } - - public int forceRepairAsync(String keyspace, - boolean isSequential, - boolean isLocal, - boolean primaryRange, - boolean fullRepair, - String... columnFamilies) - { - Set dataCenters = null; - if (isLocal) - { - dataCenters = Sets.newHashSet(DatabaseDescriptor.getLocalDataCenter()); - } - return forceRepairAsync(keyspace, isSequential, dataCenters, null, primaryRange, fullRepair, columnFamilies); - } - - public int forceRepairRangeAsync(String beginToken, - String endToken, - String keyspaceName, - boolean isSequential, - Collection dataCenters, - Collection hosts, - boolean fullRepair, - String... columnFamilies) - { - return forceRepairRangeAsync(beginToken, endToken, keyspaceName, isSequential ? RepairParallelism.SEQUENTIAL : RepairParallelism.PARALLEL, dataCenters, hosts, fullRepair, columnFamilies); - } - - public int forceRepairRangeAsync(String beginToken, - String endToken, - String keyspaceName, - RepairParallelism parallelismDegree, - Collection dataCenters, - Collection hosts, - boolean fullRepair, - String... columnFamilies) - { - if (FBUtilities.isWindows() && parallelismDegree != RepairParallelism.PARALLEL) - { - logger.warn("Snapshot-based repair is not yet supported on Windows. 
Reverting to parallel repair."); - parallelismDegree = RepairParallelism.PARALLEL; - } - Collection> repairingRange = createRepairRangeFrom(beginToken, endToken); - - RepairOption options = new RepairOption(parallelismDegree, false, !fullRepair, false, 1, repairingRange); - options.getDataCenters().addAll(dataCenters); - if (hosts != null) - { - options.getHosts().addAll(hosts); - } - if (columnFamilies != null) - { - for (String columnFamily : columnFamilies) - { - options.getColumnFamilies().add(columnFamily); - } - } - - logger.info("starting user-requested repair of range {} for keyspace {} and column families {}", - repairingRange, keyspaceName, columnFamilies); - return forceRepairAsync(keyspaceName, options); - } - - public int forceRepairRangeAsync(String beginToken, - String endToken, - String keyspaceName, - boolean isSequential, - boolean isLocal, - boolean fullRepair, - String... columnFamilies) - { - Set dataCenters = null; - if (isLocal) - { - dataCenters = Sets.newHashSet(DatabaseDescriptor.getLocalDataCenter()); - } - return forceRepairRangeAsync(beginToken, endToken, keyspaceName, isSequential, dataCenters, null, fullRepair, columnFamilies); - } - - /** - * Create collection of ranges that match ring layout from given tokens. - * - * @param beginToken beginning token of the range - * @param endToken end token of the range - * @return collection of ranges that match ring layout in TokenMetadata - */ - @SuppressWarnings("unchecked") - @VisibleForTesting - Collection> createRepairRangeFrom(String beginToken, String endToken) - { - Token parsedBeginToken = getPartitioner().getTokenFactory().fromString(beginToken); - Token parsedEndToken = getPartitioner().getTokenFactory().fromString(endToken); - - // Break up given range to match ring layout in TokenMetadata - ArrayList> repairingRange = new ArrayList<>(); - - ArrayList tokens = new ArrayList<>(_token_metadata.sortedTokens()); - if (!tokens.contains(parsedBeginToken)) - { - tokens.add(parsedBeginToken); - } - if (!tokens.contains(parsedEndToken)) - { - tokens.add(parsedEndToken); - } - // tokens now contain all tokens including our endpoints - Collections.sort(tokens); - - int start = tokens.indexOf(parsedBeginToken), end = tokens.indexOf(parsedEndToken); - for (int i = start; i != end; i = (i+1) % tokens.size()) - { - Range range = new Range<>(tokens.get(i), tokens.get((i+1) % tokens.size())); - repairingRange.add(range); - } - - return repairingRange; - } - - public int forceRepairAsync(String keyspace, RepairOption options) - { - if (options.getRanges().isEmpty() || Keyspace.open(keyspace).getReplicationStrategy().getReplicationFactor() < 2) - return 0; - - int cmd = nextRepairCommand.incrementAndGet(); - new Thread(createRepairTask(cmd, keyspace, options)).start(); - return cmd; - } - - private Thread createQueryThread(final int cmd, final UUID sessionId) - { - return new Thread(new WrappedRunnable() - { - // Query events within a time interval that overlaps the last by one second. Ignore duplicates. Ignore local traces. - // Wake up upon local trace activity. Query when notified of trace activity with a timeout that doubles every two timeouts. - public void runMayThrow() throws Exception - { - TraceState state = Tracing.instance.get(sessionId); - if (state == null) - throw new Exception("no tracestate"); - - String format = "select event_id, source, activity from %s.%s where session_id = ? and event_id > ? 
and event_id < ?;"; - String query = String.format(format, TraceKeyspace.NAME, TraceKeyspace.EVENTS); - SelectStatement statement = (SelectStatement) QueryProcessor.parseStatement(query).prepare().statement; - - ByteBuffer sessionIdBytes = ByteBufferUtil.bytes(sessionId); - InetAddress source = FBUtilities.getBroadcastAddress(); - - HashSet[] seen = new HashSet[] { new HashSet(), new HashSet() }; - int si = 0; - UUID uuid; - - long tlast = System.currentTimeMillis(), tcur; - - TraceState.Status status; - long minWaitMillis = 125; - long maxWaitMillis = 1000 * 1024L; - long timeout = minWaitMillis; - boolean shouldDouble = false; - - while ((status = state.waitActivity(timeout)) != TraceState.Status.STOPPED) - { - if (status == TraceState.Status.IDLE) - { - timeout = shouldDouble ? Math.min(timeout * 2, maxWaitMillis) : timeout; - shouldDouble = !shouldDouble; - } - else - { - timeout = minWaitMillis; - shouldDouble = false; - } - ByteBuffer tminBytes = ByteBufferUtil.bytes(UUIDGen.minTimeUUID(tlast - 1000)); - ByteBuffer tmaxBytes = ByteBufferUtil.bytes(UUIDGen.maxTimeUUID(tcur = System.currentTimeMillis())); - QueryOptions options = QueryOptions.forInternalCalls(ConsistencyLevel.ONE, Lists.newArrayList(sessionIdBytes, tminBytes, tmaxBytes)); - ResultMessage.Rows rows = statement.execute(QueryState.forInternalCalls(), options); - UntypedResultSet result = UntypedResultSet.create(rows.result); - - for (UntypedResultSet.Row r : result) - { - if (source.equals(r.getInetAddress("source"))) - continue; - if ((uuid = r.getUUID("event_id")).timestamp() > (tcur - 1000) * 10000) - seen[si].add(uuid); - if (seen[si == 0 ? 1 : 0].contains(uuid)) - continue; - String message = String.format("%s: %s", r.getInetAddress("source"), r.getString("activity")); - sendNotification("repair", message, new int[]{cmd, ActiveRepairService.Status.RUNNING.ordinal()}); - } - tlast = tcur; - - si = si == 0 ? 
1 : 0; - seen[si].clear(); - } - } - }); - } - - private FutureTask createRepairTask(final int cmd, final String keyspace, final RepairOption options) - { - if (!options.getDataCenters().isEmpty() && options.getDataCenters().contains(DatabaseDescriptor.getLocalDataCenter())) - { - throw new IllegalArgumentException("the local data center must be part of the repair"); - } - - return new FutureTask<>(new WrappedRunnable() - { - protected void runMayThrow() throws Exception - { - final TraceState traceState; - - String[] columnFamilies = options.getColumnFamilies().toArray(new String[options.getColumnFamilies().size()]); - Iterable validColumnFamilies = getValidColumnFamilies(false, false, keyspace, columnFamilies); - - final long startTime = System.currentTimeMillis(); - String message = String.format("Starting repair command #%d, repairing keyspace %s with %s", cmd, keyspace, options); - logger.info(message); - sendNotification("repair", message, new int[]{cmd, ActiveRepairService.Status.STARTED.ordinal()}); - if (options.isTraced()) - { - StringBuilder cfsb = new StringBuilder(); - for (ColumnFamilyStore cfs : validColumnFamilies) - cfsb.append(", ").append(cfs.keyspace.getName()).append(".").append(cfs.name); - - UUID sessionId = Tracing.instance.newSession(Tracing.TraceType.REPAIR); - traceState = Tracing.instance.begin("repair", ImmutableMap.of("keyspace", keyspace, "columnFamilies", cfsb.substring(2))); - Tracing.traceRepair(message); - traceState.enableActivityNotification(); - traceState.setNotificationHandle(new int[]{ cmd, ActiveRepairService.Status.RUNNING.ordinal() }); - Thread queryThread = createQueryThread(cmd, sessionId); - queryThread.setName("RepairTracePolling"); - queryThread.start(); - } - else - { - traceState = null; - } - - final Set allNeighbors = new HashSet<>(); - Map> rangeToNeighbors = new HashMap<>(); - for (Range range : options.getRanges()) - { - try - { - Set neighbors = ActiveRepairService.getNeighbors(keyspace, range, options.getDataCenters(), options.getHosts()); - rangeToNeighbors.put(range, neighbors); - allNeighbors.addAll(neighbors); - } - catch (IllegalArgumentException e) - { - logger.error("Repair failed:", e); - sendNotification("repair", e.getMessage(), new int[]{cmd, ActiveRepairService.Status.FINISHED.ordinal()}); - return; - } - } - - // Validate columnfamilies - List columnFamilyStores = new ArrayList<>(); - try - { - Iterables.addAll(columnFamilyStores, validColumnFamilies); - } - catch (IllegalArgumentException e) - { - sendNotification("repair", e.getMessage(), new int[]{cmd, ActiveRepairService.Status.FINISHED.ordinal()}); - return; - } - - final UUID parentSession; - long repairedAt; - try - { - parentSession = ActiveRepairService.instance.prepareForRepair(allNeighbors, options, columnFamilyStores); - repairedAt = ActiveRepairService.instance.getParentRepairSession(parentSession).repairedAt; - } - catch (Throwable t) - { - sendNotification("repair", String.format("Repair failed with error %s", t.getMessage()), new int[]{cmd, ActiveRepairService.Status.FINISHED.ordinal()}); - return; - } - - // Set up RepairJob executor for this repair command. 
- final ListeningExecutorService executor = MoreExecutors.listeningDecorator(new JMXConfigurableThreadPoolExecutor(options.getJobThreads(), - Integer.MAX_VALUE, - TimeUnit.SECONDS, - new LinkedBlockingQueue(), - new NamedThreadFactory("Repair#" + cmd), - "internal")); - - List> futures = new ArrayList<>(options.getRanges().size()); - String[] cfnames = new String[columnFamilyStores.size()]; - for (int i = 0; i < columnFamilyStores.size(); i++) - { - cfnames[i] = columnFamilyStores.get(i).name; - } - for (Range range : options.getRanges()) - { - final RepairSession session = ActiveRepairService.instance.submitRepairSession(parentSession, - range, - keyspace, - options.getParallelism(), - rangeToNeighbors.get(range), - repairedAt, - executor, - cfnames); - if (session == null) - continue; - // After repair session completes, notify client its result - Futures.addCallback(session, new FutureCallback() - { - public void onSuccess(RepairSessionResult result) - { - String message = String.format("Repair session %s for range %s finished", session.getId(), session.getRange().toString()); - logger.info(message); - sendNotification("repair", message, new int[]{cmd, ActiveRepairService.Status.SESSION_SUCCESS.ordinal()}); - } - - public void onFailure(Throwable t) - { - String message = String.format("Repair session %s for range %s failed with error %s", session.getId(), session.getRange().toString(), t.getMessage()); - logger.error(message, t); - sendNotification("repair", message, new int[]{cmd, ActiveRepairService.Status.SESSION_FAILED.ordinal()}); - } - }); - futures.add(session); - } - - // After all repair sessions completes(successful or not), - // run anticompaction if necessary and send finish notice back to client - final ListenableFuture> allSessions = Futures.successfulAsList(futures); - Futures.addCallback(allSessions, new FutureCallback>() - { - public void onSuccess(List result) - { - // filter out null(=failed) results and get successful ranges - Collection> successfulRanges = new ArrayList<>(); - for (RepairSessionResult sessionResult : result) - { - if (sessionResult != null) - { - successfulRanges.add(sessionResult.range); - } - } - try - { - ActiveRepairService.instance.finishParentSession(parentSession, allNeighbors, successfulRanges); - } - catch (Exception e) - { - logger.error("Error in incremental repair", e); - } - repairComplete(); - } - - public void onFailure(Throwable t) - { - repairComplete(); - } - - private void repairComplete() - { - String duration = DurationFormatUtils.formatDurationWords(System.currentTimeMillis() - startTime, true, true); - String message = String.format("Repair command #%d finished in %s", cmd, duration); - sendNotification("repair", message, - new int[]{cmd, ActiveRepairService.Status.FINISHED.ordinal()}); - logger.info(message); - if (options.isTraced()) - { - traceState.setNotificationHandle(null); - // Because DebuggableThreadPoolExecutor#afterExecute and this callback - // run in a nondeterministic order (within the same thread), the - // TraceState may have been nulled out at this point. The TraceState - // should be traceState, so just set it without bothering to check if it - // actually was nulled out. 
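One subtlety in the aggregation above, illustrated with a small sketch of Guava semantics rather than code from this patch: Futures.successfulAsList() substitutes null for each failed input future instead of failing the aggregate, which is why onSuccess() must filter out null session results:

    ListenableFuture<List<String>> all = Futures.successfulAsList(
            Futures.immediateFuture("ok"),
            Futures.<String>immediateFailedFuture(new RuntimeException("boom")));
    // all.get() yields ["ok", null]; a failure surfaces as a null slot.
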
- Tracing.instance.set(traceState); - Tracing.traceRepair(message); - Tracing.instance.stopSession(); - } - executor.shutdownNow(); - } - }); - } - }, null); - } - - public void forceTerminateAllRepairSessions() { - ActiveRepairService.instance.terminateSessions(); - } - - /* End of MBean interface methods */ - - /** - * Get the "primary ranges" for the specified keyspace and endpoint. - * "Primary ranges" are the ranges that the node is responsible for storing replica primarily. - * The node that stores replica primarily is defined as the first node returned - * by {@link AbstractReplicationStrategy#calculateNaturalEndpoints}. - * - * @param keyspace Keyspace name to check primary ranges - * @param ep endpoint we are interested in. - * @return primary ranges for the specified endpoint. - */ - public Collection> getPrimaryRangesForEndpoint(String keyspace, InetAddress ep) - { - AbstractReplicationStrategy strategy = Keyspace.open(keyspace).getReplicationStrategy(); - Collection> primaryRanges = new HashSet<>(); - TokenMetadata metadata = _token_metadata.cloneOnlyTokenMap(); - for (Token token : metadata.sortedTokens()) - { - List endpoints = strategy.calculateNaturalEndpoints(token, metadata); - if (endpoints.size() > 0 && endpoints.get(0).equals(ep)) - primaryRanges.add(new Range<>(metadata.getPredecessor(token), token)); - } - return primaryRanges; - } - - /** - * Get the "primary ranges" within local DC for the specified keyspace and endpoint. - * - * @see #getPrimaryRangesForEndpoint(String, java.net.InetAddress) - * @param keyspace Keyspace name to check primary ranges - * @param referenceEndpoint endpoint we are interested in. - * @return primary ranges within local DC for the specified endpoint. - */ - public Collection> getPrimaryRangeForEndpointWithinDC(String keyspace, InetAddress referenceEndpoint) - { - TokenMetadata metadata = _token_metadata.cloneOnlyTokenMap(); - String localDC = DatabaseDescriptor.getEndpointSnitch().getDatacenter(referenceEndpoint); - Collection localDcNodes = metadata.getTopology().getDatacenterEndpoints().get(localDC); - AbstractReplicationStrategy strategy = Keyspace.open(keyspace).getReplicationStrategy(); - - Collection> localDCPrimaryRanges = new HashSet<>(); - for (Token token : metadata.sortedTokens()) - { - List endpoints = strategy.calculateNaturalEndpoints(token, metadata); - for (InetAddress endpoint : endpoints) - { - if (localDcNodes.contains(endpoint)) - { - if (endpoint.equals(referenceEndpoint)) - { - localDCPrimaryRanges.add(new Range<>(metadata.getPredecessor(token), token)); - } - break; - } - } - } - - return localDCPrimaryRanges; - } -#endif - /** - * Get all ranges an endpoint is responsible for (by keyspace) - * Replication strategy's get_ranges() guarantees that no wrap-around range is returned. - * @param ep endpoint we are interested in. - * @return ranges for the specified endpoint. - */ - dht::token_range_vector get_ranges_for_endpoint(const sstring& name, const gms::inet_address& ep) const; - - /** - * Get all ranges that span the ring given a set - * of tokens. All ranges are in sorted order of - * ranges. - * @return ranges in sorted order - */ - dht::token_range_vector get_all_ranges(const std::vector& sorted_tokens) const; - /** - * This method returns the N endpoints that are responsible for storing the - * specified key i.e for replication. 
-     *
-     * @param keyspaceName keyspace name also known as keyspace
-     * @param cf Column family name
-     * @param key key for which we need to find the endpoint
-     * @return the endpoint responsible for this key
-     */
-    std::vector<gms::inet_address> get_natural_endpoints(const sstring& keyspace,
-        const sstring& cf, const sstring& key) const;
-#if 0
-    public List<InetAddress> getNaturalEndpoints(String keyspaceName, ByteBuffer key)
-    {
-        return getNaturalEndpoints(keyspaceName, getPartitioner().getToken(key));
-    }
-#endif
-    /**
-     * This method returns the N endpoints that are responsible for storing the
-     * specified key i.e for replication.
-     *
-     * @param keyspaceName keyspace name also known as keyspace
-     * @param pos position for which we need to find the endpoint
-     * @return the endpoint responsible for this token
-     */
-    std::vector<gms::inet_address> get_natural_endpoints(const sstring& keyspace, const token& pos) const;
-#if 0
-    /**
-     * This method attempts to return N endpoints that are responsible for storing the
-     * specified key i.e for replication.
-     *
-     * @param keyspace keyspace name also known as keyspace
-     * @param key key for which we need to find the endpoint
-     * @return the endpoint responsible for this key
-     */
-    public List<InetAddress> getLiveNaturalEndpoints(Keyspace keyspace, ByteBuffer key)
-    {
-        return getLiveNaturalEndpoints(keyspace, getPartitioner().decorateKey(key));
-    }
-
-    public List<InetAddress> getLiveNaturalEndpoints(Keyspace keyspace, RingPosition pos)
-    {
-        List<InetAddress> endpoints = keyspace.getReplicationStrategy().getNaturalEndpoints(pos);
-        List<InetAddress> liveEps = new ArrayList<>(endpoints.size());
-
-        for (InetAddress endpoint : endpoints)
-        {
-            if (FailureDetector.instance.isAlive(endpoint))
-                liveEps.add(endpoint);
-        }
-
-        return liveEps;
-    }
-
-    public void setLoggingLevel(String classQualifier, String rawLevel) throws Exception
-    {
-        ch.qos.logback.classic.Logger logBackLogger = (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(classQualifier);
-
-        // if both classQualifier and rawLevel are empty, reload from configuration
-        if (StringUtils.isBlank(classQualifier) && StringUtils.isBlank(rawLevel) )
-        {
-            JMXConfiguratorMBean jmxConfiguratorMBean = JMX.newMBeanProxy(ManagementFactory.getPlatformMBeanServer(),
-                    new ObjectName("ch.qos.logback.classic:Name=default,Type=ch.qos.logback.classic.jmx.JMXConfigurator"),
-                    JMXConfiguratorMBean.class);
-            jmxConfiguratorMBean.reloadDefaultConfiguration();
-            return;
-        }
-        // classQualifier is set, but blank level given
-        else if (StringUtils.isNotBlank(classQualifier) && StringUtils.isBlank(rawLevel) )
-        {
-            if (logBackLogger.getLevel() != null || hasAppenders(logBackLogger))
-                logBackLogger.setLevel(null);
-            return;
-        }
-
-        ch.qos.logback.classic.Level level = ch.qos.logback.classic.Level.toLevel(rawLevel);
-        logBackLogger.setLevel(level);
-        logger.info("set log level to {} for classes under '{}' (if the level doesn't look like '{}' then the logger couldn't parse '{}')", level, classQualifier, rawLevel, rawLevel);
-    }
-
-    /**
-     * @return the runtime logging levels for all the configured loggers
-     */
-    @Override
-    public Map<String, String> getLoggingLevels() {
-        Map<String, String> logLevelMaps = Maps.newLinkedHashMap();
-        LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory();
-        for (ch.qos.logback.classic.Logger logger : lc.getLoggerList())
-        {
-            if(logger.getLevel() != null || hasAppenders(logger))
-                logLevelMaps.put(logger.getName(), logger.getLevel().toString());
-        }
-        return logLevelMaps;
-    }
-
-    private boolean hasAppenders(ch.qos.logback.classic.Logger logger) {
-        Iterator<Appender<ILoggingEvent>> it = logger.iteratorForAppenders();
-
return it.hasNext(); - } -#endif - /** - * @return Vector of Token ranges (_not_ keys!) together with estimated key count, - * breaking up the data this node is responsible for into pieces of roughly keys_per_split - */ - std::vector> get_splits(const sstring& ks_name, - const sstring& cf_name, - range range, - uint32_t keys_per_split); -public: - future<> decommission(); - -private: - /** - * Broadcast leaving status and update local _token_metadata accordingly - */ - future<> start_leaving(); - void leave_ring(); - void unbootstrap(); - future<> stream_hints(); - -public: - future<> move(sstring new_token) { - // FIXME: getPartitioner().getTokenFactory().validate(newToken); - return move(dht::global_partitioner().from_sstring(new_token)); - } - -private: - /** - * move the node to new token or find a new token to boot to according to load - * - * @param newToken new token to boot to, or if null, find balanced token to boot to - * - * @throws IOException on any I/O operation error - */ - future<> move(token new_token); -public: - - class range_relocator { - private: - streaming::stream_plan _stream_plan; - - public: - range_relocator(std::unordered_set tokens, std::vector keyspace_names) - : _stream_plan("Relocation") { - calculate_to_from_streams(std::move(tokens), std::move(keyspace_names)); - } - - private: - void calculate_to_from_streams(std::unordered_set new_tokens, std::vector keyspace_names); - - public: - future<> stream() { - return _stream_plan.execute().discard_result(); - } - - bool streams_needed() { - return !_stream_plan.is_empty(); - } - }; - - - /** - * Get the status of a token removal. - */ - future get_removal_status(); - - /** - * Force a remove operation to complete. This may be necessary if a remove operation - * blocks forever due to node/stream failure. removeToken() must be called - * first, this is a last resort measure. No further attempt will be made to restore replicas. - */ - future<> force_remove_completion(); - -public: - /** - * Remove a node that has died, attempting to restore the replica count. - * If the node is alive, decommission should be attempted. If decommission - * fails, then removeToken should be called. If we fail while trying to - * restore the replica count, finally forceRemoveCompleteion should be - * called to forcibly remove the node without regard to replica count. - * - * @param hostIdString token for the node - */ - future<> removenode(sstring host_id_string); - - future get_operation_mode(); - - future is_starting(); - - drain_progress get_drain_progress() const { - return _drain_progress; - } - - /** - * Shuts node off to writes, empties memtables and the commit log. - * There are two differences between drain and the normal shutdown hook: - * - Drain waits for in-progress streaming to complete - * - Drain flushes *all* columnfamilies (shutdown hook only flushes non-durable CFs) - */ - future<> drain(); - -#if 0 - // Never ever do this at home. Used by tests. 
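A hypothetical usage sketch (not from this patch; new_token and keyspace_names are illustrative variables) of the range_relocator declared above: its constructor computes the to/from streams for the new token set, and the "Relocation" plan is executed only when transfers are actually needed:

    range_relocator relocator({new_token}, keyspace_names);
    future<> done = relocator.streams_needed() ? relocator.stream()
                                               : make_ready_future<>();
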
- IPartitioner setPartitionerUnsafe(IPartitioner newPartitioner) - { - IPartitioner oldPartitioner = DatabaseDescriptor.getPartitioner(); - DatabaseDescriptor.setPartitioner(newPartitioner); - valueFactory = new VersionedValue.VersionedValueFactory(getPartitioner()); - return oldPartitioner; - } - - TokenMetadata setTokenMetadataUnsafe(TokenMetadata tmd) - { - TokenMetadata old = _token_metadata; - _token_metadata = tmd; - return old; - } - - public void truncate(String keyspace, String columnFamily) throws TimeoutException, IOException - { - try - { - StorageProxy.truncateBlocking(keyspace, columnFamily); - } - catch (UnavailableException e) - { - throw new IOException(e.getMessage()); - } - } -#endif -public: - future> get_ownership(); - - future> effective_ownership(sstring keyspace_name); -#if 0 - /** - * Calculates ownership. If there are multiple DC's and the replication strategy is DC aware then ownership will be - * calculated per dc, i.e. each DC will have total ring ownership divided amongst its nodes. Without replication - * total ownership will be a multiple of the number of DC's and this value will then go up within each DC depending - * on the number of replicas within itself. For DC unaware replication strategies, ownership without replication - * will be 100%. - * - * @throws IllegalStateException when node is not configured properly. - */ - public LinkedHashMap effectiveOwnership(String keyspace) throws IllegalStateException - { - - if (keyspace != null) - { - Keyspace keyspaceInstance = Schema.instance.getKeyspaceInstance(keyspace); - if(keyspaceInstance == null) - throw new IllegalArgumentException("The keyspace " + keyspace + ", does not exist"); - - if(keyspaceInstance.getReplicationStrategy() instanceof LocalStrategy) - throw new IllegalStateException("Ownership values for keyspaces with LocalStrategy are meaningless"); - } - else - { - List nonSystemKeyspaces = Schema.instance.getNonSystemKeyspaces(); - - //system_traces is a non-system keyspace however it needs to be counted as one for this process - int specialTableCount = 0; - if (nonSystemKeyspaces.contains("system_traces")) - { - specialTableCount += 1; - } - if (nonSystemKeyspaces.size() > specialTableCount) - throw new IllegalStateException("Non-system keyspaces don't have the same replication settings, effective ownership information is meaningless"); - - keyspace = "system_traces"; - } - - TokenMetadata metadata = _token_metadata.cloneOnlyTokenMap(); - - Collection> endpointsGroupedByDc = new ArrayList<>(); - // mapping of dc's to nodes, use sorted map so that we get dcs sorted - SortedMap> sortedDcsToEndpoints = new TreeMap<>(); - sortedDcsToEndpoints.putAll(metadata.getTopology().getDatacenterEndpoints().asMap()); - for (Collection endpoints : sortedDcsToEndpoints.values()) - endpointsGroupedByDc.add(endpoints); - - Map tokenOwnership = getPartitioner().describeOwnership(_token_metadata.sortedTokens()); - LinkedHashMap finalOwnership = Maps.newLinkedHashMap(); - - // calculate ownership per dc - for (Collection endpoints : endpointsGroupedByDc) - { - // calculate the ownership with replication and add the endpoint to the final ownership map - for (InetAddress endpoint : endpoints) - { - float ownership = 0.0f; - for (Range range : getRangesForEndpoint(keyspace, endpoint)) - { - if (tokenOwnership.containsKey(range.right)) - ownership += tokenOwnership.get(range.right); - } - finalOwnership.put(endpoint, ownership); - } - } - return finalOwnership; - } - - - private boolean hasSameReplication(List list) - { 
- if (list.isEmpty()) - return false; - - for (int i = 0; i < list.size() -1; i++) - { - KSMetaData ksm1 = Schema.instance.getKSMetaData(list.get(i)); - KSMetaData ksm2 = Schema.instance.getKSMetaData(list.get(i + 1)); - if (!ksm1.strategyClass.equals(ksm2.strategyClass) || - !Iterators.elementsEqual(ksm1.strategyOptions.entrySet().iterator(), - ksm2.strategyOptions.entrySet().iterator())) - return false; - } - return true; - } - - public List getKeyspaces() - { - List keyspaceNamesList = new ArrayList<>(Schema.instance.getKeyspaces()); - return Collections.unmodifiableList(keyspaceNamesList); - } - - public List getNonSystemKeyspaces() - { - List keyspaceNamesList = new ArrayList<>(Schema.instance.getNonSystemKeyspaces()); - return Collections.unmodifiableList(keyspaceNamesList); - } - - public void updateSnitch(String epSnitchClassName, Boolean dynamic, Integer dynamicUpdateInterval, Integer dynamicResetInterval, Double dynamicBadnessThreshold) throws ClassNotFoundException - { - IEndpointSnitch oldSnitch = DatabaseDescriptor.getEndpointSnitch(); - - // new snitch registers mbean during construction - IEndpointSnitch newSnitch; - try - { - newSnitch = FBUtilities.construct(epSnitchClassName, "snitch"); - } - catch (ConfigurationException e) - { - throw new ClassNotFoundException(e.getMessage()); - } - if (dynamic) - { - DatabaseDescriptor.setDynamicUpdateInterval(dynamicUpdateInterval); - DatabaseDescriptor.setDynamicResetInterval(dynamicResetInterval); - DatabaseDescriptor.setDynamicBadnessThreshold(dynamicBadnessThreshold); - newSnitch = new DynamicEndpointSnitch(newSnitch); - } - - // point snitch references to the new instance - DatabaseDescriptor.setEndpointSnitch(newSnitch); - for (String ks : Schema.instance.getKeyspaces()) - { - Keyspace.open(ks).getReplicationStrategy().snitch = newSnitch; - } - - if (oldSnitch instanceof DynamicEndpointSnitch) - ((DynamicEndpointSnitch)oldSnitch).unregisterMBean(); - } -#endif - -private: - /** - * Seed data to the endpoints that will be responsible for it at the future - * - * @param rangesToStreamByKeyspace keyspaces and data ranges with endpoints included for each - * @return async Future for whether stream was success - */ - future<> stream_ranges(std::unordered_map> ranges_to_stream_by_keyspace); - -public: - /** - * Calculate pair of ranges to stream/fetch for given two range collections - * (current ranges for keyspace and ranges after move to new token) - * - * @param current collection of the ranges by current token - * @param updated collection of the ranges after token is changed - * @return pair of ranges to stream/fetch for given current and updated range collections - */ - std::pair, std::unordered_set> - calculate_stream_and_fetch_ranges(const dht::token_range_vector& current, const dht::token_range_vector& updated); -#if 0 - public void bulkLoad(String directory) - { - try - { - bulkLoadInternal(directory).get(); - } - catch (Exception e) - { - throw new RuntimeException(e); - } - } - - public String bulkLoadAsync(String directory) - { - return bulkLoadInternal(directory).planId.toString(); - } - - private StreamResultFuture bulkLoadInternal(String directory) - { - File dir = new File(directory); - - if (!dir.exists() || !dir.isDirectory()) - throw new IllegalArgumentException("Invalid directory " + directory); - - SSTableLoader.Client client = new SSTableLoader.Client() - { - public void init(String keyspace) - { - try - { - setPartitioner(DatabaseDescriptor.getPartitioner()); - for (Map.Entry, List> entry : 
StorageService.instance.getRangeToAddressMap(keyspace).entrySet()) - { - Range range = entry.getKey(); - for (InetAddress endpoint : entry.getValue()) - addRangeForEndpoint(range, endpoint); - } - } - catch (Exception e) - { - throw new RuntimeException(e); - } - } - - public CFMetaData getCFMetaData(String keyspace, String cfName) - { - return Schema.instance.getCFMetaData(keyspace, cfName); - } - }; - - SSTableLoader loader = new SSTableLoader(dir, client, new OutputHandler.LogOutput()); - return loader.stream(); - } -#endif -public: - int32_t get_exception_count(); -#if 0 - public void rescheduleFailedDeletions() - { - SSTableDeletingTask.rescheduleFailedTasks(); - } -#endif - /** - * Load new SSTables not currently tracked by the system - * - * This can be called, for instance, after copying a batch of SSTables to a CF directory. - * - * This should not be called in parallel for the same keyspace / column family, and doing - * so will throw an std::runtime_exception. - * - * @param ks_name the keyspace in which to search for new SSTables. - * @param cf_name the column family in which to search for new SSTables. - * @return a future<> when the operation finishes. - */ - future<> load_new_sstables(sstring ks_name, sstring cf_name); -#if 0 - /** - * #{@inheritDoc} - */ - public List sampleKeyRange() // do not rename to getter - see CASSANDRA-4452 for details - { - List keys = new ArrayList<>(); - for (Keyspace keyspace : Keyspace.nonSystem()) - { - for (Range range : getPrimaryRangesForEndpoint(keyspace.getName(), FBUtilities.getBroadcastAddress())) - keys.addAll(keySamples(keyspace.getColumnFamilyStores(), range)); - } - - List sampledKeys = new ArrayList<>(keys.size()); - for (DecoratedKey key : keys) - sampledKeys.add(key.getToken().toString()); - return sampledKeys; - } - - public void rebuildSecondaryIndex(String ksName, String cfName, String... idxNames) - { - ColumnFamilyStore.rebuildSecondaryIndex(ksName, cfName, idxNames); - } - - public void resetLocalSchema() throws IOException - { - MigrationManager.resetLocalSchema(); - } - - public void setTraceProbability(double probability) - { - this.traceProbability = probability; - } - - public double getTraceProbability() - { - return traceProbability; - } - - public void disableAutoCompaction(String ks, String... columnFamilies) throws IOException - { - for (ColumnFamilyStore cfs : getValidColumnFamilies(true, true, ks, columnFamilies)) - { - cfs.disableAutoCompaction(); - } - } - - public void enableAutoCompaction(String ks, String... 
columnFamilies) throws IOException - { - for (ColumnFamilyStore cfs : getValidColumnFamilies(true, true, ks, columnFamilies)) - { - cfs.enableAutoCompaction(); - } - } - - /** Returns the name of the cluster */ - public String getClusterName() - { - return DatabaseDescriptor.getClusterName(); - } - - /** Returns the cluster partitioner */ - public String getPartitionerName() - { - return DatabaseDescriptor.getPartitionerName(); - } - - public int getTombstoneWarnThreshold() - { - return DatabaseDescriptor.getTombstoneWarnThreshold(); - } - - public void setTombstoneWarnThreshold(int threshold) - { - DatabaseDescriptor.setTombstoneWarnThreshold(threshold); - } - - public int getTombstoneFailureThreshold() - { - return DatabaseDescriptor.getTombstoneFailureThreshold(); - } - - public void setTombstoneFailureThreshold(int threshold) - { - DatabaseDescriptor.setTombstoneFailureThreshold(threshold); - } - - public int getBatchSizeFailureThreshold() - { - return DatabaseDescriptor.getBatchSizeFailThresholdInKB(); - } - - public void setBatchSizeFailureThreshold(int threshold) - { - DatabaseDescriptor.setBatchSizeFailThresholdInKB(threshold); - } - - public void setHintedHandoffThrottleInKB(int throttleInKB) - { - DatabaseDescriptor.setHintedHandoffThrottleInKB(throttleInKB); - logger.info(String.format("Updated hinted_handoff_throttle_in_kb to %d", throttleInKB)); - } -#endif - - template - auto run_with_api_lock(sstring operation, Func&& func) { - return get_storage_service().invoke_on(0, [operation = std::move(operation), - func = std::forward(func)] (storage_service& ss) mutable { - if (!ss._operation_in_progress.empty()) { - throw std::runtime_error(sprint("Operation %s is in progress, try again", ss._operation_in_progress)); - } - ss._operation_in_progress = std::move(operation); - return func(ss).finally([&ss] { - ss._operation_in_progress = sstring(); - }); - }); - } - - template - auto run_with_no_api_lock(Func&& func) { - return get_storage_service().invoke_on(0, [func = std::forward(func)] (storage_service& ss) mutable { - return func(ss); - }); - } -private: - void do_isolate_on_error(disk_error type); - utils::UUID _local_host_id; -public: - utils::UUID get_local_id() { return _local_host_id; } - - static sstring get_config_supported_features(); - - bool cluster_supports_range_tombstones() { - return bool(_range_tombstones_feature); - } - - bool cluster_supports_large_partitions() const { - return bool(_large_partitions_feature); - } - - bool cluster_supports_materialized_views() const { - return bool(_materialized_views_feature); - } - - bool cluster_supports_counters() const { - return bool(_counters_feature); - } - - bool cluster_supports_indexes() const { - return bool(_indexes_feature); - } -}; - -inline future<> init_storage_service(distributed& db) { - return service::get_storage_service().start(std::ref(db)); -} - -inline future<> deinit_storage_service() { - return service::get_storage_service().stop(); -} - -} diff --git a/scylla/sstable_mutation_readers.hh b/scylla/sstable_mutation_readers.hh deleted file mode 100644 index 9995e4f..0000000 --- a/scylla/sstable_mutation_readers.hh +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "sstables/sstables.hh" -#include "query-request.hh" -#include "mutation_reader.hh" - -class sstable_range_wrapping_reader final : public mutation_reader::impl { - lw_shared_ptr _sst; - sstables::mutation_reader _smr; -public: - sstable_range_wrapping_reader(lw_shared_ptr sst, - schema_ptr s, - const dht::partition_range& pr, - const query::partition_slice& slice, - const io_priority_class& pc, - streamed_mutation::forwarding fwd, - mutation_reader::forwarding fwd_mr) - : _sst(sst) - , _smr(sst->read_range_rows(std::move(s), pr, slice, pc, fwd, fwd_mr)) { - } - virtual future operator()() override { - return _smr.read(); - } - virtual future<> fast_forward_to(const dht::partition_range& pr) override { - return _smr.fast_forward_to(pr); - } -}; diff --git a/scylla/sstables/atomic_deletion.cc b/scylla/sstables/atomic_deletion.cc deleted file mode 100644 index a4d0d75..0000000 --- a/scylla/sstables/atomic_deletion.cc +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "atomic_deletion.hh" -#include "to_string.hh" -#include -#include -#include - -namespace sstables { - -atomic_deletion_manager::atomic_deletion_manager(unsigned shard_count, - std::function (std::vector sstables)> delete_sstables) - : _shard_count(shard_count) - , _delete_sstables(std::move(delete_sstables)) { -} - -future<> -atomic_deletion_manager::delete_atomically(std::vector atomic_deletion_set, unsigned deleting_shard) { - // runs on shard 0 only - _deletion_logger.debug("shard {} atomically deleting {}", deleting_shard, atomic_deletion_set); - - if (_atomic_deletions_cancelled) { - _deletion_logger.debug("atomic deletions disabled, erroring out"); - using boost::adaptors::transformed; - throw atomic_deletion_cancelled(atomic_deletion_set - | transformed(std::mem_fn(&sstable_to_delete::name))); - } - - // Insert atomic_deletion_set into the list of sets pending deletion. If the new set - // overlaps with an existing set, merge them (the merged set will be deleted atomically). 
- auto merged_set = make_lw_shared(pending_deletion()); - for (auto&& sst_to_delete : atomic_deletion_set) { - merged_set->names.insert(sst_to_delete.name); - if (!sst_to_delete.shared) { - for (auto shard : boost::irange(0, _shard_count)) { - _shards_agreeing_to_delete_sstable[sst_to_delete.name].insert(shard); - } - } - } - auto pr = make_lw_shared>(); - merged_set->completions.insert(pr); - auto ret = pr->get_future(); - for (auto&& sst_to_delete : atomic_deletion_set) { - auto i = _atomic_deletion_sets.find(sst_to_delete.name); - // merge from old deletion set to new deletion set - // i->second can be nullptr, see below why - if (i != _atomic_deletion_sets.end() && i->second) { - boost::copy(i->second->names, std::inserter(merged_set->names, merged_set->names.end())); - boost::copy(i->second->completions, std::inserter(merged_set->completions, merged_set->completions.end())); - } - } - _deletion_logger.debug("new atomic set: {}", merged_set->names); - // we need to merge new_atomic_deletion_sets into g_atomic_deletion_sets, - // but beware of exceptions. We do that with a first pass that inserts - // nullptr as the value, so the second pass only replaces, and does not allocate - for (auto&& sst_to_delete : atomic_deletion_set) { - _atomic_deletion_sets.emplace(sst_to_delete.name, nullptr); - } - // now, no allocations are involved, so this commits the operation atomically - for (auto&& n : merged_set->names) { - auto i = _atomic_deletion_sets.find(n); - i->second = merged_set; - } - - // Mark each sstable as being deleted from deleting_shard. We have to do - // this in a separate pass, so the consideration whether we can delete or not - // sees all the data from this pass. - for (auto&& sst : atomic_deletion_set) { - _shards_agreeing_to_delete_sstable[sst.name].insert(deleting_shard); - } - - // Figure out if the (possibly merged) set can be deleted - for (auto&& sst : merged_set->names) { - if (_shards_agreeing_to_delete_sstable[sst].size() != _shard_count) { - // Not everyone agrees, leave the set pending - _deletion_logger.debug("deferring deletion until all shards agree"); - return ret; - } - } - - // Cannot recover from a failed deletion - for (auto&& name : merged_set->names) { - _atomic_deletion_sets.erase(name); - _shards_agreeing_to_delete_sstable.erase(name); - } - - // Everyone agrees, let's delete - auto names = boost::copy_range>(merged_set->names); - _deletion_logger.debug("deleting {}", names); - // Run deletion in the background; all callers are waiting for it via merged_set->completions - _delete_sstables(names).then_wrapped([this, merged_set] (future<> result) { - _deletion_logger.debug("atomic deletion completed: {}", merged_set->names); - shared_future<> sf(std::move(result)); - for (auto&& comp : merged_set->completions) { - sf.get_future().forward_to(std::move(*comp)); - } - }); - return ret; -} - -void -atomic_deletion_manager::cancel_atomic_deletions() { - _atomic_deletions_cancelled = true; - for (auto&& pd : _atomic_deletion_sets) { - if (!pd.second) { - // Could happen if a delete_atomically() failed - continue; - } - for (auto&& c : pd.second->completions) { - c->set_exception(atomic_deletion_cancelled(pd.second->names)); - } - // since sets are shared, make sure we don't hit the same one again - pd.second->completions.clear(); - } - _atomic_deletion_sets.clear(); - _shards_agreeing_to_delete_sstable.clear(); -} - -} diff --git a/scylla/sstables/atomic_deletion.hh b/scylla/sstables/atomic_deletion.hh deleted file mode 100644 index 2ee8c48..0000000 --- 
a/scylla/sstables/atomic_deletion.hh +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -// The atomic deletion manager solves the problem of orchestrating -// the deletion of files that must be deleted as a group, where each -// shard has different groups, and all shards delete a file for it to -// be deleted. For example, -// -// shard 0: delete "A" -// we can't delete anything because shard 1 hasn't agreed yet. -// shard 1: delete "A" and B" -// shard 1 agrees to delete "A", but we can't delete it yet, -// because shard 1 requires that it be deleted together with "B", -// and shard 0 hasn't agreed to delete "B" yet. -// shard 0: delete "B" and "C" -// shards 0 and 1 now both agree to delete "A" and "B", but shard 0 -// doesn't allow us to delete "B" without "C". -// shard 1: delete "C" -// finally, we can delete "A", "B", and "C". - -#include "log.hh" -#include -#include -#include -#include -#include // for shard_id -#include -#include -#include - -#include "seastarx.hh" - -namespace sstables { - -struct sstable_to_delete { - sstable_to_delete(sstring name, bool shared) : name(std::move(name)), shared(shared) {} - sstring name; - bool shared = false; - friend std::ostream& operator<<(std::ostream& os, const sstable_to_delete& std); -}; - -class atomic_deletion_cancelled : public std::exception { - std::string _msg; -public: - explicit atomic_deletion_cancelled(std::vector names); - template - explicit atomic_deletion_cancelled(StringRange range) - : atomic_deletion_cancelled(std::vector{range.begin(), range.end()}) { - } - const char* what() const noexcept override; -}; - -class atomic_deletion_manager { - logging::logger _deletion_logger{"sstable-deletion"}; - using shards_agreeing_to_delete_sstable_type = std::unordered_set; - using sstables_to_delete_atomically_type = std::set; - struct pending_deletion { - sstables_to_delete_atomically_type names; - std::unordered_set>> completions; - }; - bool _atomic_deletions_cancelled = false; - // map from sstable name to a set of sstables that must be deleted atomically, including itself - std::unordered_map> _atomic_deletion_sets; - std::unordered_map _shards_agreeing_to_delete_sstable; - unsigned _shard_count; - std::function (std::vector sstables)> _delete_sstables; -public: - atomic_deletion_manager(unsigned shard_count, - std::function (std::vector sstables)> delete_sstables); - future<> delete_atomically(std::vector atomic_deletion_set, unsigned deleting_shard); - void cancel_atomic_deletions(); -}; - -} diff --git a/scylla/sstables/binary_search.hh b/scylla/sstables/binary_search.hh deleted file mode 100644 index 49d269f..0000000 --- a/scylla/sstables/binary_search.hh +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - * - */ - -/* - * This file is part of Scylla. 
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "sstables/key.hh"
-#include "dht/i_partitioner.hh"
-
-namespace sstables {
-
-/**
- * @returns: >= 0, if key is found. That is the index where the key is found.
- *           -1, if key is not found, and is smaller than the first key in the list.
- *           <= -2, if key is not found, but is greater than one of the keys. By adding 2 and
- *           negating, one can determine the index before which the key would have to
- *           be inserted.
- *
- * Origin uses this slightly modified binary search for the Summary, that will
- * indicate in which bucket the element would be in case it is not a match.
- *
- * For the Index entries, it uses a "normal", java.lang binary search. Because
- * we have made the explicit decision to open-code the comparator for
- * efficiency, using a separate binary search would be possible, but very
- * messy.
- *
- * It's easier to reuse the same code for both binary searches, and just ignore
- * the extra information when not needed.
- *
- * This code should work on any kind of vector whose elements allow acquiring
- * a key view via get_key().
- */
-template <typename T>
-int binary_search(const T& entries, const key& sk, const dht::token& token) {
-    int low = 0, mid = entries.size(), high = mid - 1, result = -1;
-
-    auto& partitioner = dht::global_partitioner();
-
-    while (low <= high) {
-        // The token comparison should yield the right result most of the time.
-        // So we avoid expensive copying operations that happen at key
-        // creation by keeping only a key view, and then manually carrying out
-        // both parts of the comparison ourselves.
-        mid = low + ((high - low) >> 1);
-        key_view mid_key = entries[mid].get_key();
-        auto mid_token = partitioner.get_token(mid_key);
-
-        if (token == mid_token) {
-            result = sk.tri_compare(mid_key);
-        } else {
-            result = token < mid_token ? -1 : 1;
-        }
-
-        if (result > 0) {
-            low = mid + 1;
-        } else if (result < 0) {
-            high = mid - 1;
-        } else {
-            return mid;
-        }
-    }
-
-    return -mid - (result < 0 ? 1 : 2);
-}
-
-template <typename T>
-int binary_search(const T& entries, const key& sk) {
-    return binary_search(entries, sk, dht::global_partitioner().get_token(key_view(sk)));
-}
-
-}
diff --git a/scylla/sstables/column_name_helper.hh b/scylla/sstables/column_name_helper.hh
deleted file mode 100644
index d2e6ee0..0000000
--- a/scylla/sstables/column_name_helper.hh
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "core/sstring.hh" -#include "schema.hh" -#include "compound_compat.hh" -#include -#include -#include - -class column_name_helper { -private: - static inline void may_grow(std::vector& v, size_t target_size) { - if (target_size > v.size()) { - v.resize(target_size); - } - } -public: - template - static void min_max_components(const schema& schema, std::vector& min_seen, std::vector& max_seen, T components) { - may_grow(min_seen, schema.clustering_key_size()); - may_grow(max_seen, schema.clustering_key_size()); - - auto& types = schema.clustering_key_type()->types(); - auto i = 0U; - for (auto& value : components) { - auto& type = types[i]; - - if (!max_seen[i] || type->compare(value, max_seen[i].value()) > 0) { - max_seen[i] = bytes(value.data(), value.size()); - } - if (!min_seen[i] || type->compare(value, min_seen[i].value()) < 0) { - min_seen[i] = bytes(value.data(), value.size()); - } - i++; - } - } -}; diff --git a/scylla/sstables/compaction.cc b/scylla/sstables/compaction.cc deleted file mode 100644 index 3ceff40..0000000 --- a/scylla/sstables/compaction.cc +++ /dev/null @@ -1,615 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "core/future-util.hh" -#include "core/pipe.hh" - -#include "sstables.hh" -#include "compaction.hh" -#include "database.hh" -#include "mutation_reader.hh" -#include "schema.hh" -#include "db/system_keyspace.hh" -#include "db/query_context.hh" -#include "service/storage_service.hh" -#include "service/priority_manager.hh" -#include "db_clock.hh" -#include "mutation_compactor.hh" -#include "leveled_manifest.hh" - -namespace sstables { - -logging::logger clogger("compaction"); - -class sstable_reader final : public ::mutation_reader::impl { - shared_sstable _sst; - mutation_reader _reader; -public: - sstable_reader(shared_sstable sst, schema_ptr schema) - : _sst(std::move(sst)) - , _reader(_sst->read_rows(schema, service::get_local_compaction_priority())) - {} - virtual future operator()() override { - return _reader.read().handle_exception([sst = _sst] (auto ep) { - clogger.error("Compaction found an exception when reading sstable {} : {}", - sst->get_filename(), ep); - return make_exception_future(ep); - }); - } -}; - -static api::timestamp_type get_max_purgeable_timestamp(const column_family& cf, sstable_set::incremental_selector& selector, - const std::unordered_set& compacting_set, const dht::decorated_key& dk) { - auto timestamp = api::max_timestamp; - stdx::optional hk; - for (auto&& sst : boost::range::join(selector.select(dk.token()), cf.compacted_undeleted_sstables())) { - if (compacting_set.count(sst)) { - continue; - } - if (!hk) { - hk = sstables::sstable::make_hashed_key(*cf.schema(), dk.key()); - } - if (sst->filter_has_key(*hk)) { - timestamp = std::min(timestamp, sst->get_stats_metadata().min_timestamp); - } - } - return timestamp; -} - -static bool belongs_to_current_node(const dht::token& t, const dht::token_range_vector& sorted_owned_ranges) { - auto low = std::lower_bound(sorted_owned_ranges.begin(), sorted_owned_ranges.end(), t, - [] (const range& a, const dht::token& b) { - // check that range a is before token b. - return a.after(b, dht::token_comparator()); - }); - - if (low != sorted_owned_ranges.end()) { - const dht::token_range& r = *low; - return r.contains(t, dht::token_comparator()); - } - - return false; -} - -static void delete_sstables_for_interrupted_compaction(std::vector& new_sstables, sstring& ks, sstring& cf) { - // Delete either partially or fully written sstables of a compaction that - // was either stopped abruptly (e.g. out of disk space) or deliberately - // (e.g. nodetool stop COMPACTION). 
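The rule implemented by get_max_purgeable_timestamp() above deserves spelling out. A sketch of the invariant, as a hypothetical helper distilled from the surrounding definitions rather than code from this patch:

    // A tombstone may be dropped by this compaction only if it is older than
    // everything any non-compacting sstable that may still contain the key
    // could hold. max_purgeable is the minimum min_timestamp over sstables
    // outside the compacting set whose bloom filter admits the key
    // (api::max_timestamp if no such sstable exists).
    bool can_purge(api::timestamp_type tombstone_ts,
                   api::timestamp_type max_purgeable) {
        return tombstone_ts < max_purgeable;
    }

Otherwise, dropping the tombstone could resurrect older live cells that still exist in an sstable outside the compaction.
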
- for (auto& sst : new_sstables) { - clogger.debug("Deleting sstable {} of interrupted compaction for {}.{}", sst->get_filename(), ks, cf); - sst->mark_for_deletion(); - } -} - -static std::vector get_uncompacting_sstables(column_family& cf, std::vector& sstables) { - auto all_sstables = boost::copy_range>(*cf.get_sstables_including_compacted_undeleted()); - boost::sort(all_sstables, [] (const shared_sstable& x, const shared_sstable& y) { - return x->generation() < y->generation(); - }); - std::sort(sstables.begin(), sstables.end(), [] (const shared_sstable& x, const shared_sstable& y) { - return x->generation() < y->generation(); - }); - std::vector not_compacted_sstables; - boost::set_difference(all_sstables, sstables, - std::back_inserter(not_compacted_sstables), [] (const shared_sstable& x, const shared_sstable& y) { - return x->generation() < y->generation(); - }); - return not_compacted_sstables; -} - -class compaction; - -class compacting_sstable_writer { - compaction& _c; - sstable_writer* _writer = nullptr; -public: - explicit compacting_sstable_writer(compaction& c) : _c(c) {} - - void consume_new_partition(const dht::decorated_key& dk); - - void consume(tombstone t) { _writer->consume(t); } - stop_iteration consume(static_row&& sr, tombstone, bool) { return _writer->consume(std::move(sr)); } - stop_iteration consume(clustering_row&& cr, row_tombstone, bool) { return _writer->consume(std::move(cr)); } - stop_iteration consume(range_tombstone&& rt) { return _writer->consume(std::move(rt)); } - - stop_iteration consume_end_of_partition(); - void consume_end_of_stream(); -}; - -class compaction { -protected: - column_family& _cf; - std::vector _sstables; - uint64_t _max_sstable_size; - uint32_t _sstable_level; - lw_shared_ptr _info = make_lw_shared(); - uint64_t _estimated_partitions = 0; - std::vector _ancestors; - db::replay_position _rp; - seastar::thread_scheduling_group* _tsg; -protected: - compaction(column_family& cf, std::vector sstables, uint64_t max_sstable_size, uint32_t sstable_level, seastar::thread_scheduling_group* tsg) - : _cf(cf) - , _sstables(std::move(sstables)) - , _max_sstable_size(max_sstable_size) - , _sstable_level(sstable_level) - , _tsg(tsg) - { - _cf.get_compaction_manager().register_compaction(_info); - } - - uint64_t partitions_per_sstable() const { - uint64_t estimated_sstables = std::max(1UL, uint64_t(ceil(double(_info->start_size) / _max_sstable_size))); - return ceil(double(_estimated_partitions) / estimated_sstables); - } - - void setup_new_sstable(shared_sstable& sst) { - _info->new_sstables.push_back(sst); - sst->get_metadata_collector().set_replay_position(_rp); - sst->get_metadata_collector().sstable_level(_sstable_level); - for (auto ancestor : _ancestors) { - sst->add_ancestor(ancestor); - } - } - - void finish_new_sstable(stdx::optional& writer, shared_sstable& sst) { - writer->consume_end_of_stream(); - writer = stdx::nullopt; - sst->open_data().get0(); - _info->end_size += sst->bytes_on_disk(); - } -public: - compaction& operator=(const compaction&) = delete; - compaction(const compaction&) = delete; - - virtual ~compaction() { - _cf.get_compaction_manager().deregister_compaction(_info); - } - - seastar::thread_attributes thread_attributes() { - seastar::thread_attributes attr; - attr.scheduling_group = _tsg; - return attr; - } -private: - ::mutation_reader setup() { - std::vector<::mutation_reader> readers; - auto schema = _cf.schema(); - sstring formatted_msg = "["; - - for (auto& sst : _sstables) { - // We also capture the sstable, so we 
keep it alive while the read isn't done - readers.emplace_back(make_mutation_reader(sst, schema)); - // FIXME: If the sstables have cardinality estimation bitmaps, use that - // for a better estimate for the number of partitions in the merged - // sstable than just adding up the lengths of individual sstables. - _estimated_partitions += sst->get_estimated_key_count(); - _info->total_partitions += sst->get_estimated_key_count(); - // Compacted sstable keeps track of its ancestors. - _ancestors.push_back(sst->generation()); - formatted_msg += sprint("%s:level=%d, ", sst->get_filename(), sst->get_sstable_level()); - _info->start_size += sst->bytes_on_disk(); - // TODO: - // Note that this is not fully correct. Since we might be merging sstables that originated on - // another shard (#cpu changed), we might be comparing RP:s with differing shard ids, - // which might vary in "comparable" size quite a bit. However, since the worst that happens - // is that we might miss a high water mark for the commit log replayer, - // this is kind of ok, esp. since we will hopefully not be trying to recover based on - // compacted sstables anyway (CL should be clean by then). - _rp = std::max(_rp, sst->get_stats_metadata().position); - } - formatted_msg += "]"; - _info->sstables = _sstables.size(); - _info->ks = schema->ks_name(); - _info->cf = schema->cf_name(); - report_start(formatted_msg); - - return ::make_combined_reader(std::move(readers)); - } - - void finish(std::chrono::time_point started_at, std::chrono::time_point ended_at) { - auto ratio = double(_info->end_size) / double(_info->start_size); - auto duration = std::chrono::duration(ended_at - started_at); - auto throughput = (double(_info->end_size) / (1024*1024)) / duration.count(); - sstring new_sstables_msg; - for (auto& newtab : _info->new_sstables) { - new_sstables_msg += sprint("%s:level=%d, ", newtab->get_filename(), newtab->get_sstable_level()); - } - - // FIXME: there is some missing information in the log message below. - // look at CompactionTask::runMayThrow() in origin for reference. - // - add support to merge summary (message: Partition merge counts were {%s}.). - // - there is no easy way, currently, to know the exact number of total partitions. - // By the time being, using estimated key count. - sstring formatted_msg = sprint("%ld sstables to [%s]. %ld bytes to %ld (~%d%% of original) in %dms = %.2fMB/s. " \ - "~%ld total partitions merged to %ld.", - _info->sstables, new_sstables_msg, _info->start_size, _info->end_size, int(ratio * 100), - std::chrono::duration_cast(duration).count(), throughput, - _info->total_partitions, _info->total_keys_written); - report_finish(formatted_msg, ended_at); - } - - virtual void report_start(const sstring& formatted_msg) const = 0; - virtual void report_finish(const sstring& formatted_msg, std::chrono::time_point ended_at) const = 0; - - virtual std::function max_purgeable_func() { - return [] (const dht::decorated_key& dk) { - return api::min_timestamp; - }; - } - - virtual std::function filter_func() const { - return [] (const streamed_mutation& sm) { - return true; - }; - } - - // select a sstable writer based on decorated key. - virtual sstable_writer* select_sstable_writer(const dht::decorated_key& dk) = 0; - // stop current writer - virtual void stop_sstable_writer() = 0; - // finish all writers. 
- virtual void finish_sstable_writer() = 0; - - compacting_sstable_writer get_compacting_sstable_writer() { - return compacting_sstable_writer(*this); - } - - const schema_ptr& schema() const { - return _cf.schema(); - } -public: - static future> run(std::unique_ptr c); - - friend class compacting_sstable_writer; -}; - -void compacting_sstable_writer::consume_new_partition(const dht::decorated_key& dk) { - if (_c._info->is_stop_requested()) { - // Compaction manager will catch this exception and re-schedule the compaction. - throw compaction_stop_exception(_c._info->ks, _c._info->cf, _c._info->stop_requested); - } - _writer = _c.select_sstable_writer(dk); - _writer->consume_new_partition(dk); - _c._info->total_keys_written++; -} - -stop_iteration compacting_sstable_writer::consume_end_of_partition() { - auto ret = _writer->consume_end_of_partition(); - if (ret == stop_iteration::yes) { - // stop sstable writer being currently used. - _c.stop_sstable_writer(); - } - return ret; -} - -void compacting_sstable_writer::consume_end_of_stream() { - // this will stop any writer opened by compaction. - _c.finish_sstable_writer(); -} - -class regular_compaction : public compaction { - std::function _creator; - // store a clone of sstable set for column family, which needs to be alive for incremental selector. - const sstable_set _set; - // used to incrementally calculate max purgeable timestamp, as we iterate through decorated keys. - sstable_set::incremental_selector _selector; - // sstable being currently written. - shared_sstable _sst; - stdx::optional _writer; -public: - regular_compaction(column_family& cf, std::vector sstables, std::function creator, - uint64_t max_sstable_size, uint32_t sstable_level, seastar::thread_scheduling_group* tsg) - : compaction(cf, std::move(sstables), max_sstable_size, sstable_level, tsg) - , _creator(std::move(creator)) - , _set(cf.get_sstable_set()) - , _selector(_set.make_incremental_selector()) - { - } - - void report_start(const sstring& formatted_msg) const override { - clogger.info("Compacting {}", formatted_msg); - } - - void report_finish(const sstring& formatted_msg, std::chrono::time_point ended_at) const override { - clogger.info("Compacted {}", formatted_msg); - - // skip update if running without a query context, for example, when running a test case. - if (!db::qctx) { - return; - } - // FIXME: add support to merged_rows. merged_rows is a histogram that - // shows how many sstables each row is merged from. This information - // cannot be accessed until we make combined_reader more generic, - // for example, by adding a reducer method. 
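For context on the filter_func() defined above for regular compaction, a sketch (assuming Scylla's static token-to-core mapping; sm stands for a given streamed_mutation, and this is not code from the patch) of the predicate applied to every partition:

    // Keep a partition only if its token hashes to the core running this
    // compaction; partitions owned by other cores are skipped, so sstables
    // written under a different core count compact back to per-shard outputs.
    bool owned_here = dht::shard_of(sm.decorated_key().token()) == engine().cpu_id();
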
- auto compacted_at = std::chrono::duration_cast(ended_at.time_since_epoch()).count(); - db::system_keyspace::update_compaction_history(_info->ks, _info->cf, compacted_at, - _info->start_size, _info->end_size, std::unordered_map{}).get0(); - } - - virtual std::function max_purgeable_func() override { - std::unordered_set compacting(_sstables.begin(), _sstables.end()); - return [this, compacting = std::move(compacting)] (const dht::decorated_key& dk) { - return get_max_purgeable_timestamp(_cf, _selector, compacting, dk); - }; - } - - virtual std::function filter_func() const override { - return [] (const streamed_mutation& sm) { - return dht::shard_of(sm.decorated_key().token()) == engine().cpu_id(); - }; - } - - virtual sstable_writer* select_sstable_writer(const dht::decorated_key& dk) override { - if (!_writer) { - _sst = _creator(); - setup_new_sstable(_sst); - - auto&& priority = service::get_local_compaction_priority(); - sstable_writer_config cfg; - cfg.max_sstable_size = _max_sstable_size; - _writer.emplace(_sst->get_writer(*_cf.schema(), partitions_per_sstable(), cfg, priority)); - } - return &*_writer; - } - - virtual void stop_sstable_writer() override { - finish_new_sstable(_writer, _sst); - } - - virtual void finish_sstable_writer() override { - if (_writer) { - stop_sstable_writer(); - } - } -}; - -class cleanup_compaction final : public regular_compaction { -public: - cleanup_compaction(column_family& cf, std::vector sstables, std::function creator, - uint64_t max_sstable_size, uint32_t sstable_level, seastar::thread_scheduling_group* tsg) - : regular_compaction(cf, std::move(sstables), std::move(creator), max_sstable_size, sstable_level, tsg) - { - _info->type = compaction_type::Cleanup; - } - - void report_start(const sstring& formatted_msg) const override { - clogger.info("Cleaning {}", formatted_msg); - } - - void report_finish(const sstring& formatted_msg, std::chrono::time_point ended_at) const override { - clogger.info("Cleaned {}", formatted_msg); - } - - std::function filter_func() const override { - dht::token_range_vector owned_ranges = service::get_local_storage_service().get_local_ranges(_cf.schema()->ks_name()); - - return [this, owned_ranges = std::move(owned_ranges)] (const streamed_mutation& sm) { - if (dht::shard_of(sm.decorated_key().token()) != engine().cpu_id()) { - return false; - } - - if (!belongs_to_current_node(sm.decorated_key().token(), owned_ranges)) { - return false; - } - return true; - }; - } -}; - - -class resharding_compaction final : public compaction { - std::vector>> _output_sstables; - shard_id _shard; // shard of current sstable writer - std::function _sstable_creator; -public: - resharding_compaction(std::vector sstables, column_family& cf, std::function creator, - uint64_t max_sstable_size, uint32_t sstable_level, seastar::thread_scheduling_group* tsg) - : compaction(cf, std::move(sstables), max_sstable_size, sstable_level, tsg) - , _output_sstables(smp::count) - , _sstable_creator(std::move(creator)) - { - _info->type = compaction_type::Reshard; - } - - void report_start(const sstring& formatted_msg) const override { - clogger.info("Resharding {}", formatted_msg); - } - - void report_finish(const sstring& formatted_msg, std::chrono::time_point ended_at) const override { - clogger.info("Resharded {}", formatted_msg); - } - - sstable_writer* select_sstable_writer(const dht::decorated_key& dk) override { - _shard = dht::shard_of(dk.token()); - auto& sst = _output_sstables[_shard].first; - auto& writer = _output_sstables[_shard].second; - 
- if (!writer) { - sst = _sstable_creator(_shard); - setup_new_sstable(sst); - - sstable_writer_config cfg; - cfg.max_sstable_size = _max_sstable_size; - auto&& priority = service::get_local_compaction_priority(); - writer.emplace(sst->get_writer(*_cf.schema(), partitions_per_sstable(), cfg, priority, _shard)); - } - return &*writer; - } - - void stop_sstable_writer() override { - auto& sst = _output_sstables[_shard].first; - auto& writer = _output_sstables[_shard].second; - - finish_new_sstable(writer, sst); - } - - void finish_sstable_writer() override { - for (auto& p : _output_sstables) { - if (p.second) { - finish_new_sstable(p.second, p.first); - } - } - } -}; - -future> compaction::run(std::unique_ptr c) { - auto attr = c->thread_attributes(); - return seastar::async(std::move(attr), [c = std::move(c)] () mutable { - auto reader = c->setup(); - - auto cr = c->get_compacting_sstable_writer(); - auto cfc = make_stable_flattened_mutations_consumer>( - *c->schema(), gc_clock::now(), std::move(cr), c->max_purgeable_func()); - - auto start_time = db_clock::now(); - try { - consume_flattened_in_thread(reader, cfc, c->filter_func()); - } catch (...) { - delete_sstables_for_interrupted_compaction(c->_info->new_sstables, c->_info->ks, c->_info->cf); - c = nullptr; // make sure writers are stopped while running in thread context - throw; - } - - c->finish(std::move(start_time), db_clock::now()); - - return std::move(c->_info->new_sstables); - }); -} - -template -static std::unique_ptr make_compaction(bool cleanup, Params&&... params) { - if (cleanup) { - return std::make_unique(std::forward(params)...); - } else { - return std::make_unique(std::forward(params)...); - } -} - -future> -compact_sstables(std::vector sstables, column_family& cf, std::function creator, - uint64_t max_sstable_size, uint32_t sstable_level, bool cleanup, seastar::thread_scheduling_group *tsg) { - if (sstables.empty()) { - throw std::runtime_error(sprint("Called compaction with empty set on behalf of {}.{}", cf.schema()->ks_name(), cf.schema()->cf_name())); - } - auto c = make_compaction(cleanup, cf, std::move(sstables), std::move(creator), max_sstable_size, sstable_level, tsg); - return compaction::run(std::move(c)); -} - -future> -reshard_sstables(std::vector sstables, column_family& cf, std::function creator, - uint64_t max_sstable_size, uint32_t sstable_level, seastar::thread_scheduling_group* tsg) { - if (sstables.empty()) { - throw std::runtime_error(sprint("Called resharding with empty set on behalf of {}.{}", cf.schema()->ks_name(), cf.schema()->cf_name())); - } - auto c = std::make_unique(std::move(sstables), cf, std::move(creator), max_sstable_size, sstable_level, tsg); - return compaction::run(std::move(c)); -} - -std::vector -get_fully_expired_sstables(column_family& cf, std::vector& compacting, int32_t gc_before) { - clogger.debug("Checking droppable sstables in {}.{}", cf.schema()->ks_name(), cf.schema()->cf_name()); - - if (compacting.empty()) { - return {}; - } - - std::list candidates; - auto uncompacting_sstables = get_uncompacting_sstables(cf, compacting); - // Get list of uncompacting sstables that overlap the ones being compacted. 
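// Uncompacting sstables that overlap the compacting set matter because their
// live data caps what can be dropped: a candidate may only be dropped if none
// of its tombstones could still shadow data in them, which is what the
// min_timestamp computed below tracks.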
-    std::vector<sstables::shared_sstable> overlapping = leveled_manifest::overlapping(*cf.schema(), compacting, uncompacting_sstables);
-    int64_t min_timestamp = std::numeric_limits<int64_t>::max();
-
-    for (auto& sstable : overlapping) {
-        if (sstable->get_stats_metadata().max_local_deletion_time >= gc_before) {
-            min_timestamp = std::min(min_timestamp, sstable->get_stats_metadata().min_timestamp);
-        }
-    }
-
-    auto compacted_undeleted_gens = boost::copy_range<std::unordered_set<int64_t>>(cf.compacted_undeleted_sstables()
-        | boost::adaptors::transformed(std::mem_fn(&sstables::sstable::generation)));
-    auto has_undeleted_ancestor = [&compacted_undeleted_gens] (auto& candidate) {
-        return boost::algorithm::any_of(candidate->ancestors(), [&compacted_undeleted_gens] (auto gen) {
-            return compacted_undeleted_gens.count(gen);
-        });
-    };
-
-    // SSTables that do not contain live data are added to the list of possibly expired sstables.
-    for (auto& candidate : compacting) {
-        clogger.debug("Checking if candidate of generation {} and max_deletion_time {} is expired, gc_before is {}",
-            candidate->generation(), candidate->get_stats_metadata().max_local_deletion_time, gc_before);
-        // A fully expired sstable that still has an undeleted ancestor shouldn't be compacted: its
-        // expired data wouldn't be purged, since undeleted sstables are taken into account when
-        // calculating the max purgeable timestamp, and compacting it anyway could lead to a compaction loop.
-        if (candidate->get_stats_metadata().max_local_deletion_time < gc_before && !has_undeleted_ancestor(candidate)) {
-            clogger.debug("Adding candidate of generation {} to list of possibly expired sstables", candidate->generation());
-            candidates.push_back(candidate);
-        } else {
-            min_timestamp = std::min(min_timestamp, candidate->get_stats_metadata().min_timestamp);
-        }
-    }
-
-    auto it = candidates.begin();
-    while (it != candidates.end()) {
-        auto& candidate = *it;
-        // Remove from the list any candidate that may contain a tombstone that covers older data.
-        if (candidate->get_stats_metadata().max_timestamp >= min_timestamp) {
-            it = candidates.erase(it);
-        } else {
-            clogger.debug("Dropping expired SSTable {} (maxLocalDeletionTime={}, gcBefore={})",
-                candidate->get_filename(), candidate->get_stats_metadata().max_local_deletion_time, gc_before);
-            it++;
-        }
-    }
-    return std::vector<sstables::shared_sstable>(candidates.begin(), candidates.end());
-}
-
-}
diff --git a/scylla/sstables/compaction.hh b/scylla/sstables/compaction.hh
deleted file mode 100644
index 7baf5f6..0000000
--- a/scylla/sstables/compaction.hh
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "sstables.hh"
-#include
-
-namespace sstables {
-
-    struct compaction_descriptor {
-        // List of sstables to be compacted.
-        std::vector<sstables::shared_sstable> sstables;
-        // Level of sstable(s) created by compaction procedure.
-        int level;
-        // Threshold size for sstable(s) to be created.
-        uint64_t max_sstable_bytes;
-
-        compaction_descriptor() = default;
-
-        compaction_descriptor(std::vector<sstables::shared_sstable> sstables, int level = 0, uint64_t max_sstable_bytes = std::numeric_limits<uint64_t>::max())
-            : sstables(std::move(sstables))
-            , level(level)
-            , max_sstable_bytes(max_sstable_bytes) {}
-    };
-
-    struct resharding_descriptor {
-        std::vector<sstables::shared_sstable> sstables;
-        uint64_t max_sstable_bytes;
-        shard_id reshard_at;
-        uint32_t level;
-    };
-
-    enum class compaction_type {
-        Compaction = 0,
-        Cleanup = 1,
-        Validation = 2,
-        Scrub = 3,
-        Index_build = 4,
-        Reshard = 5,
-    };
-
-    static inline sstring compaction_name(compaction_type type) {
-        switch (type) {
-        case compaction_type::Compaction:
-            return "COMPACTION";
-        case compaction_type::Cleanup:
-            return "CLEANUP";
-        case compaction_type::Validation:
-            return "VALIDATION";
-        case compaction_type::Scrub:
-            return "SCRUB";
-        case compaction_type::Index_build:
-            return "INDEX_BUILD";
-        case compaction_type::Reshard:
-            return "RESHARD";
-        default:
-            throw std::runtime_error("Invalid Compaction Type");
-        }
-    }
-
-    struct compaction_info {
-        compaction_type type = compaction_type::Compaction;
-        sstring ks;
-        sstring cf;
-        size_t sstables = 0;
-        uint64_t start_size = 0;
-        uint64_t end_size = 0;
-        uint64_t total_partitions = 0;
-        uint64_t total_keys_written = 0;
-        std::vector<shared_sstable> new_sstables;
-        sstring stop_requested;
-
-        bool is_stop_requested() const {
-            return stop_requested.size() > 0;
-        }
-
-        void stop(sstring reason) {
-            stop_requested = std::move(reason);
-        }
-    };
-
-    // Compact a list of N sstables into M sstables.
-    // Returns a vector with the newly created sstable(s).
-    //
-    // creator is used to get an sstable object for a new sstable that will be written.
-    // max_sstable_size is a relaxed size limit for an sstable to be generated.
-    // Example: it's okay for the size of a new sstable to go beyond max_sstable_size
-    // when writing its last partition.
-    // sstable_level will be the level of the sstable(s) created by this function.
-    // If cleanup is true, mutations that don't belong to the current node will be
-    // cleaned up, log messages will inform the user that compact_sstables runs for
-    // a cleaning operation, and compaction history will not be updated.
-    future<std::vector<shared_sstable>> compact_sstables(std::vector<shared_sstable> sstables,
-        column_family& cf, std::function<shared_sstable()> creator,
-        uint64_t max_sstable_size, uint32_t sstable_level, bool cleanup = false,
-        seastar::thread_scheduling_group* tsg = nullptr);
-
-    // Compacts a set of N shared sstables into M sstables. For every shard involved,
-    // i.e. every shard owning any of the sstables, a new unshared sstable is created.
-    future<std::vector<shared_sstable>> reshard_sstables(std::vector<shared_sstable> sstables,
-        column_family& cf, std::function<shared_sstable(shard_id)> creator,
-        uint64_t max_sstable_size, uint32_t sstable_level,
-        seastar::thread_scheduling_group* tsg = nullptr);
-
-    // Return the most interesting bucket applying the size-tiered strategy.
-    std::vector<shared_sstable>
-    size_tiered_most_interesting_bucket(lw_shared_ptr<sstable_list> candidates);
-
-    std::vector<shared_sstable>
-    size_tiered_most_interesting_bucket(const std::list<shared_sstable>& candidates);
-
-    // Return the list of expired sstables for column family cf.
-    // An sstable is fully expired *iff* its max_local_deletion_time precedes gc_before and its
-    // max timestamp is lower than that of any other relevant sstable.
-    // In simpler words, an sstable is fully expired if all of its live TTL'd cells are expired
-    // and it doesn't contain any tombstone that covers cells in other sstables.
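    // Example: with gc_before = 1000, an sstable whose max_local_deletion_time
    // is 900 holds only expired data; it is returned only if its max timestamp
    // also precedes the min timestamp of every overlapping live sstable, so no
    // tombstone in it still shadows older live data elsewhere.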
- std::vector - get_fully_expired_sstables(column_family& cf, std::vector& compacting, int32_t gc_before); -} diff --git a/scylla/sstables/compaction_manager.cc b/scylla/sstables/compaction_manager.cc deleted file mode 100644 index 5d2ab93..0000000 --- a/scylla/sstables/compaction_manager.cc +++ /dev/null @@ -1,561 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "compaction_manager.hh" -#include "database.hh" -#include -#include "exceptions.hh" -#include - -static logging::logger cmlog("compaction_manager"); - -class compacting_sstable_registration { - compaction_manager* _cm; - std::vector _compacting; -public: - compacting_sstable_registration(compaction_manager* cm, std::vector compacting) - : _cm(cm) - , _compacting(std::move(compacting)) - { - _cm->register_compacting_sstables(_compacting); - } - - compacting_sstable_registration& operator=(const compacting_sstable_registration&) = delete; - compacting_sstable_registration(const compacting_sstable_registration&) = delete; - - compacting_sstable_registration& operator=(compacting_sstable_registration&& other) noexcept { - if (this != &other) { - this->~compacting_sstable_registration(); - new (this) compacting_sstable_registration(std::move(other)); - } - return *this; - } - - compacting_sstable_registration(compacting_sstable_registration&& other) noexcept - : _cm(other._cm) - , _compacting(std::move(other._compacting)) - { - other._cm = nullptr; - } - - ~compacting_sstable_registration() { - if (_cm) { - _cm->deregister_compacting_sstables(_compacting); - } - } -}; - -class compaction_weight_registration { - compaction_manager* _cm; - column_family* _cf; - int _weight; -public: - compaction_weight_registration(compaction_manager* cm, column_family* cf, int weight) - : _cm(cm) - , _cf(cf) - , _weight(weight) - { - _cm->register_weight(_cf, _weight); - } - - compaction_weight_registration& operator=(const compaction_weight_registration&) = delete; - compaction_weight_registration(const compaction_weight_registration&) = delete; - - compaction_weight_registration& operator=(compaction_weight_registration&& other) noexcept { - if (this != &other) { - this->~compaction_weight_registration(); - new (this) compaction_weight_registration(std::move(other)); - } - return *this; - } - - compaction_weight_registration(compaction_weight_registration&& other) noexcept - : _cm(other._cm) - , _cf(other._cf) - , _weight(other._weight) - { - other._cm = nullptr; - other._cf = nullptr; - other._weight = 0; - } - - ~compaction_weight_registration() { - if (_cm) { - _cm->deregister_weight(_cf, _weight); - } - } -}; - -static inline uint64_t get_total_size(const std::vector& sstables) { - uint64_t total_size = 0; - for (auto& sst : sstables) { - total_size += sst->data_size(); - } - return total_size; -} - -// Calculate weight of compaction job. 
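// For example, with log base 4: int(log4(160 MB)) = 13 while int(log4(10 GB)) = 16,
// so a 160 MB job and a 10 GB job land on different weights and may run
// concurrently even when only one job per weight is allowed.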
-static inline int calculate_weight(uint64_t total_size) { - // At the moment, '4' is being used as log base for determining the weight - // of a compaction job. With base of 4, what happens is that when you have - // a 40-second compaction in progress, and a tiny 10-second compaction - // comes along, you do them in parallel. - // TODO: Find a possibly better log base through experimentation. - static constexpr int WEIGHT_LOG_BASE = 4; - - // computes the logarithm (base WEIGHT_LOG_BASE) of total_size. - return int(std::log(total_size) / std::log(WEIGHT_LOG_BASE)); -} - -static inline int calculate_weight(const std::vector& sstables) { - if (sstables.empty()) { - return 0; - } - return calculate_weight(get_total_size(sstables)); -} - -int compaction_manager::trim_to_compact(column_family* cf, sstables::compaction_descriptor& descriptor) { - int weight = calculate_weight(descriptor.sstables); - // NOTE: a compaction job with level > 0 cannot be trimmed because leveled - // compaction relies on higher levels having no overlapping sstables. - if (descriptor.level != 0 || descriptor.sstables.empty()) { - return weight; - } - auto it = _weight_tracker.find(cf); - if (it == _weight_tracker.end()) { - return weight; - } - - std::unordered_set& s = it->second; - uint64_t total_size = get_total_size(descriptor.sstables); - int min_threshold = cf->schema()->min_compaction_threshold(); - - while (descriptor.sstables.size() > size_t(min_threshold)) { - if (s.count(weight)) { - total_size -= descriptor.sstables.back()->data_size(); - descriptor.sstables.pop_back(); - weight = calculate_weight(total_size); - } else { - break; - } - } - return weight; -} - -bool compaction_manager::can_register_weight(column_family* cf, int weight, bool parallel_compaction) { - auto it = _weight_tracker.find(cf); - if (it == _weight_tracker.end()) { - return true; - } - std::unordered_set& s = it->second; - // Only one weight is allowed if parallel compaction is disabled. - if (!parallel_compaction && !s.empty()) { - return false; - } - // TODO: Maybe allow only *smaller* compactions to start? That can be done - // by returning true only if weight is not in the set and is lower than any - // entry in the set. - if (s.count(weight)) { - // If reached this point, it means that there is an ongoing compaction - // with the weight of the compaction job. - return false; - } - return true; -} - -void compaction_manager::register_weight(column_family* cf, int weight) { - auto it = _weight_tracker.find(cf); - if (it == _weight_tracker.end()) { - _weight_tracker.insert({cf, {weight}}); - } else { - it->second.insert(weight); - } -} - -void compaction_manager::deregister_weight(column_family* cf, int weight) { - auto it = _weight_tracker.find(cf); - assert(it != _weight_tracker.end()); - it->second.erase(weight); -} - -std::vector compaction_manager::get_candidates(const column_family& cf) { - std::vector candidates; - candidates.reserve(cf.sstables_count()); - // Filter out sstables that are being compacted. - for (auto& sst : cf.candidates_for_compaction()) { - if (!_compacting_sstables.count(sst)) { - candidates.push_back(sst); - } - } - return candidates; -} - -void compaction_manager::register_compacting_sstables(const std::vector& sstables) { - for (auto& sst : sstables) { - _compacting_sstables.insert(sst); - } -} - -void compaction_manager::deregister_compacting_sstables(const std::vector& sstables) { - // Remove compacted sstables from the set of compacting sstables. 
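// This is the counterpart of register_compacting_sstables(); the
// compacting_sstable_registration destructor calls it once a compaction job
// finishes, successfully or not.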
-    for (auto& sst : sstables) {
-        _compacting_sstables.erase(sst);
-    }
-}
-
-// submit_sstable_rewrite() starts a compaction task, much like submit(),
-// but rather than asking a compaction policy what to compact, this function
-// compacts just a single sstable, and writes one new sstable. This operation
-// is useful to split an sstable containing data belonging to multiple shards
-// into a separate sstable on each shard.
-void compaction_manager::submit_sstable_rewrite(column_family* cf, sstables::shared_sstable sst) {
-    // The semaphore ensures that the sstable rewrite operations submitted by
-    // submit_sstable_rewrite are run in sequence, and not all of them in
-    // parallel. Note that unlike general compaction, which currently allows
-    // different cfs to compact in parallel, here we don't have a semaphore
-    // per cf, so we only get one rewrite at a time on each shard.
-    static thread_local semaphore sem(1);
-    // We cannot, and don't need to, compact an sstable which is already
-    // being compacted anyway.
-    if (_stopped || _compacting_sstables.count(sst)) {
-        return;
-    }
-    // Conversely, we don't want another compaction job to compact the
-    // sstable we are planning to work on:
-    _compacting_sstables.insert(sst);
-    auto task = make_lw_shared<compaction_manager::task>();
-    task->compacting_cf = cf;
-    _tasks.push_back(task);
-    task->compaction_done = with_semaphore(sem, 1, [this, task, cf, sst] {
-        _stats.active_tasks++;
-        if (!can_proceed(task)) {
-            return make_ready_future<>();
-        }
-        return cf->compact_sstables(sstables::compaction_descriptor(
-            std::vector<sstables::shared_sstable>{sst},
-            sst->get_sstable_level(),
-            std::numeric_limits<uint64_t>::max()), false);
-    }).then_wrapped([this, sst, task] (future<> f) {
-        _compacting_sstables.erase(sst);
-        _stats.active_tasks--;
-        _tasks.remove(task);
-        try {
-            f.get();
-            _stats.completed_tasks++;
-        } catch (sstables::compaction_stop_exception& e) {
-            cmlog.info("compaction info: {}", e.what());
-            _stats.errors++;
-        } catch (...) {
-            cmlog.error("compaction failed: {}", std::current_exception());
-            _stats.errors++;
-        }
-    });
-}
-
-future<> compaction_manager::submit_major_compaction(column_family* cf) {
-    if (_stopped) {
-        return make_ready_future<>();
-    }
-    auto task = make_lw_shared<compaction_manager::task>();
-    task->compacting_cf = cf;
-    _tasks.push_back(task);
-
-    // First take the major compaction semaphore, then exclusively take the compaction
-    // lock for the column family. It cannot be the other way around, or minor compaction
-    // for this column family would be blocked for as long as an ongoing major compaction
-    // holds the semaphore.
-    task->compaction_done = with_semaphore(_major_compaction_sem, 1, [this, task, cf] {
-        return with_lock(_compaction_locks[cf].for_write(), [this, task, cf] {
-            _stats.active_tasks++;
-            if (!can_proceed(task)) {
-                return make_ready_future<>();
-            }
-
-            // Candidates are sstables that aren't being operated on by other compaction types;
-            // those are eligible for major compaction.
-            // FIXME: we need to make major compaction compaction-strategy aware. For example,
-            // leveled strategy may want to promote the merged sstables of a level N.
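            // compacting_sstable_registration below is an RAII guard: constructing
            // it marks the candidates as compacting, and moving it into the
            // continuation keeps them marked until the compaction future resolves,
            // even if it resolves with an exception.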
- auto sstables = get_candidates(*cf); - auto compacting = compacting_sstable_registration(this, sstables); - - return cf->compact_sstables(sstables::compaction_descriptor(std::move(sstables))).then([compacting = std::move(compacting)] {}); - }); - }).then_wrapped([this, task] (future<> f) { - _stats.active_tasks--; - _tasks.remove(task); - try { - f.get(); - _stats.completed_tasks++; - } catch (sstables::compaction_stop_exception& e) { - cmlog.info("major compaction stopped, reason: {}", e.what()); - _stats.errors++; - } catch (...) { - cmlog.error("major compaction failed, reason: {}", std::current_exception()); - _stats.errors++; - } - }); - return task->compaction_done.get_future().then([task] {}); -} - -future<> compaction_manager::task_stop(lw_shared_ptr task) { - task->stopping = true; - auto f = task->compaction_done.get_future(); - return f.then([task] { - task->stopping = false; - return make_ready_future<>(); - }); -} - -compaction_manager::compaction_manager() = default; - -compaction_manager::~compaction_manager() { - // Assert that compaction manager was explicitly stopped, if started. - // Otherwise, fiber(s) will be alive after the object is destroyed. - assert(_stopped == true); -} - -void compaction_manager::register_metrics() { - namespace sm = seastar::metrics; - - _metrics.add_group("compaction_manager", { - sm::make_gauge("compactions", [this] { return _stats.active_tasks; }, - sm::description("Holds the number of currently active compactions. " - "Too high number of concurrent compactions may overwhelm the disk.")), - }); -} - -void compaction_manager::start() { - _stopped = false; - register_metrics(); -} - -future<> compaction_manager::stop() { - cmlog.info("Asked to stop"); - if (_stopped) { - return make_ready_future<>(); - } - _stopped = true; - // Reset the metrics registry - _metrics.clear(); - // Stop all ongoing compaction. - for (auto& info : _compactions) { - info->stop("shutdown"); - } - // Wait for each task handler to stop. Copy list because task remove itself - // from the list when done. - auto tasks = _tasks; - return do_with(std::move(tasks), [this] (std::list>& tasks) { - return parallel_for_each(tasks, [this] (auto& task) { - return this->task_stop(task); - }); - }).then([this] { - _weight_tracker.clear(); - cmlog.info("Stopped"); - return make_ready_future<>(); - }); -} - -inline bool compaction_manager::can_proceed(const lw_shared_ptr& task) { - return !_stopped && !task->stopping; -} - -inline future<> compaction_manager::put_task_to_sleep(lw_shared_ptr& task) { - cmlog.info("compaction task handler sleeping for {} seconds", - std::chrono::duration_cast(task->compaction_retry.sleep_time()).count()); - return task->compaction_retry.retry(); -} - -inline bool compaction_manager::maybe_stop_on_error(future<> f) { - bool retry = false; - try { - f.get(); - } catch (sstables::compaction_stop_exception& e) { - // We want compaction stopped here to be retried because this may have - // happened at user request (using nodetool stop), and to mimic C* - // behavior, compaction is retried later on. - cmlog.info("compaction info: {}", e.what()); - retry = true; - } catch (storage_io_error& e) { - cmlog.error("compaction failed due to storage io error: {}", e.what()); - retry = false; - stop(); - } catch (...) 
{ - cmlog.error("compaction failed: {}", std::current_exception()); - retry = true; - } - return retry; -} - -void compaction_manager::submit(column_family* cf) { - auto task = make_lw_shared(); - task->compacting_cf = cf; - _tasks.push_back(task); - _stats.pending_tasks++; - - task->compaction_done = repeat([this, task, cf] () mutable { - if (!can_proceed(task)) { - _stats.pending_tasks--; - return make_ready_future(stop_iteration::yes); - } - return with_lock(_compaction_locks[cf].for_read(), [this, task] () mutable { - column_family& cf = *task->compacting_cf; - sstables::compaction_strategy cs = cf.get_compaction_strategy(); - sstables::compaction_descriptor descriptor = cs.get_sstables_for_compaction(cf, get_candidates(cf)); - int weight = trim_to_compact(&cf, descriptor); - - // Stop compaction task immediately if strategy is satisfied or job cannot run in parallel. - if (descriptor.sstables.empty() || !can_register_weight(&cf, weight, cs.parallel_compaction())) { - _stats.pending_tasks--; - cmlog.debug("Refused compaction job ({} sstable(s)) of weight {} for {}.{}", - descriptor.sstables.size(), weight, cf.schema()->ks_name(), cf.schema()->cf_name()); - return make_ready_future(stop_iteration::yes); - } - auto compacting = compacting_sstable_registration(this, descriptor.sstables); - auto c_weight = compaction_weight_registration(this, &cf, weight); - cmlog.debug("Accepted compaction job ({} sstable(s)) of weight {} for {}.{}", - descriptor.sstables.size(), weight, cf.schema()->ks_name(), cf.schema()->cf_name()); - - _stats.pending_tasks--; - _stats.active_tasks++; - return cf.run_compaction(std::move(descriptor)) - .then_wrapped([this, task, compacting = std::move(compacting), c_weight = std::move(c_weight)] (future<> f) mutable { - _stats.active_tasks--; - - if (!can_proceed(task)) { - maybe_stop_on_error(std::move(f)); - return make_ready_future(stop_iteration::yes); - } - if (maybe_stop_on_error(std::move(f))) { - _stats.errors++; - _stats.pending_tasks++; - return put_task_to_sleep(task).then([] { - return make_ready_future(stop_iteration::no); - }); - } - _stats.pending_tasks++; - _stats.completed_tasks++; - task->compaction_retry.reset(); - return make_ready_future(stop_iteration::no); - }); - }); - }).finally([this, task] { - _tasks.remove(task); - }); -} - -inline bool compaction_manager::check_for_cleanup(column_family* cf) { - for (auto& task : _tasks) { - if (task->compacting_cf == cf && task->cleanup) { - return true; - } - } - return false; -} - -future<> compaction_manager::perform_cleanup(column_family* cf) { - if (check_for_cleanup(cf)) { - throw std::runtime_error(sprint("cleanup request failed: there is an ongoing cleanup on %s.%s", - cf->schema()->ks_name(), cf->schema()->cf_name())); - } - auto task = make_lw_shared(); - task->compacting_cf = cf; - task->cleanup = true; - _tasks.push_back(task); - _stats.pending_tasks++; - - task->compaction_done = repeat([this, task] () mutable { - if (!can_proceed(task)) { - _stats.pending_tasks--; - return make_ready_future(stop_iteration::yes); - } - column_family& cf = *task->compacting_cf; - sstables::compaction_descriptor descriptor = sstables::compaction_descriptor(get_candidates(cf)); - auto compacting = compacting_sstable_registration(this, descriptor.sstables); - - _stats.pending_tasks--; - _stats.active_tasks++; - return cf.cleanup_sstables(std::move(descriptor)) - .then_wrapped([this, task, compacting = std::move(compacting)] (future<> f) mutable { - _stats.active_tasks--; - if (!can_proceed(task)) { - 
maybe_stop_on_error(std::move(f)); - return make_ready_future(stop_iteration::yes); - } - if (maybe_stop_on_error(std::move(f))) { - _stats.errors++; - _stats.pending_tasks++; - return put_task_to_sleep(task).then([] { - return make_ready_future(stop_iteration::no); - }); - } - _stats.completed_tasks++; - return make_ready_future(stop_iteration::yes); - }); - }).finally([this, task] { - _tasks.remove(task); - }); - - return task->compaction_done.get_future().then([task] {}); -} - -future<> compaction_manager::remove(column_family* cf) { - // FIXME: better way to iterate through compaction info for a given column family, - // although this path isn't performance sensitive. - for (auto& info : _compactions) { - if (cf->schema()->ks_name() == info->ks && cf->schema()->cf_name() == info->cf) { - info->stop("column family removal"); - } - } - - // We need to guarantee that a task being stopped will not retry to compact - // a column family being removed. - auto tasks_to_stop = make_lw_shared>>(); - for (auto& task : _tasks) { - if (task->compacting_cf == cf) { - tasks_to_stop->push_back(task); - task->stopping = true; - } - } - // Wait for the termination of an ongoing compaction on cf, if any. - return do_for_each(*tasks_to_stop, [this, cf] (auto& task) { - return this->task_stop(task); - }).then([this, cf, tasks_to_stop] { - _weight_tracker.erase(cf); - _compaction_locks.erase(cf); - }); -} - -void compaction_manager::stop_compaction(sstring type) { - // TODO: this method only works for compaction of type compaction and cleanup. - // Other types are: validation, scrub, index_build. - sstables::compaction_type target_type; - if (type == "COMPACTION") { - target_type = sstables::compaction_type::Compaction; - } else if (type == "CLEANUP") { - target_type = sstables::compaction_type::Cleanup; - } else { - throw std::runtime_error(sprint("Compaction of type %s cannot be stopped by compaction manager", type.c_str())); - } - for (auto& info : _compactions) { - if (target_type == info->type) { - info->stop("user request"); - } - } -} diff --git a/scylla/sstables/compaction_manager.hh b/scylla/sstables/compaction_manager.hh deleted file mode 100644 index d3b0889..0000000 --- a/scylla/sstables/compaction_manager.hh +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "core/semaphore.hh" -#include "core/sstring.hh" -#include "core/shared_ptr.hh" -#include "core/gate.hh" -#include "core/shared_future.hh" -#include "core/rwlock.hh" -#include -#include "log.hh" -#include "utils/exponential_backoff_retry.hh" -#include -#include -#include -#include "sstables/compaction.hh" - -class column_family; -class compacting_sstable_registration; -class compaction_weight_registration; - -// Compaction manager is a feature used to manage compaction jobs from multiple -// column families pertaining to the same database. 
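// A typical per-shard lifecycle, as a minimal sketch only (assumes a
// column_family instance `cf` owned by the database, as in the member
// functions below):
//
//     compaction_manager cm;
//     cm.start();                        // registers metrics, enables submission
//     cm.submit(&cf);                    // minor compaction, strategy-driven
//     cm.submit_major_compaction(&cf);   // serialized via _major_compaction_sem
//     cm.stop().get();                   // must complete before destruction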
-class compaction_manager {
-public:
-    struct stats {
-        int64_t pending_tasks = 0;
-        int64_t completed_tasks = 0;
-        uint64_t active_tasks = 0; // Number of compactions going on.
-        int64_t errors = 0;
-    };
-private:
-    struct task {
-        column_family* compacting_cf = nullptr;
-        shared_future<> compaction_done = make_ready_future<>();
-        exponential_backoff_retry compaction_retry = exponential_backoff_retry(std::chrono::seconds(5), std::chrono::seconds(300));
-        bool stopping = false;
-        bool cleanup = false;
-    };
-
-    // The compaction manager may have N fibers, to allow parallel compaction per shard.
-    std::list<lw_shared_ptr<task>> _tasks;
-
-    // Used to assert that compaction_manager was explicitly stopped, if started.
-    bool _stopped = true;
-
-    stats _stats;
-    seastar::metrics::metric_groups _metrics;
-
-    std::list<lw_shared_ptr<sstables::compaction_info>> _compactions;
-
-    // Store sstables that are being compacted at the moment. That's needed to prevent
-    // an sstable from being compacted twice.
-    std::unordered_set<sstables::shared_sstable> _compacting_sstables;
-
-    // Keep track of the weight of ongoing compactions for each column family.
-    // That's used to allow parallel compaction on the same column family.
-    std::unordered_map<column_family*, std::unordered_set<int>> _weight_tracker;
-
-    // The purpose is to serialize major compaction across all column families, so as to
-    // reduce the disk space requirement.
-    semaphore _major_compaction_sem{1};
-    // Prevents a column family from running major and minor compaction at the same time.
-    std::unordered_map<column_family*, rwlock> _compaction_locks;
-private:
-    future<> task_stop(lw_shared_ptr<task> task);
-
-    // Return true if weight is not registered. If parallel_compaction is not
-    // true, only one weight is allowed to be registered.
-    bool can_register_weight(column_family* cf, int weight, bool parallel_compaction);
-    // Register weight for a column family. Do that only if can_register_weight()
-    // returned true.
-    void register_weight(column_family* cf, int weight);
-    // Deregister weight for a column family.
-    void deregister_weight(column_family* cf, int weight);
-
-    // If the weight of a compaction job is already taken, the job will be trimmed until
-    // its new weight is free or its size reaches the minimum threshold.
-    // Return the weight of the compaction job.
-    int trim_to_compact(column_family* cf, sstables::compaction_descriptor& descriptor);
-
-    // Get candidates for the compaction strategy, which are all sstables but the ones being compacted.
-    std::vector<sstables::shared_sstable> get_candidates(const column_family& cf);
-
-    void register_compacting_sstables(const std::vector<sstables::shared_sstable>& sstables);
-    void deregister_compacting_sstables(const std::vector<sstables::shared_sstable>& sstables);
-
-    // Return true if neither the compaction manager nor the task was asked to stop.
-    inline bool can_proceed(const lw_shared_ptr<task>& task);
-
-    // Check if the column family is being cleaned up.
-    inline bool check_for_cleanup(column_family *cf);
-
-    inline future<> put_task_to_sleep(lw_shared_ptr<task>& task);
-
-    // The compaction manager stops itself if it finds a storage I/O error, which also
-    // results in the transport services being stopped; it cannot make progress anyway.
-    // Returns true if the error is judged not fatal, and the compaction can be retried.
-    inline bool maybe_stop_on_error(future<> f);
-public:
-    compaction_manager();
-    ~compaction_manager();
-
-    void register_metrics();
-
-    // Start the compaction manager.
-    void start();
-
-    // Stop all fibers. Ongoing compactions will be waited for.
-    future<> stop();
-
-    bool stopped() const { return _stopped; }
-
-    // Submit a column family to be compacted.
-    void submit(column_family* cf);
-
-    // Submit a column family to be cleaned up and wait for its termination.
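    // Cleanup drops partitions this node no longer owns (for instance after
    // token ranges moved away); check_for_cleanup() rejects a second concurrent
    // cleanup request for the same column family.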
- future<> perform_cleanup(column_family* cf); - - // Submit a specific sstable to be rewritten, while dropping data which - // does not belong to this shard. Meant to be used on startup when an - // sstable is shared by multiple shards, and we want to split it to a - // separate sstable for each shard. - void submit_sstable_rewrite(column_family* cf, - sstables::shared_sstable s); - - // Submit a column family for major compaction. - future<> submit_major_compaction(column_family* cf); - - // Remove a column family from the compaction manager. - // Cancel requests on cf and wait for a possible ongoing compaction on cf. - future<> remove(column_family* cf); - - const stats& get_stats() const { - return _stats; - } - - void register_compaction(lw_shared_ptr c) { - _compactions.push_back(c); - } - - void deregister_compaction(lw_shared_ptr c) { - _compactions.remove(c); - } - - const std::list>& get_compactions() const { - return _compactions; - } - - // Stops ongoing compaction of a given type. - void stop_compaction(sstring type); - - friend class compacting_sstable_registration; - friend class compaction_weight_registration; -}; - diff --git a/scylla/sstables/compaction_strategy.cc b/scylla/sstables/compaction_strategy.cc deleted file mode 100644 index 8c07ff2..0000000 --- a/scylla/sstables/compaction_strategy.cc +++ /dev/null @@ -1,948 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -#include "sstables.hh" -#include "compaction.hh" -#include "database.hh" -#include "compaction_strategy.hh" -#include "schema.hh" -#include "cql3/statements/property_definitions.hh" -#include "leveled_manifest.hh" -#include "sstable_set.hh" -#include "compatible_ring_position.hh" -#include -#include -#include "date_tiered_compaction_strategy.hh" - -logging::logger date_tiered_manifest::logger = logging::logger("DateTieredCompactionStrategy"); -logging::logger leveled_manifest::logger("LeveledManifest"); - -namespace sstables { - -extern logging::logger clogger; - -class incremental_selector_impl { -public: - virtual ~incremental_selector_impl() {} - virtual std::pair> select(const dht::token& token) = 0; -}; - -class sstable_set_impl { -public: - virtual ~sstable_set_impl() {} - virtual std::unique_ptr clone() const = 0; - virtual std::vector select(const dht::partition_range& range) const = 0; - virtual void insert(shared_sstable sst) = 0; - virtual void erase(shared_sstable sst) = 0; - virtual std::unique_ptr make_incremental_selector() const = 0; -}; - -sstable_set::sstable_set(std::unique_ptr impl, lw_shared_ptr all) - : _impl(std::move(impl)) - , _all(std::move(all)) { -} - -sstable_set::sstable_set(const sstable_set& x) - : _impl(x._impl->clone()) - , _all(make_lw_shared(sstable_list(*x._all))) { -} - -sstable_set::sstable_set(sstable_set&&) noexcept = default; - -sstable_set& -sstable_set::operator=(const sstable_set& x) { - if (this != &x) { - auto tmp = sstable_set(x); - *this = std::move(tmp); - } - return *this; -} - -sstable_set& -sstable_set::operator=(sstable_set&&) noexcept = default; - -std::vector -sstable_set::select(const dht::partition_range& range) const { - return _impl->select(range); -} - -void -sstable_set::insert(shared_sstable sst) { - _impl->insert(sst); - try { - _all->insert(sst); - } catch (...) 
{ - _impl->erase(sst); - throw; - } -} - -void -sstable_set::erase(shared_sstable sst) { - _impl->erase(sst); - _all->erase(sst); -} - -sstable_set::~sstable_set() = default; - -sstable_set::incremental_selector::incremental_selector(std::unique_ptr impl) - : _impl(std::move(impl)) { -} - -sstable_set::incremental_selector::~incremental_selector() = default; - -sstable_set::incremental_selector::incremental_selector(sstable_set::incremental_selector&&) noexcept = default; - -const std::vector& -sstable_set::incremental_selector::select(const dht::token& t) const { - if (!_current_token_range || !_current_token_range->contains(t, dht::token_comparator())) { - auto&& x = _impl->select(t); - _current_token_range = std::move(std::get<0>(x)); - _current_sstables = std::move(std::get<1>(x)); - } - return _current_sstables; -} - -sstable_set::incremental_selector -sstable_set::make_incremental_selector() const { - return incremental_selector(_impl->make_incremental_selector()); -} - -// default sstable_set, not specialized for anything -class bag_sstable_set : public sstable_set_impl { - // erasing is slow, but select() is fast - std::vector _sstables; -public: - virtual std::unique_ptr clone() const override { - return std::make_unique(*this); - } - virtual std::vector select(const dht::partition_range& range = query::full_partition_range) const override { - return _sstables; - } - virtual void insert(shared_sstable sst) override { - _sstables.push_back(std::move(sst)); - } - virtual void erase(shared_sstable sst) override { - _sstables.erase(boost::find(_sstables, sst)); - } - virtual std::unique_ptr make_incremental_selector() const override; - class incremental_selector; -}; - -class bag_sstable_set::incremental_selector : public incremental_selector_impl { - const std::vector& _sstables; -public: - incremental_selector(const std::vector& sstables) - : _sstables(sstables) { - } - virtual std::pair> select(const dht::token& token) override { - return std::make_pair(dht::token_range::make_open_ended_both_sides(), _sstables); - } -}; - -std::unique_ptr bag_sstable_set::make_incremental_selector() const { - return std::make_unique(_sstables); -} - -// specialized when sstables are partitioned in the token range space -// e.g. 
leveled compaction strategy -class partitioned_sstable_set : public sstable_set_impl { - using value_set = std::unordered_set; - using interval_map_type = boost::icl::interval_map; - using interval_type = interval_map_type::interval_type; - using map_iterator = interval_map_type::const_iterator; -private: - schema_ptr _schema; - std::vector _unleveled_sstables; - interval_map_type _leveled_sstables; -private: - static interval_type make_interval(const schema& s, const dht::partition_range& range) { - return interval_type::closed( - compatible_ring_position(s, range.start()->value()), - compatible_ring_position(s, range.end()->value())); - } - interval_type make_interval(const dht::partition_range& range) const { - return make_interval(*_schema, range); - } - interval_type singular(const dht::ring_position& rp) const { - auto crp = compatible_ring_position(*_schema, rp); - return interval_type::closed(crp, crp); - } - std::pair query(const dht::partition_range& range) const { - if (range.start() && range.end()) { - return _leveled_sstables.equal_range(make_interval(range)); - } - else if (range.start() && !range.end()) { - auto start = singular(range.start()->value()); - return { _leveled_sstables.lower_bound(start), _leveled_sstables.end() }; - } else if (!range.start() && range.end()) { - auto end = singular(range.end()->value()); - return { _leveled_sstables.begin(), _leveled_sstables.upper_bound(end) }; - } else { - return { _leveled_sstables.begin(), _leveled_sstables.end() }; - } - } -public: - explicit partitioned_sstable_set(schema_ptr schema) - : _schema(std::move(schema)) { - } - virtual std::unique_ptr clone() const override { - return std::make_unique(*this); - } - virtual std::vector select(const dht::partition_range& range) const override { - auto ipair = query(range); - auto b = std::move(ipair.first); - auto e = std::move(ipair.second); - value_set result; - while (b != e) { - boost::copy(b++->second, std::inserter(result, result.end())); - } - auto r = _unleveled_sstables; - r.insert(r.end(), result.begin(), result.end()); - return r; - } - virtual void insert(shared_sstable sst) override { - if (sst->get_sstable_level() == 0) { - _unleveled_sstables.push_back(std::move(sst)); - } else { - auto first = sst->get_first_decorated_key().token(); - auto last = sst->get_last_decorated_key().token(); - using bound = dht::partition_range::bound; - _leveled_sstables.add({ - make_interval( - dht::partition_range( - bound(dht::ring_position::starting_at(first)), - bound(dht::ring_position::ending_at(last)))), - value_set({sst})}); - } - } - virtual void erase(shared_sstable sst) override { - if (sst->get_sstable_level() == 0) { - _unleveled_sstables.erase(std::remove(_unleveled_sstables.begin(), _unleveled_sstables.end(), sst), _unleveled_sstables.end()); - } else { - auto first = sst->get_first_decorated_key().token(); - auto last = sst->get_last_decorated_key().token(); - using bound = dht::partition_range::bound; - _leveled_sstables.subtract({ - make_interval( - dht::partition_range( - bound(dht::ring_position::starting_at(first)), - bound(dht::ring_position::ending_at(last)))), - value_set({sst})}); - } - } - virtual std::unique_ptr make_incremental_selector() const override; - class incremental_selector; -}; - -class partitioned_sstable_set::incremental_selector : public incremental_selector_impl { - schema_ptr _schema; - const std::vector& _unleveled_sstables; - map_iterator _it; - const map_iterator _end; -private: - static dht::token_range to_token_range(const interval_type& 
i) { - return dht::token_range::make({i.lower().token(), boost::icl::is_left_closed(i.bounds())}, - {i.upper().token(), boost::icl::is_right_closed(i.bounds())}); - } -public: - incremental_selector(schema_ptr schema, const std::vector& unleveled_sstables, const interval_map_type& leveled_sstables) - : _schema(std::move(schema)) - , _unleveled_sstables(unleveled_sstables) - , _it(leveled_sstables.begin()) - , _end(leveled_sstables.end()) { - } - virtual std::pair> select(const dht::token& token) override { - auto pr = dht::partition_range::make(dht::ring_position::starting_at(token), dht::ring_position::ending_at(token)); - auto interval = make_interval(*_schema, std::move(pr)); - auto ssts = _unleveled_sstables; - - while (_it != _end) { - if (boost::icl::contains(_it->first, interval)) { - ssts.insert(ssts.end(), _it->second.begin(), _it->second.end()); - return std::make_pair(to_token_range(_it->first), std::move(ssts)); - } - // we don't want to skip current interval if token lies before it. - if (boost::icl::lower_less(interval, _it->first)) { - return std::make_pair(dht::token_range::make({token, true}, {_it->first.lower().token(), false}), - std::move(ssts)); - } - _it++; - } - return std::make_pair(dht::token_range::make_open_ended_both_sides(), std::move(ssts)); - } -}; - -std::unique_ptr partitioned_sstable_set::make_incremental_selector() const { - return std::make_unique(_schema, _unleveled_sstables, _leveled_sstables); -} - -class compaction_strategy_impl { -protected: - bool _use_clustering_key_filter = false; -public: - virtual ~compaction_strategy_impl() {} - virtual compaction_descriptor get_sstables_for_compaction(column_family& cfs, std::vector candidates) = 0; - virtual std::vector get_resharding_jobs(column_family& cf, std::vector candidates); - virtual void notify_completion(const std::vector>& removed, const std::vector>& added) { } - virtual compaction_strategy_type type() const = 0; - virtual bool parallel_compaction() const { - return true; - } - virtual int64_t estimated_pending_compactions(column_family& cf) const = 0; - virtual std::unique_ptr make_sstable_set(schema_ptr schema) const { - return std::make_unique(); - } - bool use_clustering_key_filter() const { - return _use_clustering_key_filter; - } -}; - -std::vector -compaction_strategy_impl::get_resharding_jobs(column_family& cf, std::vector candidates) { - std::vector jobs; - shard_id reshard_at_current = 0; - - clogger.debug("Trying to get resharding jobs for {}.{}...", cf.schema()->ks_name(), cf.schema()->cf_name()); - for (auto& candidate : candidates) { - auto level = candidate->get_sstable_level(); - jobs.push_back(resharding_descriptor{{std::move(candidate)}, std::numeric_limits::max(), reshard_at_current++ % smp::count, level}); - } - return jobs; -} - -// -// Null compaction strategy is the default compaction strategy. -// As the name implies, it does nothing. -// -class null_compaction_strategy : public compaction_strategy_impl { -public: - virtual compaction_descriptor get_sstables_for_compaction(column_family& cfs, std::vector candidates) override { - return sstables::compaction_descriptor(); - } - - virtual int64_t estimated_pending_compactions(column_family& cf) const override { - return 0; - } - - virtual compaction_strategy_type type() const { - return compaction_strategy_type::null; - } -}; - -// -// Major compaction strategy is about compacting all available sstables into one. 
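// (It needs at least min_compact_threshold = 2 input sstables, as the class
// below enforces; "compacting" a single sstable would merely rewrite it.)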
-// -class major_compaction_strategy : public compaction_strategy_impl { - static constexpr size_t min_compact_threshold = 2; -public: - virtual compaction_descriptor get_sstables_for_compaction(column_family& cfs, std::vector candidates) override { - // At least, two sstables must be available for compaction to take place. - if (cfs.sstables_count() < min_compact_threshold) { - return sstables::compaction_descriptor(); - } - return sstables::compaction_descriptor(std::move(candidates)); - } - - virtual int64_t estimated_pending_compactions(column_family& cf) const override { - return (cf.sstables_count() < min_compact_threshold) ? 0 : 1; - } - - virtual compaction_strategy_type type() const { - return compaction_strategy_type::major; - } -}; - -class size_tiered_compaction_strategy_options { - static constexpr uint64_t DEFAULT_MIN_SSTABLE_SIZE = 50L * 1024L * 1024L; - static constexpr double DEFAULT_BUCKET_LOW = 0.5; - static constexpr double DEFAULT_BUCKET_HIGH = 1.5; - static constexpr double DEFAULT_COLD_READS_TO_OMIT = 0.05; - const sstring MIN_SSTABLE_SIZE_KEY = "min_sstable_size"; - const sstring BUCKET_LOW_KEY = "bucket_low"; - const sstring BUCKET_HIGH_KEY = "bucket_high"; - const sstring COLD_READS_TO_OMIT_KEY = "cold_reads_to_omit"; - - uint64_t min_sstable_size = DEFAULT_MIN_SSTABLE_SIZE; - double bucket_low = DEFAULT_BUCKET_LOW; - double bucket_high = DEFAULT_BUCKET_HIGH; - double cold_reads_to_omit = DEFAULT_COLD_READS_TO_OMIT; -public: - static std::experimental::optional get_value(const std::map& options, const sstring& name) { - auto it = options.find(name); - if (it == options.end()) { - return std::experimental::nullopt; - } - return it->second; - } - - size_tiered_compaction_strategy_options(const std::map& options) { - using namespace cql3::statements; - - auto tmp_value = get_value(options, MIN_SSTABLE_SIZE_KEY); - min_sstable_size = property_definitions::to_long(MIN_SSTABLE_SIZE_KEY, tmp_value, DEFAULT_MIN_SSTABLE_SIZE); - - tmp_value = get_value(options, BUCKET_LOW_KEY); - bucket_low = property_definitions::to_double(BUCKET_LOW_KEY, tmp_value, DEFAULT_BUCKET_LOW); - - tmp_value = get_value(options, BUCKET_HIGH_KEY); - bucket_high = property_definitions::to_double(BUCKET_HIGH_KEY, tmp_value, DEFAULT_BUCKET_HIGH); - - tmp_value = get_value(options, COLD_READS_TO_OMIT_KEY); - cold_reads_to_omit = property_definitions::to_double(COLD_READS_TO_OMIT_KEY, tmp_value, DEFAULT_COLD_READS_TO_OMIT); - } - - size_tiered_compaction_strategy_options() { - min_sstable_size = DEFAULT_MIN_SSTABLE_SIZE; - bucket_low = DEFAULT_BUCKET_LOW; - bucket_high = DEFAULT_BUCKET_HIGH; - cold_reads_to_omit = DEFAULT_COLD_READS_TO_OMIT; - } - - // FIXME: convert java code below. -#if 0 - public static Map validateOptions(Map options, Map uncheckedOptions) throws ConfigurationException - { - String optionValue = options.get(MIN_SSTABLE_SIZE_KEY); - try - { - long minSSTableSize = optionValue == null ? 
DEFAULT_MIN_SSTABLE_SIZE : Long.parseLong(optionValue); - if (minSSTableSize < 0) - { - throw new ConfigurationException(String.format("%s must be non negative: %d", MIN_SSTABLE_SIZE_KEY, minSSTableSize)); - } - } - catch (NumberFormatException e) - { - throw new ConfigurationException(String.format("%s is not a parsable int (base10) for %s", optionValue, MIN_SSTABLE_SIZE_KEY), e); - } - - double bucketLow = parseDouble(options, BUCKET_LOW_KEY, DEFAULT_BUCKET_LOW); - double bucketHigh = parseDouble(options, BUCKET_HIGH_KEY, DEFAULT_BUCKET_HIGH); - if (bucketHigh <= bucketLow) - { - throw new ConfigurationException(String.format("%s value (%s) is less than or equal to the %s value (%s)", - BUCKET_HIGH_KEY, bucketHigh, BUCKET_LOW_KEY, bucketLow)); - } - - double maxColdReadsRatio = parseDouble(options, COLD_READS_TO_OMIT_KEY, DEFAULT_COLD_READS_TO_OMIT); - if (maxColdReadsRatio < 0.0 || maxColdReadsRatio > 1.0) - { - throw new ConfigurationException(String.format("%s value (%s) should be between between 0.0 and 1.0", - COLD_READS_TO_OMIT_KEY, optionValue)); - } - - uncheckedOptions.remove(MIN_SSTABLE_SIZE_KEY); - uncheckedOptions.remove(BUCKET_LOW_KEY); - uncheckedOptions.remove(BUCKET_HIGH_KEY); - uncheckedOptions.remove(COLD_READS_TO_OMIT_KEY); - - return uncheckedOptions; - } -#endif - friend class size_tiered_compaction_strategy; -}; - -class size_tiered_compaction_strategy : public compaction_strategy_impl { - size_tiered_compaction_strategy_options _options; - - // Return a list of pair of shared_sstable and its respective size. - std::vector> create_sstable_and_length_pairs(const std::vector& sstables) const; - - // Group files of similar size into buckets. - std::vector> get_buckets(const std::vector& sstables) const; - - // Maybe return a bucket of sstables to compact - std::vector - most_interesting_bucket(std::vector> buckets, unsigned min_threshold, unsigned max_threshold); - - // Return the average size of a given list of sstables. - uint64_t avg_size(std::vector& sstables) { - assert(sstables.size() > 0); // this should never fail - uint64_t n = 0; - - for (auto& sstable : sstables) { - // FIXME: Switch to sstable->bytes_on_disk() afterwards. That's what C* uses. - n += sstable->data_size(); - } - - return n / sstables.size(); - } -public: - size_tiered_compaction_strategy() = default; - size_tiered_compaction_strategy(const std::map& options) : - _options(options) {} - - virtual compaction_descriptor get_sstables_for_compaction(column_family& cfs, std::vector candidates) override; - - virtual int64_t estimated_pending_compactions(column_family& cf) const override; - - friend std::vector size_tiered_most_interesting_bucket(lw_shared_ptr); - friend std::vector size_tiered_most_interesting_bucket(const std::list&); - - virtual compaction_strategy_type type() const { - return compaction_strategy_type::size_tiered; - } -}; - -std::vector> -size_tiered_compaction_strategy::create_sstable_and_length_pairs(const std::vector& sstables) const { - - std::vector> sstable_length_pairs; - sstable_length_pairs.reserve(sstables.size()); - - for(auto& sstable : sstables) { - auto sstable_size = sstable->data_size(); - assert(sstable_size != 0); - - sstable_length_pairs.emplace_back(sstable, sstable_size); - } - - return sstable_length_pairs; -} - -std::vector> -size_tiered_compaction_strategy::get_buckets(const std::vector& sstables) const { - // sstables sorted by size of its data file. 
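    // Worked example with the default options (bucket_low = 0.5, bucket_high = 1.5,
    // min_sstable_size = 50 MB): input sizes {10, 20, 60, 70, 200} MB bucket as
    // {10, 20} (both below min_sstable_size, so "small" files group together),
    // {60, 70} (70 MB lies within 50%-150% of the 60 MB running average), and
    // {200} alone.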
- auto sorted_sstables = create_sstable_and_length_pairs(sstables); - - std::sort(sorted_sstables.begin(), sorted_sstables.end(), [] (auto& i, auto& j) { - return i.second < j.second; - }); - - std::map> buckets; - - bool found; - for (auto& pair : sorted_sstables) { - found = false; - size_t size = pair.second; - - // look for a bucket containing similar-sized files: - // group in the same bucket if it's w/in 50% of the average for this bucket, - // or this file and the bucket are all considered "small" (less than `minSSTableSize`) - for (auto& entry : buckets) { - std::vector bucket = entry.second; - size_t old_average_size = entry.first; - - if ((size > (old_average_size * _options.bucket_low) && size < (old_average_size * _options.bucket_high)) || - (size < _options.min_sstable_size && old_average_size < _options.min_sstable_size)) { - size_t total_size = bucket.size() * old_average_size; - size_t new_average_size = (total_size + size) / (bucket.size() + 1); - - bucket.push_back(pair.first); - buckets.erase(old_average_size); - buckets.insert({ new_average_size, std::move(bucket) }); - - found = true; - break; - } - } - - // no similar bucket found; put it in a new one - if (!found) { - std::vector new_bucket; - new_bucket.push_back(pair.first); - buckets.insert({ size, std::move(new_bucket) }); - } - } - - std::vector> bucket_list; - bucket_list.reserve(buckets.size()); - - for (auto& entry : buckets) { - bucket_list.push_back(std::move(entry.second)); - } - - return bucket_list; -} - -std::vector -size_tiered_compaction_strategy::most_interesting_bucket(std::vector> buckets, - unsigned min_threshold, unsigned max_threshold) -{ - std::vector, uint64_t>> pruned_buckets_and_hotness; - pruned_buckets_and_hotness.reserve(buckets.size()); - - // FIXME: add support to get hotness for each bucket. - - for (auto& bucket : buckets) { - // FIXME: the coldest sstables will be trimmed to meet the threshold, so we must add support to this feature - // by converting SizeTieredCompactionStrategy::trimToThresholdWithHotness. - // By the time being, we will only compact buckets that meet the threshold. - bucket.resize(std::min(bucket.size(), size_t(max_threshold))); - if (bucket.size() >= min_threshold) { - auto avg = avg_size(bucket); - pruned_buckets_and_hotness.push_back({ std::move(bucket), avg }); - } - } - - if (pruned_buckets_and_hotness.empty()) { - return std::vector(); - } - - // NOTE: Compacting smallest sstables first, located at the beginning of the sorted vector. - auto& min = *std::min_element(pruned_buckets_and_hotness.begin(), pruned_buckets_and_hotness.end(), [] (auto& i, auto& j) { - // FIXME: ignoring hotness by the time being. - - return i.second < j.second; - }); - auto hottest = std::move(min.first); - - return hottest; -} - -compaction_descriptor size_tiered_compaction_strategy::get_sstables_for_compaction(column_family& cfs, std::vector candidates) { - // make local copies so they can't be changed out from under us mid-method - int min_threshold = cfs.schema()->min_compaction_threshold(); - int max_threshold = cfs.schema()->max_compaction_threshold(); - - // TODO: Add support to filter cold sstables (for reference: SizeTieredCompactionStrategy::filterColdSSTables). 
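    // A bucket must hold at least min_threshold sstables to be compacted, and is
    // truncated to max_threshold; both come from the schema, as enforced by
    // most_interesting_bucket() above.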
- - auto buckets = get_buckets(candidates); - - std::vector<sstables::shared_sstable> most_interesting = most_interesting_bucket(std::move(buckets), min_threshold, max_threshold); - if (most_interesting.empty()) { - // nothing to do - return sstables::compaction_descriptor(); - } - - return sstables::compaction_descriptor(std::move(most_interesting)); -} - -int64_t size_tiered_compaction_strategy::estimated_pending_compactions(column_family& cf) const { - int min_threshold = cf.schema()->min_compaction_threshold(); - int max_threshold = cf.schema()->max_compaction_threshold(); - std::vector<sstables::shared_sstable> sstables; - int64_t n = 0; - - sstables.reserve(cf.sstables_count()); - for (auto& entry : *cf.get_sstables()) { - sstables.push_back(entry); - } - - for (auto& bucket : get_buckets(sstables)) { - if (bucket.size() >= size_t(min_threshold)) { - n += std::ceil(double(bucket.size()) / max_threshold); - } - } - return n; -} - -std::vector<sstables::shared_sstable> size_tiered_most_interesting_bucket(lw_shared_ptr<sstable_list> candidates) { - size_tiered_compaction_strategy cs; - - std::vector<sstables::shared_sstable> sstables; - sstables.reserve(candidates->size()); - for (auto& entry : *candidates) { - sstables.push_back(entry); - } - - auto buckets = cs.get_buckets(sstables); - - std::vector<sstables::shared_sstable> most_interesting = cs.most_interesting_bucket(std::move(buckets), - DEFAULT_MIN_COMPACTION_THRESHOLD, DEFAULT_MAX_COMPACTION_THRESHOLD); - - return most_interesting; -} - -std::vector<sstables::shared_sstable> -size_tiered_most_interesting_bucket(const std::list<sstables::shared_sstable>& candidates) { - size_tiered_compaction_strategy cs; - - std::vector<sstables::shared_sstable> sstables(candidates.begin(), candidates.end()); - - auto buckets = cs.get_buckets(sstables); - - std::vector<sstables::shared_sstable> most_interesting = cs.most_interesting_bucket(std::move(buckets), - DEFAULT_MIN_COMPACTION_THRESHOLD, DEFAULT_MAX_COMPACTION_THRESHOLD); - - return most_interesting; -} - -class leveled_compaction_strategy : public compaction_strategy_impl { - static constexpr int32_t DEFAULT_MAX_SSTABLE_SIZE_IN_MB = 160; - const sstring SSTABLE_SIZE_OPTION = "sstable_size_in_mb"; - - int32_t _max_sstable_size_in_mb = DEFAULT_MAX_SSTABLE_SIZE_IN_MB; - stdx::optional<std::vector<stdx::optional<dht::decorated_key>>> _last_compacted_keys; - std::vector<int> _compaction_counter; -public: - leveled_compaction_strategy(const std::map<sstring, sstring>& options) { - using namespace cql3::statements; - - auto tmp_value = size_tiered_compaction_strategy_options::get_value(options, SSTABLE_SIZE_OPTION); - _max_sstable_size_in_mb = property_definitions::to_int(SSTABLE_SIZE_OPTION, tmp_value, DEFAULT_MAX_SSTABLE_SIZE_IN_MB); - if (_max_sstable_size_in_mb >= 1000) { - clogger.warn("Max sstable size of {}MB is configured; having a unit of compaction this large is probably a bad idea", - _max_sstable_size_in_mb); - } else if (_max_sstable_size_in_mb < 50) { - clogger.warn("Max sstable size of {}MB is configured. Testing done for CASSANDRA-5727 indicates that performance improves up to 160MB", - _max_sstable_size_in_mb); - } - _compaction_counter.resize(leveled_manifest::MAX_LEVELS); - } - - virtual compaction_descriptor get_sstables_for_compaction(column_family& cfs, std::vector<sstables::shared_sstable> candidates) override; - - virtual std::vector<resharding_descriptor> get_resharding_jobs(column_family& cf, std::vector<sstables::shared_sstable> candidates) override; - - virtual void notify_completion(const std::vector<lw_shared_ptr<sstables::sstable>>& removed, const std::vector<lw_shared_ptr<sstables::sstable>>& added) override; - - // for each level > 0, get the newest sstable and use its last key as the last - // compacted key for the previous level.
- void generate_last_compacted_keys(leveled_manifest& manifest); - - virtual int64_t estimated_pending_compactions(column_family& cf) const override; - - virtual bool parallel_compaction() const override { - return false; - } - - virtual compaction_strategy_type type() const { - return compaction_strategy_type::leveled; - } - virtual std::unique_ptr<sstable_set_impl> make_sstable_set(schema_ptr schema) const override { - return std::make_unique<partitioned_sstable_set>(std::move(schema)); - } -}; - -compaction_descriptor leveled_compaction_strategy::get_sstables_for_compaction(column_family& cfs, std::vector<sstables::shared_sstable> candidates) { - // NOTE: leveled_manifest creation may be slightly expensive, so later on, - // we may want to store it in the strategy itself. However, the sstable - // lists managed by the manifest may become outdated. For example, one - // sstable in it may be marked for deletion after being compacted. - // Currently, we create a new manifest whenever it's time for compaction. - leveled_manifest manifest = leveled_manifest::create(cfs, candidates, _max_sstable_size_in_mb); - if (!_last_compacted_keys) { - generate_last_compacted_keys(manifest); - } - auto candidate = manifest.get_compaction_candidates(*_last_compacted_keys, _compaction_counter); - - if (candidate.sstables.empty()) { - return sstables::compaction_descriptor(); - } - - clogger.debug("leveled: Compacting {} out of {} sstables", candidate.sstables.size(), cfs.get_sstables()->size()); - - return std::move(candidate); -} - -std::vector<resharding_descriptor> leveled_compaction_strategy::get_resharding_jobs(column_family& cf, std::vector<sstables::shared_sstable> candidates) { - leveled_manifest manifest = leveled_manifest::create(cf, candidates, _max_sstable_size_in_mb); - - std::vector<resharding_descriptor> descriptors; - shard_id target_shard = 0; - auto get_shard = [&target_shard] { return target_shard++ % smp::count; }; - - // Basically, we'll iterate through all levels, and for each, we'll sort the - // sstables by first key because there's a need to reshard together adjacent - // sstables. - // The shard at which the job will run is chosen in a round-robin fashion. - for (auto level = 0U; level <= manifest.get_level_count(); level++) { - uint64_t max_sstable_size = !level ?
std::numeric_limits<uint64_t>::max() : (_max_sstable_size_in_mb*1024*1024); - auto& sstables = manifest.get_level(level); - sstables.sort([] (auto& i, auto& j) { - return i->compare_by_first_key(*j) < 0; - }); - - resharding_descriptor current_descriptor = resharding_descriptor{{}, max_sstable_size, get_shard(), level}; - - for (auto it = sstables.begin(); it != sstables.end(); it++) { - current_descriptor.sstables.push_back(*it); - - auto next = std::next(it); - if (current_descriptor.sstables.size() == smp::count || next == sstables.end()) { - descriptors.push_back(std::move(current_descriptor)); - current_descriptor = resharding_descriptor{{}, max_sstable_size, get_shard(), level}; - } - } - } - return descriptors; -} - -void leveled_compaction_strategy::notify_completion(const std::vector<lw_shared_ptr<sstables::sstable>>& removed, const std::vector<lw_shared_ptr<sstables::sstable>>& added) { - if (removed.empty() || added.empty()) { - return; - } - auto min_level = std::numeric_limits<uint32_t>::max(); - for (auto& sstable : removed) { - min_level = std::min(min_level, sstable->get_sstable_level()); - } - - const sstables::sstable *last = nullptr; - for (auto& candidate : added) { - if (!last || last->compare_by_first_key(*candidate) < 0) { - last = &*candidate; - } - } - _last_compacted_keys.value().at(min_level) = last->get_last_decorated_key(); -} - -void leveled_compaction_strategy::generate_last_compacted_keys(leveled_manifest& manifest) { - std::vector<stdx::optional<dht::decorated_key>> last_compacted_keys(leveled_manifest::MAX_LEVELS); - for (auto i = 0; i < leveled_manifest::MAX_LEVELS - 1; i++) { - if (manifest.get_level(i + 1).empty()) { - continue; - } - - const sstables::sstable* sstable_with_last_compacted_key = nullptr; - stdx::optional<db_clock::time_point> max_creation_time; - for (auto& sst : manifest.get_level(i + 1)) { - auto wtime = sst->data_file_write_time(); - if (!max_creation_time || wtime >= *max_creation_time) { - sstable_with_last_compacted_key = &*sst; - max_creation_time = wtime; - } - } - last_compacted_keys[i] = sstable_with_last_compacted_key->get_last_decorated_key(); - } - _last_compacted_keys = std::move(last_compacted_keys); -} - -int64_t leveled_compaction_strategy::estimated_pending_compactions(column_family& cf) const { - std::vector<sstables::shared_sstable> sstables; - sstables.reserve(cf.sstables_count()); - for (auto& entry : *cf.get_sstables()) { - sstables.push_back(entry); - } - leveled_manifest manifest = leveled_manifest::create(cf, sstables, _max_sstable_size_in_mb); - return manifest.get_estimated_tasks(); -} - -class date_tiered_compaction_strategy : public compaction_strategy_impl { - date_tiered_manifest _manifest; -public: - date_tiered_compaction_strategy(const std::map<sstring, sstring>& options) - : _manifest(options) - { - _use_clustering_key_filter = true; - } - - virtual compaction_descriptor get_sstables_for_compaction(column_family& cfs, std::vector<sstables::shared_sstable> candidates) override { - auto gc_before = gc_clock::now() - cfs.schema()->gc_grace_seconds(); - auto sstables = _manifest.get_next_sstables(cfs, candidates, gc_before); - clogger.debug("datetiered: Compacting {} out of {} sstables", sstables.size(), candidates.size()); - if (sstables.empty()) { - return sstables::compaction_descriptor(); - } - return sstables::compaction_descriptor(std::move(sstables)); - } - - virtual int64_t estimated_pending_compactions(column_family& cf) const override { - return _manifest.get_estimated_tasks(cf); - } - - virtual compaction_strategy_type type() const { - return compaction_strategy_type::date_tiered; - } -}; - -compaction_strategy::compaction_strategy(::shared_ptr<compaction_strategy_impl> impl) - : _compaction_strategy_impl(std::move(impl)) {}
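The compaction_strategy defined from here on is a thin value-type facade: every call is forwarded to the shared impl object, and the make_compaction_strategy() factory at the end of the file selects the concrete strategy. As a quick illustration, a hypothetical call site might look like the sketch below; `cf` (a column_family) and `candidates` are assumed to exist, and only the "sstable_size_in_mb" option key is taken from the patch itself:

// Hypothetical usage sketch, not part of the patch: drive the facade directly.
auto strategy = sstables::make_compaction_strategy(
        sstables::compaction_strategy_type::leveled,
        {{"sstable_size_in_mb", "160"}});
auto job = strategy.get_sstables_for_compaction(cf, std::move(candidates));
if (!job.sstables.empty()) {
    // submit `job` for execution; once sstables are rewritten, call
    // strategy.notify_completion(removed, added) so the leveled
    // bookkeeping (last-compacted keys) stays current.
}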
-compaction_strategy::compaction_strategy() = default; -compaction_strategy::~compaction_strategy() = default; -compaction_strategy::compaction_strategy(const compaction_strategy&) = default; -compaction_strategy::compaction_strategy(compaction_strategy&&) = default; -compaction_strategy& compaction_strategy::operator=(compaction_strategy&&) = default; - -compaction_strategy_type compaction_strategy::type() const { - return _compaction_strategy_impl->type(); -} - -compaction_descriptor compaction_strategy::get_sstables_for_compaction(column_family& cfs, std::vector<sstables::shared_sstable> candidates) { - return _compaction_strategy_impl->get_sstables_for_compaction(cfs, std::move(candidates)); -} - -std::vector<resharding_descriptor> compaction_strategy::get_resharding_jobs(column_family& cf, std::vector<sstables::shared_sstable> candidates) { - return _compaction_strategy_impl->get_resharding_jobs(cf, std::move(candidates)); -} - -void compaction_strategy::notify_completion(const std::vector<lw_shared_ptr<sstables::sstable>>& removed, const std::vector<lw_shared_ptr<sstables::sstable>>& added) { - _compaction_strategy_impl->notify_completion(removed, added); -} - -bool compaction_strategy::parallel_compaction() const { - return _compaction_strategy_impl->parallel_compaction(); -} - -int64_t compaction_strategy::estimated_pending_compactions(column_family& cf) const { - return _compaction_strategy_impl->estimated_pending_compactions(cf); -} - -bool compaction_strategy::use_clustering_key_filter() const { - return _compaction_strategy_impl->use_clustering_key_filter(); -} - -sstable_set -compaction_strategy::make_sstable_set(schema_ptr schema) const { - return sstable_set( - _compaction_strategy_impl->make_sstable_set(std::move(schema)), - make_lw_shared<sstable_list>()); -} - -compaction_strategy make_compaction_strategy(compaction_strategy_type strategy, const std::map<sstring, sstring>& options) { - ::shared_ptr<compaction_strategy_impl> impl; - - switch(strategy) { - case compaction_strategy_type::null: - impl = make_shared<null_compaction_strategy>(null_compaction_strategy()); - break; - case compaction_strategy_type::major: - impl = make_shared<major_compaction_strategy>(major_compaction_strategy()); - break; - case compaction_strategy_type::size_tiered: - impl = make_shared<size_tiered_compaction_strategy>(size_tiered_compaction_strategy(options)); - break; - case compaction_strategy_type::leveled: - impl = make_shared<leveled_compaction_strategy>(leveled_compaction_strategy(options)); - break; - case compaction_strategy_type::date_tiered: - impl = make_shared<date_tiered_compaction_strategy>(date_tiered_compaction_strategy(options)); - break; - default: - throw std::runtime_error("strategy not supported"); - } - - return compaction_strategy(std::move(impl)); -} - -} diff --git a/scylla/sstables/compress.cc b/scylla/sstables/compress.cc deleted file mode 100644 index 87f1fc4..0000000 --- a/scylla/sstables/compress.cc +++ /dev/null @@ -1,346 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */ - -#include -#include - -#include -#include -#include - -#include "compress.hh" - -#include -#include -#include - -#include "unimplemented.hh" -#include "stdx.hh" - -namespace sstables { - -void compression::update(uint64_t compressed_file_length) { - // FIXME: also process _compression.options (just for crc-check frequency) - if (name.value == "LZ4Compressor") { - _uncompress = uncompress_lz4; - } else if (name.value == "SnappyCompressor") { - _uncompress = uncompress_snappy; - } else if (name.value == "DeflateCompressor") { - _uncompress = uncompress_deflate; - } else { - throw std::runtime_error("unsupported compression type"); - } - - _compressed_file_length = compressed_file_length; -} - -void compression::set_compressor(compressor c) { - if (c == compressor::lz4) { - _compress = compress_lz4; - _compress_max_size = compress_max_size_lz4; - name.value = "LZ4Compressor"; - } else if (c == compressor::snappy) { - _compress = compress_snappy; - _compress_max_size = compress_max_size_snappy; - name.value = "SnappyCompressor"; - } else if (c == compressor::deflate) { - _compress = compress_deflate; - _compress_max_size = compress_max_size_deflate; - name.value = "DeflateCompressor"; - } else { - throw std::runtime_error("unsupported compressor type"); - } -} - -// locate() takes a byte position in the uncompressed stream, and finds the -// the location of the compressed chunk on disk which contains it, and the -// offset in this chunk. -// locate() may only be used for offsets of actual bytes, and in particular -// the end-of-file position (one past the last byte) MUST not be used. If the -// caller wants to read from the end of file, it should simply read nothing. -compression::chunk_and_offset -compression::locate(uint64_t position) const { - auto ucl = uncompressed_chunk_length(); - auto chunk_index = position / ucl; - decltype(ucl) chunk_offset = position % ucl; - auto chunk_start = offsets.elements.at(chunk_index); - auto chunk_end = (chunk_index + 1 == offsets.elements.size()) - ? _compressed_file_length - : offsets.elements.at(chunk_index + 1); - return { chunk_start, chunk_end - chunk_start, chunk_offset }; -} - -} - -size_t uncompress_lz4(const char* input, size_t input_len, - char* output, size_t output_len) { - // We use LZ4_decompress_safe(). According to the documentation, the - // function LZ4_decompress_fast() is slightly faster, but maliciously - // crafted compressed data can cause it to overflow the output buffer. - // Theoretically, our compressed data is created by us so is not malicious - // (and accidental corruption is avoided by the compressed-data checksum), - // but let's not take that chance for now, until we've actually measured - // the performance benefit that LZ4_decompress_fast() would bring. - - // Cassandra's LZ4Compressor prepends to the chunk its uncompressed length - // in 4 bytes little-endian (!) order. We don't need this information - - // we already know the uncompressed data is at most the given chunk size - // (and usually is exactly that, except in the last chunk). The advance - // knowledge of the uncompressed size could be useful if we used - // LZ4_decompress_fast(), but we prefer LZ4_decompress_safe() anyway... 
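- // For example, a chunk that uncompresses to 1000 bytes starts with the - // little-endian prefix bytes 0xE8 0x03 0x00 0x00 (1000 == 0x3E8), which the - // two lines below skip.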
- input += 4; - input_len -= 4; - - auto ret = LZ4_decompress_safe(input, output, input_len, output_len); - if (ret < 0) { - throw std::runtime_error("LZ4 uncompression failure"); - } - return ret; -} - -size_t compress_lz4(const char* input, size_t input_len, - char* output, size_t output_len) { - if (output_len < LZ4_COMPRESSBOUND(input_len) + 4) { - throw std::runtime_error("LZ4 compression failure: length of output is too small"); - } - // Write input_len (32-bit data) to beginning of output in little-endian representation. - output[0] = input_len & 0xFF; - output[1] = (input_len >> 8) & 0xFF; - output[2] = (input_len >> 16) & 0xFF; - output[3] = (input_len >> 24) & 0xFF; -#ifdef HAVE_LZ4_COMPRESS_DEFAULT - auto ret = LZ4_compress_default(input, output + 4, input_len, LZ4_compressBound(input_len)); -#else - auto ret = LZ4_compress(input, output + 4, input_len); -#endif - if (ret == 0) { - throw std::runtime_error("LZ4 compression failure: LZ4_compress() failed"); - } - return ret + 4; -} - -size_t uncompress_deflate(const char* input, size_t input_len, - char* output, size_t output_len) { - z_stream zs; - zs.zalloc = Z_NULL; - zs.zfree = Z_NULL; - zs.opaque = Z_NULL; - zs.avail_in = 0; - zs.next_in = Z_NULL; - if (inflateInit(&zs) != Z_OK) { - throw std::runtime_error("deflate uncompression init failure"); - } - // yuck, zlib is not const-correct, and also uses unsigned char while we use char :-( - zs.next_in = reinterpret_cast(const_cast(input)); - zs.avail_in = input_len; - zs.next_out = reinterpret_cast(output); - zs.avail_out = output_len; - auto res = inflate(&zs, Z_FINISH); - inflateEnd(&zs); - if (res == Z_STREAM_END) { - return output_len - zs.avail_out; - } else { - throw std::runtime_error("deflate uncompression failure"); - } -} - -size_t compress_deflate(const char* input, size_t input_len, - char* output, size_t output_len) { - z_stream zs; - zs.zalloc = Z_NULL; - zs.zfree = Z_NULL; - zs.opaque = Z_NULL; - zs.avail_in = 0; - zs.next_in = Z_NULL; - if (deflateInit(&zs, Z_DEFAULT_COMPRESSION) != Z_OK) { - throw std::runtime_error("deflate compression init failure"); - } - zs.next_in = reinterpret_cast(const_cast(input)); - zs.avail_in = input_len; - zs.next_out = reinterpret_cast(output); - zs.avail_out = output_len; - auto res = deflate(&zs, Z_FINISH); - deflateEnd(&zs); - if (res == Z_STREAM_END) { - return output_len - zs.avail_out; - } else { - throw std::runtime_error("deflate compression failure"); - } -} - -size_t uncompress_snappy(const char* input, size_t input_len, - char* output, size_t output_len) { - if (snappy_uncompress(input, input_len, output, &output_len) - == SNAPPY_OK) { - return output_len; - } else { - throw std::runtime_error("snappy uncompression failure"); - } -} - -size_t compress_snappy(const char* input, size_t input_len, - char* output, size_t output_len) { - auto ret = snappy_compress(input, input_len, output, &output_len); - if (ret != SNAPPY_OK) { - throw std::runtime_error("snappy compression failure: snappy_compress() failed"); - } - return output_len; -} - -size_t compress_max_size_lz4(size_t input_len) { - return LZ4_COMPRESSBOUND(input_len) + 4; -} - -size_t compress_max_size_deflate(size_t input_len) { - z_stream zs; - zs.zalloc = Z_NULL; - zs.zfree = Z_NULL; - zs.opaque = Z_NULL; - zs.avail_in = 0; - zs.next_in = Z_NULL; - if (deflateInit(&zs, Z_DEFAULT_COMPRESSION) != Z_OK) { - throw std::runtime_error("deflate compression init failure"); - } - auto res = deflateBound(&zs, input_len); - deflateEnd(&zs); - return res; -} - -size_t 
compress_max_size_snappy(size_t input_len) { - return snappy_max_compressed_length(input_len); -} - -class compressed_file_data_source_impl : public data_source_impl { - stdx::optional> _input_stream; - sstables::compression* _compression_metadata; - uint64_t _underlying_pos; - uint64_t _pos; - uint64_t _beg_pos; - uint64_t _end_pos; -public: - compressed_file_data_source_impl(file f, sstables::compression* cm, - uint64_t pos, size_t len, file_input_stream_options options) - : _compression_metadata(cm) - { - _beg_pos = pos; - if (pos > _compression_metadata->data_len) { - throw std::runtime_error("attempt to uncompress beyond end"); - } - if (len == 0 || pos == _compression_metadata->data_len) { - // Nothing to read - _end_pos = _pos = _beg_pos; - return; - } - if (len <= _compression_metadata->data_len - pos) { - _end_pos = pos + len; - } else { - _end_pos = _compression_metadata->data_len; - } - // _beg_pos and _end_pos specify positions in the compressed stream. - // We need to translate them into a range of uncompressed chunks, - // and open a file_input_stream to read that range. - auto start = _compression_metadata->locate(_beg_pos); - auto end = _compression_metadata->locate(_end_pos - 1); - _input_stream = make_file_input_stream(std::move(f), - start.chunk_start, - end.chunk_start + end.chunk_len - start.chunk_start, - std::move(options)); - _underlying_pos = start.chunk_start; - _pos = _beg_pos; - } - virtual future> get() override { - if (_pos >= _end_pos) { - return make_ready_future>(); - } - auto addr = _compression_metadata->locate(_pos); - // Uncompress the next chunk. We need to skip part of the first - // chunk, but then continue to read from beginning of chunks. - if (_pos != _beg_pos && addr.offset != 0) { - throw std::runtime_error("compressed reader out of sync"); - } - return _input_stream->read_exactly(addr.chunk_len). - then([this, addr](temporary_buffer buf) { - // The last 4 bytes of the chunk are the adler32 checksum - // of the rest of the (compressed) chunk. - auto compressed_len = addr.chunk_len - 4; - // FIXME: Do not always calculate checksum - Cassandra has a - // probability (defaulting to 1.0, but still...) - auto checksum = read_be(buf.get() + compressed_len); - if (checksum != checksum_adler32(buf.get(), compressed_len)) { - throw std::runtime_error("compressed chunk failed checksum"); - } - - // We know that the uncompressed data will take exactly - // chunk_length bytes (or less, if reading the last chunk). - temporary_buffer out( - _compression_metadata->uncompressed_chunk_length()); - // The compressed data is the whole chunk, minus the last 4 - // bytes (which contain the checksum verified above). 
- auto len = _compression_metadata->uncompress( - buf.get(), compressed_len, - out.get_write(), out.size()); - out.trim(len); - out.trim_front(addr.offset); - _pos += out.size(); - _underlying_pos += addr.chunk_len; - return out; - }); - } - - virtual future<> close() override { - if (!_input_stream) { - return make_ready_future<>(); - } - return _input_stream->close(); - } - - virtual future> skip(uint64_t n) override { - _pos += n; - assert(_pos <= _end_pos); - if (_pos == _end_pos) { - return make_ready_future>(); - } - auto addr = _compression_metadata->locate(_pos); - auto underlying_n = addr.chunk_start - _underlying_pos; - _underlying_pos = addr.chunk_start; - _beg_pos = _pos; - return _input_stream->skip(underlying_n).then([] { - return make_ready_future>(); - }); - } -}; - -class compressed_file_data_source : public data_source { -public: - compressed_file_data_source(file f, sstables::compression* cm, - uint64_t offset, size_t len, file_input_stream_options options) - : data_source(std::make_unique( - std::move(f), cm, offset, len, std::move(options))) - {} -}; - -input_stream make_compressed_file_input_stream( - file f, sstables::compression* cm, uint64_t offset, size_t len, - file_input_stream_options options) -{ - return input_stream(compressed_file_data_source( - std::move(f), cm, offset, len, std::move(options))); -} diff --git a/scylla/sstables/compress.hh b/scylla/sstables/compress.hh deleted file mode 100644 index 108f768..0000000 --- a/scylla/sstables/compress.hh +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -// This is an implementation of a random-access compressed file compatible -// with Cassandra's org.apache.cassandra.io.compress compressed files. -// -// To allow reasonably-efficient seeking in the compressed file, the file -// is not compressed as a whole, but rather divided into chunks of a known -// size (by default, 64 KB), where each chunk is compressed individually. -// The compressed size of each chunk is different, so for allowing seeking -// to a particular position in the uncompressed data, we need to also know -// the position of each chunk. This offset vector is supplied externally as -// a "compression_metadata" object, which also contains additional information -// needed from decompression - such as the chunk size and compressor type. -// -// Cassandra supports three different compression algorithms for the chunks, -// LZ4, Snappy, and Deflate - the default (and therefore most important) is -// LZ4. Each compressor is an implementation of the "compressor" class. -// -// Each compressed chunk is followed by a 4-byte checksum of the compressed -// data, using the Adler32 algorithm. In Cassandra, there is a parameter -// "crc_check_chance" (defaulting to 1.0) which determines the probability -// of us verifying the checksum of each chunk we read. 
-// -// This implementation does not cache the compressed disk blocks (which -// are read using O_DIRECT), nor uncompressed data. We intend to cache high- -// level Cassandra rows, not disk blocks. - -#include -#include -#include - -#include "core/file.hh" -#include "core/reactor.hh" -#include "core/shared_ptr.hh" -#include "types.hh" -#include "../compress.hh" - -// An "uncompress_func" is a function which uncompresses the given compressed -// input chunk, and writes the uncompressed data into the given output buffer. -// An exception is thrown if the output buffer is not big enough, but that -// is not expected to happen - in the chunked compression scheme used here, -// we know that the uncompressed data will be exactly chunk_size bytes (or -// smaller for the last chunk). -typedef size_t uncompress_func(const char* input, size_t input_len, - char* output, size_t output_len); - -uncompress_func uncompress_lz4; -uncompress_func uncompress_snappy; -uncompress_func uncompress_deflate; - -typedef size_t compress_func(const char* input, size_t input_len, - char* output, size_t output_len); - -compress_func compress_lz4; -compress_func compress_snappy; -compress_func compress_deflate; - -typedef size_t compress_max_size_func(size_t input_len); - -compress_max_size_func compress_max_size_lz4; -compress_max_size_func compress_max_size_snappy; -compress_max_size_func compress_max_size_deflate; - -inline uint32_t init_checksum_adler32() { - return adler32(0, Z_NULL, 0); -} - -inline uint32_t checksum_adler32(const char* input, size_t input_len) { - auto init = adler32(0, Z_NULL, 0); - // yuck, zlib uses unsigned char while we use char :-( - return adler32(init, reinterpret_cast(input), - input_len); -} - -inline uint32_t checksum_adler32(uint32_t adler, const char* input, size_t input_len) { - return adler32(adler, reinterpret_cast(input), - input_len); -} - -inline uint32_t checksum_adler32_combine(uint32_t adler1, uint32_t adler2, size_t input_len2) { - return adler32_combine(adler1, adler2, input_len2); -} - -namespace sstables { - -struct compression { - disk_string name; - disk_array options; - uint32_t chunk_len; - uint64_t data_len; - disk_array offsets; - - template - auto describe_type(Describer f) { return f(name, options, chunk_len, data_len, offsets); } - -private: - // Variables determined from the above deserialized values, held for convenience: - uncompress_func *_uncompress = nullptr; - compress_func *_compress = nullptr; - // Return maximum length of data that compressor may output. - compress_max_size_func *_compress_max_size = nullptr; - // Variables *not* found in the "Compression Info" file (added by update()): - uint64_t _compressed_file_length = 0; - uint32_t _full_checksum; -public: - // Set the compressor algorithm, please check the definition of enum compressor. - void set_compressor(compressor c); - // After changing _compression, update() must be called to update - // additional variables depending on it. - void update(uint64_t compressed_file_length); - operator bool() const { - return _uncompress != nullptr; - } - // locate() locates in the compressed file the given byte position of - // the uncompressed data: - // 1. The byte range containing the appropriate compressed chunk, and - // 2. the offset into the uncompressed chunk. - // Note that the last 4 bytes of the returned chunk are not the actual - // compressed data, but rather the checksum of the compressed data. - // locate() throws an out-of-range exception if the position is beyond - // the last chunk. 
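- // Worked example: with chunk_len == 65536 and offsets.elements == {0, 21000, 42500, 60500}, - // locate(200000) computes chunk_index 3 (200000 / 65536) and returns - // { chunk_start 60500, chunk_len _compressed_file_length - 60500, offset 3392 }, - // since 200000 % 65536 == 3392.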
- struct chunk_and_offset { - uint64_t chunk_start; - uint64_t chunk_len; // variable size of compressed chunk - unsigned offset; // offset into chunk after uncompressing it - }; - chunk_and_offset locate(uint64_t position) const; - - unsigned uncompressed_chunk_length() const noexcept { - return chunk_len; - } - uint64_t uncompressed_file_length() const { - return data_len; - } - - uint64_t compressed_file_length() const { - return _compressed_file_length; - } - void set_compressed_file_length(uint64_t compressed_file_length) { - _compressed_file_length = compressed_file_length; - } - - uint32_t full_checksum() const { - return _full_checksum; - } - void init_full_checksum() { - _full_checksum = init_checksum_adler32(); - } - void update_full_checksum(uint32_t checksum, size_t size) { - _full_checksum = checksum_adler32_combine(_full_checksum, checksum, size); - } - - size_t uncompress( - const char* input, size_t input_len, - char* output, size_t output_len) const { - if (!_uncompress) { - throw std::runtime_error("uncompress is not supported"); - } - return _uncompress(input, input_len, output, output_len); - } - size_t compress( - const char* input, size_t input_len, - char* output, size_t output_len) const { - if (!_compress) { - throw std::runtime_error("compress is not supported"); - } - return _compress(input, input_len, output, output_len); - } - size_t compress_max_size(size_t input_len) const { - return _compress_max_size(input_len); - } - friend class sstable; -}; - -} - - -// Note: compression_metadata is passed by reference; The caller is -// responsible for keeping the compression_metadata alive as long as there -// are open streams on it. This should happen naturally on a higher level - -// as long as we have *sstables* work in progress, we need to keep the whole -// sstable alive, and the compression metadata is only a part of it. -input_stream make_compressed_file_input_stream( - file f, sstables::compression *cm, uint64_t offset, size_t len, class file_input_stream_options options); diff --git a/scylla/sstables/consumer.hh b/scylla/sstables/consumer.hh deleted file mode 100644 index bd83e49..0000000 --- a/scylla/sstables/consumer.hh +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "core/future.hh" -#include "core/iostream.hh" -#include "sstables/exceptions.hh" -#include - -template -static inline T consume_be(temporary_buffer& p) { - T i = read_be(p.get()); - p.trim_front(sizeof(T)); - return i; -} - -namespace data_consumer { -enum class proceed { no, yes }; - -template -class continuous_data_consumer { - using proceed = data_consumer::proceed; - StateProcessor& state_processor() { - return static_cast(*this); - }; -protected: - input_stream _input; - uint64_t _stream_position; - // remaining length of input to read (if <0, continue until end of file). 
- int64_t _remain; - - // state machine progress: - enum class prestate { - NONE, - READING_U8, - READING_U16, - READING_U32, - READING_U64, - READING_BYTES, - } _prestate = prestate::NONE; - - // state for non-NONE prestates - uint32_t _pos; - // state for READING_U8, READING_U16, READING_U32, READING_U64 prestate - uint8_t _u8; - uint16_t _u16; - uint32_t _u32; - uint64_t _u64; - union { - char bytes[sizeof(uint64_t)]; - uint64_t uint64; - uint32_t uint32; - uint16_t uint16; - uint8_t uint8; - } _read_int; - // state for READING_BYTES prestate - temporary_buffer _read_bytes; - temporary_buffer* _read_bytes_where; // which temporary_buffer to set, _key or _val? - - enum class read_status { ready, waiting }; - inline read_status read_8(temporary_buffer& data) { - if (data.size() >= sizeof(uint8_t)) { - _u8 = consume_be(data); - return read_status::ready; - } else { - _pos = 0; - _prestate = prestate::READING_U8; - return read_status::waiting; - } - } - // Read a 16-bit integer into _u16. If the whole thing is in the buffer - // (this is the common case), do this immediately. Otherwise, remember - // what we have in the buffer, and remember to continue later by using - // a "prestate": - inline read_status read_16(temporary_buffer& data) { - if (data.size() >= sizeof(uint16_t)) { - _u16 = consume_be(data); - return read_status::ready; - } else { - std::copy(data.begin(), data.end(), _read_int.bytes); - _pos = data.size(); - data.trim(0); - _prestate = prestate::READING_U16; - return read_status::waiting; - } - } - inline read_status read_32(temporary_buffer& data) { - if (data.size() >= sizeof(uint32_t)) { - _u32 = consume_be(data); - return read_status::ready; - } else { - std::copy(data.begin(), data.end(), _read_int.bytes); - _pos = data.size(); - data.trim(0); - _prestate = prestate::READING_U32; - return read_status::waiting; - } - } - inline read_status read_64(temporary_buffer& data) { - if (data.size() >= sizeof(uint64_t)) { - _u64 = consume_be(data); - return read_status::ready; - } else { - std::copy(data.begin(), data.end(), _read_int.bytes); - _pos = data.size(); - data.trim(0); - _prestate = prestate::READING_U64; - return read_status::waiting; - } - } - inline read_status read_bytes(temporary_buffer& data, uint32_t len, temporary_buffer& where) { - if (data.size() >= len) { - where = data.share(0, len); - data.trim_front(len); - return read_status::ready; - } else { - // copy what we have so far, read the rest later - _read_bytes = temporary_buffer(len); - std::copy(data.begin(), data.end(),_read_bytes.get_write()); - _read_bytes_where = &where; - _pos = data.size(); - data.trim(0); - _prestate = prestate::READING_BYTES; - return read_status::waiting; - } - } - - inline void process_buffer(temporary_buffer& data) { - if (__builtin_expect((_prestate != prestate::NONE), 0)) { - do_process_buffer(data); - } - } -private: - // This is separated so that the compiler can inline "process_buffer". Because this chunk is too big, - // it usually won't if this is part of the main function - void do_process_buffer(temporary_buffer& data) { - // We're in the middle of reading a basic type, which crossed - // an input buffer. 
Resume that read before continuing to - // handle the current state: - if (_prestate == prestate::READING_BYTES) { - auto n = std::min(_read_bytes.size() - _pos, data.size()); - std::copy(data.begin(), data.begin() + n, - _read_bytes.get_write() + _pos); - data.trim_front(n); - _pos += n; - if (_pos == _read_bytes.size()) { - *_read_bytes_where = std::move(_read_bytes); - _prestate = prestate::NONE; - } - } else { - // in the middle of reading an integer - unsigned len; - switch (_prestate) { - case prestate::READING_U8: - len = sizeof(uint8_t); - break; - case prestate::READING_U16: - len = sizeof(uint16_t); - break; - case prestate::READING_U32: - len = sizeof(uint32_t); - break; - case prestate::READING_U64: - len = sizeof(uint64_t); - break; - default: - throw sstables::malformed_sstable_exception("unknown prestate"); - } - assert(_pos < len); - auto n = std::min((size_t)(len - _pos), data.size()); - std::copy(data.begin(), data.begin() + n, _read_int.bytes + _pos); - data.trim_front(n); - _pos += n; - if (_pos == len) { - // done reading the integer, store it in _u8, _u16, _u32 or _u64: - switch (_prestate) { - case prestate::READING_U8: - _u8 = _read_int.uint8; - break; - case prestate::READING_U16: - _u16 = net::ntoh(_read_int.uint16); - break; - case prestate::READING_U32: - _u32 = net::ntoh(_read_int.uint32); - break; - case prestate::READING_U64: - _u64 = net::ntoh(_read_int.uint64); - break; - default: - throw sstables::malformed_sstable_exception( - "unknown prestate"); - } - _prestate = prestate::NONE; - } - } - } - - void verify_end_state() { - state_processor().verify_end_state(); - } -public: - continuous_data_consumer(input_stream<char>&& input, uint64_t start, uint64_t maxlen) - : _input(std::move(input)), _stream_position(start), _remain(maxlen) {} - - template <typename Consumer> - future<> consume_input(Consumer& c) { - return _input.consume(c); - } - - // some states do not consume input (they exist only to perform some - // action when finishing to read a primitive type via a prestate, in - // the rare case that a primitive type crossed a buffer). Such - // non-consuming states need to run even if the data buffer is empty. - bool non_consuming() { - return state_processor().non_consuming(); - } - - inline proceed process(temporary_buffer<char>& data) { - while (data || non_consuming()) { - process_buffer(data); - // If _prestate is set to something other than prestate::NONE - // after process_buffer was called, it means that data wasn't - // enough to complete the prestate. That can happen especially - // when reading a large buffer. Therefore, we need to ask the caller - // to read more data until the prestate is completed. - if (__builtin_expect((_prestate != prestate::NONE), 0)) { - // assert that data was all consumed by process_buffer.
- assert(data.size() == 0); - return proceed::yes; - } - auto ret = state_processor().process_state(data); - if (__builtin_expect(ret == proceed::no, 0)) { - return ret; - } - } - return proceed::yes; - } - - using unconsumed_remainder = input_stream::unconsumed_remainder; - // called by input_stream::consume(): - future - operator()(temporary_buffer data) { - if (_remain >= 0 && data.size() >= (uint64_t)_remain) { - // We received more data than we actually care about, so process - // the beginning of the buffer, and return the rest to the stream - auto segment = data.share(0, _remain); - auto ret = process(segment); - data.trim_front(_remain - segment.size()); - auto len = _remain - segment.size(); - _remain -= len; - _stream_position += len; - if (_remain == 0 && ret == proceed::yes) { - verify_end_state(); - } - return make_ready_future(std::move(data)); - } else if (data.empty()) { - // End of file - verify_end_state(); - return make_ready_future(std::move(data)); - } else { - // We can process the entire buffer (if the consumer wants to). - auto orig_data_size = data.size(); - _stream_position += data.size(); - if (process(data) == proceed::yes) { - assert(data.size() == 0); - if (_remain >= 0) { - _remain -= orig_data_size; - } - return make_ready_future(); - } else { - if (_remain >= 0) { - _remain -= orig_data_size - data.size(); - } - _stream_position -= data.size(); - return make_ready_future(std::move(data)); - } - } - } - - future<> fast_forward_to(size_t begin, size_t end) { - assert(begin >= _stream_position); - auto n = begin - _stream_position; - _stream_position = begin; - - assert(end >= _stream_position); - _remain = end - _stream_position; - - _prestate = prestate::NONE; - return _input.skip(n); - } - - future<> skip_to(size_t begin) { - return fast_forward_to(begin, _stream_position + _remain); - } - - uint64_t position() const { - return _stream_position; - } - - future<> close() { - return _input.close(); - } -}; -} diff --git a/scylla/sstables/date_tiered_compaction_strategy.hh b/scylla/sstables/date_tiered_compaction_strategy.hh deleted file mode 100644 index 97ea976..0000000 --- a/scylla/sstables/date_tiered_compaction_strategy.hh +++ /dev/null @@ -1,427 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2016 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include -#include -#include -#include -#include "sstables.hh" -#include "compaction.hh" -#include "timestamp.hh" -#include "cql3/statements/property_definitions.hh" - -static constexpr double DEFAULT_MAX_SSTABLE_AGE_DAYS = 365; -static constexpr int64_t DEFAULT_BASE_TIME_SECONDS = 60; - -struct duration_conversor { - // Convert given duration to TargetDuration and return value as timestamp. - template - static api::timestamp_type convert(SourceDuration d) { - return std::chrono::duration_cast(d).count(); - } - - // Convert given duration to duration that is represented by the string - // target_duration, and return value as timestamp. - template - static api::timestamp_type convert(const sstring& target_duration, SourceDuration d) { - if (target_duration == "HOURS") { - return convert(d); - } else if (target_duration == "MICROSECONDS") { - return convert(d); - } else if (target_duration == "MILLISECONDS") { - return convert(d); - } else if (target_duration == "MINUTES") { - return convert(d); - } else if (target_duration == "NANOSECONDS") { - return convert(d); - } else if (target_duration == "SECONDS") { - return convert(d); - } else { - throw std::runtime_error(sprint("target duration %s is not available", target_duration)); - } - } -}; - -class date_tiered_compaction_strategy_options { - const sstring DEFAULT_TIMESTAMP_RESOLUTION = "MICROSECONDS"; - const sstring TIMESTAMP_RESOLUTION_KEY = "timestamp_resolution"; - const sstring MAX_SSTABLE_AGE_KEY = "max_sstable_age_days"; - const sstring BASE_TIME_KEY = "base_time_seconds"; - - api::timestamp_type max_sstable_age; - api::timestamp_type base_time; -public: - date_tiered_compaction_strategy_options(const std::map& options) { - using namespace cql3::statements; - - auto tmp_value = get_value(options, TIMESTAMP_RESOLUTION_KEY); - auto target_unit = tmp_value ? 
tmp_value.value() : DEFAULT_TIMESTAMP_RESOLUTION; - - tmp_value = get_value(options, MAX_SSTABLE_AGE_KEY); - auto fractional_days = property_definitions::to_double(MAX_SSTABLE_AGE_KEY, tmp_value, DEFAULT_MAX_SSTABLE_AGE_DAYS); - int64_t max_sstable_age_in_hours = std::lround(fractional_days * 24); - max_sstable_age = duration_conversor::convert(target_unit, std::chrono::hours(max_sstable_age_in_hours)); - - tmp_value = get_value(options, BASE_TIME_KEY); - auto base_time_seconds = property_definitions::to_long(BASE_TIME_KEY, tmp_value, DEFAULT_BASE_TIME_SECONDS); - base_time = duration_conversor::convert(target_unit, std::chrono::seconds(base_time_seconds)); - } - - date_tiered_compaction_strategy_options() { - auto max_sstable_age_in_hours = int64_t(DEFAULT_MAX_SSTABLE_AGE_DAYS * 24); - max_sstable_age = std::chrono::duration_cast(std::chrono::hours(max_sstable_age_in_hours)).count(); - base_time = std::chrono::duration_cast(std::chrono::seconds(DEFAULT_BASE_TIME_SECONDS)).count(); - } -private: - static std::experimental::optional get_value(const std::map& options, const sstring& name) { - auto it = options.find(name); - if (it == options.end()) { - return std::experimental::nullopt; - } - return it->second; - } - - friend class date_tiered_manifest; -}; - -class date_tiered_manifest { - static logging::logger logger; - - date_tiered_compaction_strategy_options _options; -public: - date_tiered_manifest() = delete; - - date_tiered_manifest(const std::map& options) - : _options(options) - { - // FIXME: implement option to disable tombstone compaction. -#if 0 - if (!options.containsKey(AbstractCompactionStrategy.TOMBSTONE_COMPACTION_INTERVAL_OPTION) && !options.containsKey(AbstractCompactionStrategy.TOMBSTONE_THRESHOLD_OPTION)) - { - disableTombstoneCompactions = true; - logger.debug("Disabling tombstone compactions for DTCS"); - } - else - logger.debug("Enabling tombstone compactions for DTCS"); -#endif - } - - std::vector - get_next_sstables(column_family& cf, std::vector& uncompacting, gc_clock::time_point gc_before) { - if (cf.get_sstables()->empty()) { - return {}; - } - - // Find fully expired SSTables. Those will be included no matter what. - auto expired = get_fully_expired_sstables(cf, uncompacting, gc_before.time_since_epoch().count()); - - auto sort_ssts = [] (std::vector& sstables) { - std::sort(sstables.begin(), sstables.end(), [] (const auto& x, const auto& y) { - return x->generation() < y->generation(); - }); - }; - sort_ssts(uncompacting); - sort_ssts(expired); - - std::vector non_expired_set; - // Set non_expired_set with the elements that are in uncompacting, but not in the expired. 
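- // (Both inputs were sorted by generation above, as std::set_difference requires; - // e.g. uncompacting generations {1, 2, 3, 4} minus expired {2, 4} leave - // non_expired_set = {1, 3}.)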
- std::set_difference(uncompacting.begin(), uncompacting.end(), expired.begin(), expired.end(), - std::inserter(non_expired_set, non_expired_set.begin()), [] (const auto& x, const auto& y) { - return x->generation() < y->generation(); - }); - - auto compaction_candidates = get_next_non_expired_sstables(cf, non_expired_set, gc_before); - if (!expired.empty()) { - compaction_candidates.insert(compaction_candidates.end(), expired.begin(), expired.end()); - } - return compaction_candidates; - } - - int64_t get_estimated_tasks(column_family& cf) const { - int base = cf.schema()->min_compaction_threshold(); - int64_t now = get_now(cf); - std::vector sstables; - int64_t n = 0; - - sstables.reserve(cf.sstables_count()); - for (auto& entry : *cf.get_sstables()) { - sstables.push_back(entry); - } - auto candidates = filter_old_sstables(sstables, _options.max_sstable_age, now); - auto buckets = get_buckets(create_sst_and_min_timestamp_pairs(candidates), _options.base_time, base, now); - - for (auto& bucket : buckets) { - if (bucket.size() >= size_t(cf.schema()->min_compaction_threshold())) { - n += std::ceil(double(bucket.size()) / cf.schema()->max_compaction_threshold()); - } - } - return n; - } -private: - std::vector - get_next_non_expired_sstables(column_family& cf, std::vector& non_expiring_sstables, gc_clock::time_point gc_before) { - int base = cf.schema()->min_compaction_threshold(); - int64_t now = get_now(cf); - auto most_interesting = get_compaction_candidates(cf, non_expiring_sstables, now, base); - - return most_interesting; - - // FIXME: implement functionality below that will look for a single sstable with worth dropping tombstone, - // iff strategy didn't find anything to compact. So it's not essential. -#if 0 - // if there is no sstable to compact in standard way, try compacting single sstable whose droppable tombstone - // ratio is greater than threshold. - - List sstablesWithTombstones = Lists.newArrayList(); - for (SSTableReader sstable : nonExpiringSSTables) - { - if (worthDroppingTombstones(sstable, gcBefore)) - sstablesWithTombstones.add(sstable); - } - if (sstablesWithTombstones.isEmpty()) - return Collections.emptyList(); - - return Collections.singletonList(Collections.min(sstablesWithTombstones, new SSTableReader.SizeComparator())); -#endif - } - - std::vector - get_compaction_candidates(column_family& cf, std::vector candidate_sstables, int64_t now, int base) { - int min_threshold = cf.schema()->min_compaction_threshold(); - int max_threshold = cf.schema()->max_compaction_threshold(); - auto candidates = filter_old_sstables(candidate_sstables, _options.max_sstable_age, now); - - auto buckets = get_buckets(create_sst_and_min_timestamp_pairs(candidates), _options.base_time, base, now); - - return newest_bucket(buckets, min_threshold, max_threshold, now, _options.base_time); - } - - /** - * Gets the timestamp that DateTieredCompactionStrategy considers to be the "current time". - * @return the maximum timestamp across all SSTables. - */ - static int64_t get_now(column_family& cf) { - int64_t max_timestamp = 0; - for (auto& sst : *cf.get_sstables()) { - int64_t candidate = sst->get_stats_metadata().max_timestamp; - max_timestamp = candidate > max_timestamp ? candidate : max_timestamp; - } - return max_timestamp; - } - - /** - * Removes all sstables with max timestamp older than maxSSTableAge. 
- * @return a list of sstables with the oldest sstables excluded - */ - static std::vector - filter_old_sstables(std::vector sstables, api::timestamp_type max_sstable_age, int64_t now) { - if (max_sstable_age == 0) { - return sstables; - } - int64_t cutoff = now - max_sstable_age; - - sstables.erase(std::remove_if(sstables.begin(), sstables.end(), [cutoff] (auto& sst) { - return sst->get_stats_metadata().max_timestamp < cutoff; - }), sstables.end()); - - return sstables; - } - - /** - * - * @param sstables - * @return - */ - static std::vector> - create_sst_and_min_timestamp_pairs(const std::vector& sstables) { - std::vector> sstable_min_timestamp_pairs; - sstable_min_timestamp_pairs.reserve(sstables.size()); - for (auto& sst : sstables) { - sstable_min_timestamp_pairs.emplace_back(sst, sst->get_stats_metadata().min_timestamp); - } - return sstable_min_timestamp_pairs; - } - - /** - * A target time span used for bucketing SSTables based on timestamps. - */ - struct target { - // How big a range of timestamps fit inside the target. - int64_t size; - // A timestamp t hits the target iff t / size == divPosition. - int64_t div_position; - - target() = delete; - target(int64_t size, int64_t div_position) : size(size), div_position(div_position) {} - - /** - * Compares the target to a timestamp. - * @param timestamp the timestamp to compare. - * @return a negative integer, zero, or a positive integer as the target lies before, covering, or after than the timestamp. - */ - int compare_to_timestamp(int64_t timestamp) { - auto ts1 = div_position; - auto ts2 = timestamp / size; - return (ts1 > ts2 ? 1 : (ts1 == ts2 ? 0 : -1)); - } - - /** - * Tells if the timestamp hits the target. - * @param timestamp the timestamp to test. - * @return true iff timestamp / size == divPosition. - */ - bool on_target(int64_t timestamp) { - return compare_to_timestamp(timestamp) == 0; - } - - /** - * Gets the next target, which represents an earlier time span. - * @param base The number of contiguous targets that will have the same size. Targets following those will be base times as big. - * @return - */ - target next_target(int base) - { - if (div_position % base > 0) { - return target(size, div_position - 1); - } else { - return target(size * base, div_position / base - 1); - } - } - }; - - - /** - * Group files with similar min timestamp into buckets. Files with recent min timestamps are grouped together into - * buckets designated to short timespans while files with older timestamps are grouped into buckets representing - * longer timespans. - * @param files pairs consisting of a file and its min timestamp - * @param timeUnit - * @param base - * @param now - * @return a list of buckets of files. The list is ordered such that the files with newest timestamps come first. - * Each bucket is also a list of files ordered from newest to oldest. - */ - std::vector> - get_buckets(std::vector>&& files, api::timestamp_type time_unit, int base, int64_t now) const { - // Sort files by age. Newest first. - std::sort(files.begin(), files.end(), [] (auto& i, auto& j) { - return i.second > j.second; - }); - - std::vector> buckets; - auto target = get_initial_target(now, time_unit); - auto it = files.begin(); - - while (it != files.end()) { - bool finish = false; - while (!target.on_target(it->second)) { - // If the file is too new for the target, skip it. 
- if (target.compare_to_timestamp(it->second) < 0) { - it++; - if (it == files.end()) { - finish = true; - break; - } - } else { // If the file is too old for the target, switch targets. - target = target.next_target(base); - } - } - if (finish) { - break; - } - - std::vector bucket; - while (target.on_target(it->second)) { - bucket.push_back(it->first); - it++; - if (it == files.end()) { - break; - } - } - buckets.push_back(bucket); - } - - return buckets; - } - - target get_initial_target(uint64_t now, int64_t time_unit) const { - return target(time_unit, now / time_unit); - } - - /** - * @param buckets list of buckets, sorted from newest to oldest, from which to return the newest bucket within thresholds. - * @param minThreshold minimum number of sstables in a bucket to qualify. - * @param maxThreshold maximum number of sstables to compact at once (the returned bucket will be trimmed down to this). - * @return a bucket (list) of sstables to compact. - */ - std::vector - newest_bucket(std::vector>& buckets, int min_threshold, int max_threshold, - int64_t now, api::timestamp_type base_time) { - - // If the "incoming window" has at least minThreshold SSTables, choose that one. - // For any other bucket, at least 2 SSTables is enough. - // In any case, limit to maxThreshold SSTables. - target incoming_window = get_initial_target(now, base_time); - for (auto& bucket : buckets) { - auto min_timestamp = bucket.front()->get_stats_metadata().min_timestamp; - if (bucket.size() >= size_t(min_threshold) || - (bucket.size() >= 2 && !incoming_window.on_target(min_timestamp))) { - trim_to_threshold(bucket, max_threshold); - return bucket; - } - } - return {}; - } - - - /** - * @param bucket list of sstables, ordered from newest to oldest by getMinTimestamp(). - * @param maxThreshold maximum number of sstables in a single compaction task. - * @return A bucket trimmed to the maxThreshold newest sstables. - */ - static void trim_to_threshold(std::vector& bucket, int max_threshold) { - // Trim the oldest sstables off the end to meet the maxThreshold - bucket.resize(std::min(bucket.size(), size_t(max_threshold))); - } -}; diff --git a/scylla/sstables/disk_types.hh b/scylla/sstables/disk_types.hh deleted file mode 100644 index 7bdcaa8..0000000 --- a/scylla/sstables/disk_types.hh +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "bytes.hh" -#include -#include -#include -#include -#include -#include - -namespace sstables { - -// Some in-disk structures have an associated integer (of varying sizes) that -// represents how large they are. They can be a byte-length, in the case of a -// string, number of elements, in the case of an array, etc. -// -// For those elements, we encapsulate the underlying type in an outter -// structure that embeds how large is the in-disk size. 
It is a lot more -// convenient to embed it in the size than explicitly writing it in the parser. -// This way, we don't need to encode this information in multiple places at -// once - it is already part of the type. -template -struct disk_string { - bytes value; - explicit operator bytes_view() const { - return value; - } -}; - -template -struct disk_string_view { - bytes_view value; -}; - -template -struct disk_array { - static_assert(std::is_integral::value, "Length type must be convertible to integer"); - std::deque elements; -}; - -template -struct disk_hash { - std::unordered_map> map; -}; - -template -struct disk_tagged_union_member { - // stored as: tag, value-size-on-disk, value - using tag_type = TagType; - static constexpr tag_type tag() { return Tag; } - using type = T; - T value; -}; - -template -struct disk_tagged_union { - using variant_type = boost::variant; - variant_type data; -}; - -// Each element of Members... is a disk_tagged_union_member<> -template -struct disk_set_of_tagged_union { - using tag_type = TagType; - using key_type = std::conditional_t::value, std::underlying_type_t, TagType>; - using hash_type = std::conditional_t::value, enum_hash, TagType>; - using value_type = boost::variant; - std::unordered_map data; - - template - T* get() { - // FIXME: static_assert that is a member - auto i = data.find(Tag); - if (i == data.end()) { - return nullptr; - } else { - return &boost::get>(i->second).value; - } - } - template - const T* get() const { - return const_cast(this)->get(); - } - template - void set(T&& value) { - data[Tag] = disk_tagged_union_member{std::forward(value)}; - } - struct serdes; - static struct serdes s_serdes; -}; - -} diff --git a/scylla/sstables/downsampling.hh b/scylla/sstables/downsampling.hh deleted file mode 100644 index cd26504..0000000 --- a/scylla/sstables/downsampling.hh +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Copyright 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
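The point of carrying the size type in the template can be shown with a toy writer. This is a hypothetical, stripped-down version, not the actual sstable serializer: the width of the length prefix follows from the wrapper's type, so callers never have to state it separately.

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

// Hypothetical stand-in: the length type is part of the wrapper's type,
// so the writer never has to be told how wide the size prefix is.
template <typename SizeType>
struct disk_string {
    std::string value;
};

template <typename SizeType>
void write(std::vector<uint8_t>& out, const disk_string<SizeType>& s) {
    SizeType len = static_cast<SizeType>(s.value.size());
    for (int shift = int(sizeof(SizeType) - 1) * 8; shift >= 0; shift -= 8) {
        out.push_back(static_cast<uint8_t>(len >> shift));  // big-endian size prefix
    }
    out.insert(out.end(), s.value.begin(), s.value.end());
}

int main() {
    std::vector<uint8_t> buf;
    write(buf, disk_string<uint16_t>{"key"});    // 2-byte length prefix
    write(buf, disk_string<uint32_t>{"value"});  // 4-byte length prefix
    std::printf("%zu bytes total\n", buf.size());  // (2+3) + (4+5) = 14
}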
- */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace sstables { - -class downsampling { -public: - /** - * The base (down)sampling level determines the granularity at which we can down/upsample. - * - * A higher number allows us to approximate more closely the ideal sampling. (It could also mean we do a lot of - * expensive almost-no-op resamplings from N to N-1, but the thresholds in IndexSummaryManager prevent that.) - * - * BSL must be a power of two in order to have good sampling patterns. This cannot be changed without rebuilding - * all index summaries at full sampling; for now we treat it as a constant. - */ - static constexpr int BASE_SAMPLING_LEVEL = 128; - - static thread_local std::array, BASE_SAMPLING_LEVEL> _sample_pattern_cache; - - static thread_local std::array, BASE_SAMPLING_LEVEL> _original_index_cache; - - /** - * Gets a list L of starting indices for downsampling rounds: the first round should start with the offset - * given by L[0], the second by the offset in L[1], etc. - * - * @param sampling_level the base sampling level - * - * @return A list of `sampling_level` unique indices between 0 and `sampling_level` - */ - static const std::vector& get_sampling_pattern(int sampling_level) { - assert(sampling_level > 0 && sampling_level <= BASE_SAMPLING_LEVEL); - auto& entry = _sample_pattern_cache[sampling_level-1]; - if (!entry.empty()) { - return entry; - } - - if (sampling_level <= 1) { - assert(_sample_pattern_cache[0].empty()); - _sample_pattern_cache[0].push_back(0); - return _sample_pattern_cache[0]; - } - - std::vector odds; - std::vector evens; - odds.resize(sampling_level / 2); - evens.resize(sampling_level / 2); - for (int i = 1; i < sampling_level; i += 2) { - odds[i/2] = i; - } - for (int i = 0; i < sampling_level; i += 2) { - evens[i/2] = i; - } - - // especially for latter rounds, it's important that we spread out the start points, so we'll - // make a recursive call to get an ordering for this list of start points - const std::vector& ordering = get_sampling_pattern(sampling_level/2); - std::vector start_indices; - start_indices.reserve(sampling_level); - - for (auto index : ordering) { - start_indices.push_back(odds[index]); - } - for (auto index : ordering) { - start_indices.push_back(evens[index]); - } - - _sample_pattern_cache[sampling_level-1] = std::move(start_indices); - return _sample_pattern_cache[sampling_level-1]; - } - - /** - * Returns a list that can be used to translate current index summary indexes to their original index before - * downsampling. (This repeats every `sampling_level`, so that's how many entries we return.) - * - * For example, if [7, 15] is returned, the current index summary entry at index 0 was originally - * at index 7, and the current index 1 was originally at index 15. 
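(A quick check of what the recursion above produces: the pattern is the odd start points, ordered by the half-size pattern, followed by the evens. The sketch below recomputes it with the same logic, returning by value instead of using the thread-local cache.)

#include <cstdio>
#include <vector>

// Recomputes the downsampling start-point pattern for `level` (a power of
// two), following the odds-before-evens recursion used above.
std::vector<int> sampling_pattern(int level) {
    if (level <= 1) {
        return {0};
    }
    std::vector<int> half = sampling_pattern(level / 2);
    std::vector<int> out;
    out.reserve(level);
    for (int idx : half) { out.push_back(2 * idx + 1); }  // odds, in recursive order
    for (int idx : half) { out.push_back(2 * idx); }      // then evens
    return out;
}

int main() {
    for (int v : sampling_pattern(8)) {
        std::printf("%d ", v);   // expected: 7 3 5 1 6 2 4 0
    }
    std::printf("\n");
}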
- * - * @param sampling_level the current sampling level for the index summary - * - * @return a list of original indexes for current summary entries - */ - static const std::vector& get_original_indexes(int sampling_level) { - assert(sampling_level > 0 && sampling_level <= BASE_SAMPLING_LEVEL); - auto& entry = _original_index_cache[sampling_level-1]; - if (!entry.empty()) { - return entry; - } - - const std::vector& pattern = get_sampling_pattern(BASE_SAMPLING_LEVEL); - std::vector original_indexes; - - auto pattern_end = pattern.begin() + (BASE_SAMPLING_LEVEL - sampling_level); - for (int j = 0; j < BASE_SAMPLING_LEVEL; j++) { - auto it = std::find(pattern.begin(), pattern_end, j); - if (it == pattern_end) { - // add j to original_indexes if not found in pattern. - original_indexes.push_back(j); - } - } - - _original_index_cache[sampling_level-1] = std::move(original_indexes); - return _original_index_cache[sampling_level-1]; - } - - /** - * Calculates the effective index interval after the entry at `index` in an IndexSummary. In other words, this - * returns the number of partitions in the primary on-disk index before the next partition that has an entry in - * the index summary. If sampling_level == BASE_SAMPLING_LEVEL, this will be equal to the index interval. - * @param index an index into an IndexSummary - * @param sampling_level the current sampling level for that IndexSummary - * @param min_index_interval the min index interval (effective index interval at full sampling) - * @return the number of partitions before the next index summary entry, inclusive on one end - */ - static int get_effective_index_interval_after_index(int index, int sampling_level, int min_index_interval) { - assert(index >= -1); - const std::vector& original_indexes = get_original_indexes(sampling_level); - if (index == -1) { - return original_indexes[0] * min_index_interval; - } - - index %= sampling_level; - if (size_t(index) == original_indexes.size() - 1) { - // account for partitions after the "last" entry as well as partitions before the "first" entry - return ((BASE_SAMPLING_LEVEL - original_indexes[index]) + original_indexes[0]) * min_index_interval; - } else { - return (original_indexes[index + 1] - original_indexes[index]) * min_index_interval; - } - } -#if 0 - public static int[] getStartPoints(int currentSamplingLevel, int newSamplingLevel) - { - List allStartPoints = getSamplingPattern(BASE_SAMPLING_LEVEL); - - // calculate starting indexes for sampling rounds - int initialRound = BASE_SAMPLING_LEVEL - currentSamplingLevel; - int numRounds = Math.abs(currentSamplingLevel - newSamplingLevel); - int[] startPoints = new int[numRounds]; - for (int i = 0; i < numRounds; ++i) - { - int start = allStartPoints.get(initialRound + i); - - // our "ideal" start points will be affected by the removal of items in earlier rounds, so go through all - // earlier rounds, and if we see an index that comes before our ideal start point, decrement the start point - int adjustment = 0; - for (int j = 0; j < initialRound; ++j) - { - if (allStartPoints.get(j) < start) - adjustment++; - } - startPoints[i] = start - adjustment; - } - return startPoints; - } -#endif -}; - -} diff --git a/scylla/sstables/exceptions.hh b/scylla/sstables/exceptions.hh deleted file mode 100644 index 3da97d1..0000000 --- a/scylla/sstables/exceptions.hh +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - * - */ - -/* - * This file is part of Scylla. 
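Continuing that example with concrete numbers: at half sampling, the retained entries are exactly the even original indexes, so every effective interval doubles. The sketch below reuses the pattern helper from the previous example to recompute the `get_original_indexes` result for sampling_level 64 (with BASE_SAMPLING_LEVEL 128 and min_index_interval 128, as above).

#include <algorithm>
#include <cstdio>
#include <vector>

constexpr int BASE = 128;   // BASE_SAMPLING_LEVEL

std::vector<int> sampling_pattern(int level) {   // odds-then-evens recursion, as above
    if (level <= 1) return {0};
    std::vector<int> half = sampling_pattern(level / 2), out;
    for (int i : half) out.push_back(2 * i + 1);
    for (int i : half) out.push_back(2 * i);
    return out;
}

// Entries surviving at `sampling_level`: everything not removed by the first
// (BASE - sampling_level) downsampling rounds.
std::vector<int> original_indexes(int sampling_level) {
    std::vector<int> pattern = sampling_pattern(BASE), out;
    auto end = pattern.begin() + (BASE - sampling_level);
    for (int j = 0; j < BASE; j++) {
        if (std::find(pattern.begin(), end, j) == end) out.push_back(j);
    }
    return out;
}

int main() {
    const int min_index_interval = 128;
    auto idx = original_indexes(64);   // half sampling keeps 0 2 4 ... 126
    int effective = (idx[1] - idx[0]) * min_index_interval;
    std::printf("first kept: %d %d %d, effective interval: %d\n",
                idx[0], idx[1], idx[2], effective);   // 0 2 4, 256
}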
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once -namespace sstables { -class malformed_sstable_exception : public std::exception { - sstring _msg; -public: - malformed_sstable_exception(sstring msg, sstring filename) - : malformed_sstable_exception{sprint("%s in sstable %s", msg, filename)} - {} - malformed_sstable_exception(sstring s) : _msg(s) {} - const char *what() const noexcept { - return _msg.c_str(); - } -}; - -struct bufsize_mismatch_exception : malformed_sstable_exception { - bufsize_mismatch_exception(size_t size, size_t expected) : - malformed_sstable_exception(sprint("Buffer improperly sized to hold requested data. Got: %ld. Expected: %ld", size, expected)) - {} -}; - -class compaction_stop_exception : public std::exception { - sstring _msg; -public: - compaction_stop_exception(sstring ks, sstring cf, sstring reason) : - _msg(sprint("Compaction for %s/%s was stopped due to %s.", ks, cf, reason)) {} - const char *what() const noexcept { - return _msg.c_str(); - } -}; - -} diff --git a/scylla/sstables/filter.cc b/scylla/sstables/filter.cc deleted file mode 100644 index ca364e7..0000000 --- a/scylla/sstables/filter.cc +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - * - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
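The delegating-constructor pattern in `malformed_sstable_exception` above, which folds the filename into the message exactly once, can be sketched without seastar types. Here std::string and snprintf stand in for sstring and sprint, and the filename is a made-up example.

#include <cstdio>
#include <exception>
#include <string>

// Same shape as malformed_sstable_exception above; format_msg is a local
// helper replacing sprint().
static std::string format_msg(const std::string& msg, const std::string& file) {
    char buf[256];
    std::snprintf(buf, sizeof(buf), "%s in sstable %s", msg.c_str(), file.c_str());
    return buf;
}

class malformed_sstable_exception : public std::exception {
    std::string _msg;
public:
    malformed_sstable_exception(std::string msg, std::string filename)
        : malformed_sstable_exception(format_msg(msg, filename)) {}  // delegate
    explicit malformed_sstable_exception(std::string s) : _msg(std::move(s)) {}
    const char* what() const noexcept override { return _msg.c_str(); }
};

int main() {
    try {
        throw malformed_sstable_exception("bad checksum", "la-42-big-Data.db");
    } catch (const std::exception& e) {
        std::printf("%s\n", e.what());  // bad checksum in sstable la-42-big-Data.db
    }
}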
- */ -#include "core/future.hh" -#include "core/future-util.hh" -#include "core/shared_ptr.hh" -#include "core/do_with.hh" -#include - -#include "types.hh" -#include "sstables.hh" -#include "utils/bloom_filter.hh" - -#include "disk-error-handler.hh" - -namespace sstables { - -future<> sstable::read_filter(const io_priority_class& pc) { - if (!has_component(sstable::component_type::Filter)) { - _components->filter = std::make_unique(); - return make_ready_future<>(); - } - - return do_with(sstables::filter(), [this, &pc] (auto& filter) { - return this->read_simple(filter, pc).then([this, &filter] { - large_bitset bs(filter.buckets.elements.size() * 64); - bs.load(filter.buckets.elements.begin(), filter.buckets.elements.end()); - _components->filter = utils::filter::create_filter(filter.hashes, std::move(bs)); - }); - }); -} - -void sstable::write_filter(const io_priority_class& pc) { - if (!has_component(sstable::component_type::Filter)) { - return; - } - - auto f = static_cast(_components->filter.get()); - - auto&& bs = f->bits(); - std::deque v(align_up(bs.size(), size_t(64)) / 64); - bs.save(v.begin()); - auto filter = sstables::filter(f->num_hashes(), std::move(v)); - write_simple(filter, pc); -} - -} diff --git a/scylla/sstables/filter.hh b/scylla/sstables/filter.hh deleted file mode 100644 index 22ecc65..0000000 --- a/scylla/sstables/filter.hh +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ -#pragma once - -namespace sstables { -class sstable; -} - -class filter_tracker { - uint64_t false_positive = 0; - uint64_t true_positive = 0; - - uint64_t last_false_positive = 0; - uint64_t last_true_positive = 0; -public: - void add_false_positive() { - false_positive++; - } - - void add_true_positive() { - true_positive++; - } - - friend class sstables::sstable; -}; diff --git a/scylla/sstables/hyperloglog.hh b/scylla/sstables/hyperloglog.hh deleted file mode 100644 index c3ad93d..0000000 --- a/scylla/sstables/hyperloglog.hh +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Copyright (c) 2013 Hideaki Ohno - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the 'Software'), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so. - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -/* - * Copyright (C) 2011 Clearspring Technologies, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -#pragma once - -/** - * @file hyperloglog.hpp - * @brief HyperLogLog cardinality estimator - * @date Created 2013/3/20 - * @author Hideaki Ohno - */ - -#include -#include -#include -#include -#include -#include -#if 0 -#include "murmur3.h" -#endif - -#define HLL_HASH_SEED 313 - -namespace hll { - -static const double pow_2_32 = 4294967296.0; ///< 2^32 -static const double neg_pow_2_32 = -4294967296.0; ///< -(2^32) - - -static inline size_t size_unsigned_var_int(unsigned int value) { - size_t size = 0; - while ((value & 0xFFFFFF80) != 0L) { - size++; - value >>= 7; - } - size++; - return size; -} - -static inline size_t write_unsigned_var_int(unsigned int value, uint8_t* to) { - size_t size = 0; - while ((value & 0xFFFFFF80) != 0L) { - *to = (value & 0x7F) | 0x80; - value >>= 7; - to++; - size++; - } - *to = value & 0x7F; - size++; - return size; -} - -/** @class HyperLogLog - * @brief Implement of 'HyperLogLog' estimate cardinality algorithm - */ -class HyperLogLog { -public: - - /** - * Constructor - * - * @param[in] b bit width (register size will be 2 to the b power). - * This value must be in the range[4,16].Default value is 4. - * - * @exception std::invalid_argument the argument is out of range. - */ - HyperLogLog(uint8_t b = 4) : - b_(b), m_(1 << b), M_(m_, 0) { - - if (b < 4 || 16 < b) { - throw std::invalid_argument("bit width must be in the range [4,16]"); - } - - double alpha; - switch (m_) { - case 16: - alpha = 0.673; - break; - case 32: - alpha = 0.697; - break; - case 64: - alpha = 0.709; - break; - default: - alpha = 0.7213 / (1.0 + 1.079 / m_); - break; - } - alphaMM_ = alpha * m_ * m_; - } - - static HyperLogLog from_bytes(temporary_buffer bytes) { - // FIXME: implement class that creates a HyperLogLog from an array of bytes. - // This will useful if we need to work with the cardinality data from the - // compaction metadata. 
- abort(); - } - - /** - * Adds element to the estimator - * - * @param[in] str string to add - * @param[in] len length of string - */ -#if 0 - void add(const char* str, uint32_t len) { - uint32_t hash; - MurmurHash3_x86_32(str, len, HLL_HASH_SEED, (void*) &hash); - uint32_t index = hash >> (32 - b_); - uint8_t rank = rho((hash << b_), 32 - b_); - if (rank > M_[index]) { - M_[index] = rank; - } - } -#endif - void offer_hashed(uint64_t hash) { - uint32_t index = hash >> (64 - b_); - uint8_t rank = rho((hash << b_), 64 - b_); - - if (rank > M_[index]) { - M_[index] = rank; - } - } - - /* - * Calculate the size of buffer returned by get_bytes(). - */ - size_t get_bytes_size() { - size_t size = 0; - size += sizeof(int); // version - size += size_unsigned_var_int(b_); // p; register width = b_. - size += size_unsigned_var_int(0); // sp; // sparse set = 0. - size += size_unsigned_var_int(0); // type; - size += size_unsigned_var_int(M_.size()); // register size; - size += M_.size(); - return size; - } - - temporary_buffer get_bytes() { - // FIXME: add support to SPARSE format. - static constexpr int version = 2; - - size_t s = get_bytes_size(); - temporary_buffer bytes(s); - size_t offset = 0; - // write version - write_be(reinterpret_cast(bytes.get_write() + offset), -version); - offset += sizeof(int); - - // write register width - offset += write_unsigned_var_int(b_, bytes.get_write() + offset); - // NOTE: write precision value for sparse set (not supported). - offset += write_unsigned_var_int(0, bytes.get_write() + offset); - // write type (NORMAL always!) - offset += write_unsigned_var_int(0, bytes.get_write() + offset); - // write register size - offset += write_unsigned_var_int(M_.size(), bytes.get_write() + offset); - // write register - memcpy(bytes.get_write() + offset, M_.data(), M_.size()); - offset += M_.size(); - - bytes.trim(offset); - if (s != offset) { - throw std::runtime_error("possible overflow while generating cardinality metadata"); - } - return bytes; - } - - /** - * Estimates cardinality value. - * - * @return Estimated cardinality value. - */ - double estimate() const { - double estimate; - double sum = 0.0; - for (uint32_t i = 0; i < m_; i++) { - sum += 1.0 / pow(2.0, M_[i]); - } - estimate = alphaMM_ / sum; // E in the original paper - if (estimate <= 2.5 * m_) { - uint32_t zeros = 0; - for (uint32_t i = 0; i < m_; i++) { - if (M_[i] == 0) { - zeros++; - } - } - if (zeros != 0) { - estimate = m_ * log(static_cast(m_)/ zeros); - } - } else if (estimate > (1.0 / 30.0) * pow_2_32) { - estimate = neg_pow_2_32 * log(1.0 - (estimate / pow_2_32)); - } - return estimate; - } - - /** - * Merges the estimate from 'other' into this object, returning the estimate of their union. - * The number of registers in each must be the same. - * - * @param[in] other HyperLogLog instance to be merged - * - * @exception std::invalid_argument number of registers doesn't match. - */ - void merge(const HyperLogLog& other) { - if (m_ != other.m_) { - std::stringstream ss; - ss << "number of registers doesn't match: " << m_ << " != " << other.m_; - throw std::invalid_argument(ss.str().c_str()); - } - for (uint32_t r = 0; r < m_; ++r) { - if (M_[r] < other.M_[r]) { - M_[r] = other.M_[r]; - } - } - } - - /** - * Clears all internal registers. - */ - void clear() { - std::fill(M_.begin(), M_.end(), 0); - } - - /** - * Returns size of register. 
- * - * @return Register size - */ - uint32_t registerSize() const { - return m_; - } - - /** - * Exchanges the content of the instance - * - * @param[in,out] rhs Another HyperLogLog instance - */ - void swap(HyperLogLog& rhs) { - std::swap(b_, rhs.b_); - std::swap(m_, rhs.m_); - std::swap(alphaMM_, rhs.alphaMM_); - M_.swap(rhs.M_); - } - - /** - * Dump the current status to a stream - * - * @param[out] os The output stream where the data is saved - * - * @exception std::runtime_error When failed to dump. - */ - void dump(std::ostream& os) const { - os.write((char*)&b_, sizeof(b_)); - os.write((char*)&M_[0], sizeof(M_[0]) * M_.size()); - if(os.fail()){ - throw std::runtime_error("Failed to dump"); - } - } - - /** - * Restore the status from a stream - * - * @param[in] is The input stream where the status is saved - * - * @exception std::runtime_error When failed to restore. - */ - void restore(std::istream& is) { - uint8_t b = 0; - is.read((char*)&b, sizeof(b)); - HyperLogLog tempHLL(b); - is.read((char*)&(tempHLL.M_[0]), sizeof(M_[0]) * tempHLL.m_); - if(is.fail()){ - throw std::runtime_error("Failed to restore"); - } - swap(tempHLL); - } - -private: - uint8_t b_; ///< register bit width - uint32_t m_; ///< register size - double alphaMM_; ///< alpha * m^2 - std::vector M_; ///< registers - - uint8_t rho(uint32_t x, uint8_t b) { - uint8_t v = 1; - while (v <= b && !(x & 0x80000000)) { - v++; - x <<= 1; - } - return v; - } - -}; - -} // namespace hll diff --git a/scylla/sstables/index_reader.hh b/scylla/sstables/index_reader.hh deleted file mode 100644 index d3287b6..0000000 --- a/scylla/sstables/index_reader.hh +++ /dev/null @@ -1,633 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
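Taken together, the registers, `rho`, and the alpha constant above implement the standard HyperLogLog estimator. Below is a minimal, self-contained sketch of that core: serialization, the sparse format, and the small/large-range corrections are omitted, and splitmix64 is used as a stand-in hash since the MurmurHash3 path is disabled in the original.

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

// Well-known splitmix64 mixer, standing in for MurmurHash3.
static uint64_t splitmix64(uint64_t x) {
    x += 0x9e3779b97f4a7c15ULL;
    x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL;
    x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL;
    return x ^ (x >> 31);
}

struct hll {
    uint8_t b;
    uint32_t m;
    double alpha_mm;
    std::vector<uint8_t> M;

    explicit hll(uint8_t b_) : b(b_), m(1u << b_), M(m, 0) {
        double alpha = 0.7213 / (1.0 + 1.079 / m);  // large-m constant from above
        alpha_mm = alpha * m * m;
    }
    // rho: position of the leftmost 1-bit within the remaining `bits` bits.
    static uint8_t rho(uint64_t x, uint8_t bits) {
        uint8_t v = 1;
        while (v <= bits && !(x & 0x8000000000000000ULL)) { v++; x <<= 1; }
        return v;
    }
    void offer_hashed(uint64_t h) {
        uint32_t idx = h >> (64 - b);       // top b bits pick the register
        uint8_t r = rho(h << b, 64 - b);    // rank of the rest
        if (r > M[idx]) M[idx] = r;
    }
    double estimate() const {
        double sum = 0.0;
        for (uint8_t r : M) sum += 1.0 / std::pow(2.0, r);
        return alpha_mm / sum;  // raw estimate; range corrections omitted
    }
};

int main() {
    hll h(14);  // 16384 registers, ~0.8% expected error
    for (uint64_t i = 0; i < 100000; i++) h.offer_hashed(splitmix64(i));
    std::printf("~%.0f distinct (true: 100000)\n", h.estimate());
}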
- */ - -#pragma once -#include "sstables.hh" -#include "consumer.hh" -#include "downsampling.hh" -#include "sstables/shared_index_lists.hh" - -namespace sstables { - -class index_consumer { - uint64_t max_quantity; -public: - index_list indexes; - - index_consumer(uint64_t q) : max_quantity(q) { - indexes.reserve(q); - } - - bool should_continue() { - return indexes.size() < max_quantity; - } - void consume_entry(index_entry&& ie, uint64_t offset) { - indexes.push_back(std::move(ie)); - } - void reset() { - indexes.clear(); - } -}; - -// IndexConsumer is a concept that implements: -// -// bool should_continue(); -// void consume_entry(index_entry&& ie, uintt64_t offset); -template -class index_consume_entry_context: public data_consumer::continuous_data_consumer> { - using proceed = data_consumer::proceed; - using continuous_data_consumer = data_consumer::continuous_data_consumer>; -private: - IndexConsumer& _consumer; - uint64_t _entry_offset; - - enum class state { - START, - KEY_SIZE, - KEY_BYTES, - POSITION, - PROMOTED_SIZE, - PROMOTED_BYTES, - CONSUME_ENTRY, - } _state = state::START; - - temporary_buffer _key; - temporary_buffer _promoted; - -public: - void verify_end_state() { - } - - bool non_consuming() const { - return ((_state == state::CONSUME_ENTRY) || (_state == state::START) || - ((_state == state::PROMOTED_BYTES) && (continuous_data_consumer::_prestate == continuous_data_consumer::prestate::NONE))); - } - - proceed process_state(temporary_buffer& data) { - switch (_state) { - // START comes first, to make the handling of the 0-quantity case simpler - case state::START: - if (!_consumer.should_continue()) { - return proceed::no; - } - _state = state::KEY_SIZE; - break; - case state::KEY_SIZE: - if (this->read_16(data) != continuous_data_consumer::read_status::ready) { - _state = state::KEY_BYTES; - break; - } - case state::KEY_BYTES: - if (this->read_bytes(data, this->_u16, _key) != continuous_data_consumer::read_status::ready) { - _state = state::POSITION; - break; - } - case state::POSITION: - if (this->read_64(data) != continuous_data_consumer::read_status::ready) { - _state = state::PROMOTED_SIZE; - break; - } - case state::PROMOTED_SIZE: - if (this->read_32(data) != continuous_data_consumer::read_status::ready) { - _state = state::PROMOTED_BYTES; - break; - } - case state::PROMOTED_BYTES: - if (this->read_bytes(data, this->_u32, _promoted) != continuous_data_consumer::read_status::ready) { - _state = state::CONSUME_ENTRY; - break; - } - case state::CONSUME_ENTRY: { - auto len = (_key.size() + _promoted.size() + 14); - _consumer.consume_entry(index_entry(std::move(_key), this->_u64, std::move(_promoted)), _entry_offset); - _entry_offset += len; - _state = state::START; - } - break; - default: - throw malformed_sstable_exception("unknown state"); - } - return proceed::yes; - } - - index_consume_entry_context(IndexConsumer& consumer, - input_stream&& input, uint64_t start, uint64_t maxlen) - : continuous_data_consumer(std::move(input), start, maxlen) - , _consumer(consumer), _entry_offset(start) - {} - - void reset(uint64_t offset) { - _state = state::START; - _entry_offset = offset; - _consumer.reset(); - } -}; - -// Less-comparator for lookups in the partition index. 
-class index_comparator { - dht::ring_position_comparator _tri_cmp; -public: - index_comparator(const schema& s) : _tri_cmp(s) {} - - bool operator()(const summary_entry& e, dht::ring_position_view rp) const { - return _tri_cmp(e.get_decorated_key(), rp) < 0; - } - - bool operator()(const index_entry& e, dht::ring_position_view rp) const { - return _tri_cmp(e.get_decorated_key(), rp) < 0; - } - - bool operator()(dht::ring_position_view rp, const summary_entry& e) const { - return _tri_cmp(e.get_decorated_key(), rp) > 0; - } - - bool operator()(dht::ring_position_view rp, const index_entry& e) const { - return _tri_cmp(e.get_decorated_key(), rp) > 0; - } -}; - -// Provides access to sstable indexes. -// -// Maintains logical cursor to sstable elements (partitions, cells). -// Initially the cursor is positioned on the first partition in the sstable. -// The cursor can be advanced forward using advance_to(). -// -// If eof() then the cursor is positioned past all partitions in the sstable. -class index_reader { - shared_sstable _sstable; - shared_index_lists::list_ptr _current_list; - - // We keep two pages alive so that when we have two index readers where - // one is catching up with the other, each page will be read only once. - // There is a case when a single advance_to() may need to read two pages. - shared_index_lists::list_ptr _prev_list; - - const io_priority_class& _pc; - - struct reader { - index_consumer _consumer; - index_consume_entry_context _context; - - static auto create_file_input_stream(shared_sstable sst, const io_priority_class& pc, uint64_t begin, uint64_t end) { - file_input_stream_options options; - options.buffer_size = sst->sstable_buffer_size; - options.read_ahead = 2; - options.io_priority_class = pc; - return make_file_input_stream(sst->_index_file, begin, end - begin, std::move(options)); - } - - reader(shared_sstable sst, const io_priority_class& pc, uint64_t begin, uint64_t end, uint64_t quantity) - : _consumer(quantity) - , _context(_consumer, create_file_input_stream(sst, pc, begin, end), begin, end - begin) - { } - }; - - stdx::optional _reader; - - uint64_t _previous_summary_idx = 0; - uint64_t _current_summary_idx = 0; - uint64_t _current_index_idx = 0; - uint64_t _current_pi_idx = 0; // Points to upper bound of the cursor. - uint64_t _data_file_position = 0; - indexable_element _element = indexable_element::partition; -private: - future<> advance_to_end() { - sstlog.trace("index {}: advance_to_end()", this); - _data_file_position = data_file_end(); - _element = indexable_element::partition; - _prev_list = std::move(_current_list); - return close_reader().finally([this] { - _reader = stdx::nullopt; - }); - } - - // Must be called for non-decreasing summary_idx. 
- future<> advance_to_page(uint64_t summary_idx) { - sstlog.trace("index {}: advance_to_page({})", this, summary_idx); - assert(!_current_list || _current_summary_idx <= summary_idx); - if (_current_list && _current_summary_idx == summary_idx) { - sstlog.trace("index {}: same page", this); - return make_ready_future<>(); - } - - auto& summary = _sstable->get_summary(); - if (summary_idx >= summary.header.size) { - sstlog.trace("index {}: eof", this); - return advance_to_end(); - } - - auto loader = [this] (uint64_t summary_idx) -> future { - auto& summary = _sstable->get_summary(); - uint64_t position = summary.entries[summary_idx].position; - uint64_t quantity = downsampling::get_effective_index_interval_after_index(summary_idx, summary.header.sampling_level, - summary.header.min_index_interval); - - uint64_t end; - if (summary_idx + 1 >= summary.header.size) { - end = _sstable->index_size(); - } else { - end = summary.entries[summary_idx + 1].position; - } - - return close_reader().then_wrapped([this, position, end, quantity, summary_idx] (auto&& f) { - try { - f.get(); - _reader.emplace(_sstable, _pc, position, end, quantity); - } catch (...) { - _reader = stdx::nullopt; - throw; - } - return _reader->_context.consume_input(_reader->_context).then([this] { - return std::move(_reader->_consumer.indexes); - }); - }); - }; - - return _sstable->_index_lists.get_or_load(summary_idx, loader).then([this, summary_idx] (shared_index_lists::list_ptr ref) { - _prev_list = std::move(_current_list); - _current_list = std::move(ref); - _current_summary_idx = summary_idx; - _current_index_idx = 0; - _current_pi_idx = 0; - assert(!_current_list->empty()); - _data_file_position = (*_current_list)[0].position(); - _element = indexable_element::partition; - - if (sstlog.is_enabled(seastar::log_level::trace)) { - sstlog.trace("index {}: page:", this); - for (const index_entry& e : *_current_list) { - auto dk = dht::global_partitioner().decorate_key(*_sstable->_schema, - e.get_key().to_partition_key(*_sstable->_schema)); - sstlog.trace(" {} -> {}", dk, e.position()); - } - } - }); - } -public: - future<> advance_to_start(const dht::partition_range& range) { - if (range.start()) { - return advance_to(dht::ring_position_view(range.start()->value(), - dht::ring_position_view::after_key(!range.start()->is_inclusive()))); - } - return make_ready_future<>(); - } - - future<> advance_to_end(const dht::partition_range& range) { - if (range.end()) { - return advance_to(dht::ring_position_view(range.end()->value(), - dht::ring_position_view::after_key(range.end()->is_inclusive()))); - } - return advance_to_end(); - } -public: - index_reader(shared_sstable sst, const io_priority_class& pc) - : _sstable(std::move(sst)) - , _pc(pc) - { - sstlog.trace("index {}: index_reader for {}", this, _sstable->get_filename()); - } - - index_reader(const index_reader& r) - : _sstable(r._sstable) - , _current_list(r._current_list) - , _prev_list(r._prev_list) - , _pc(r._pc) - , _previous_summary_idx(r._previous_summary_idx) - , _current_summary_idx(r._current_summary_idx) - , _current_index_idx(r._current_index_idx) - , _current_pi_idx(r._current_pi_idx) - , _data_file_position(r._data_file_position) - , _element(r._element) - { - sstlog.trace("index {}: index_reader for {}", this, _sstable->get_filename()); - } - - // Cannot be used twice on the same summary_idx and together with advance_to(). 
- [[deprecated]] - future get_index_entries(uint64_t summary_idx) { - return advance_to_page(summary_idx).then([this] { - return _current_list ? _current_list.release() : index_list(); - }); - } - - // Valid if partition_data_ready() - index_entry& current_partition_entry() { - assert(_current_list); - return (*_current_list)[_current_index_idx]; - } -public: - // Returns tombstone for current partition, if it was recorded in the sstable. - // It may be unavailable for old sstables for which this information was not generated. - // Can be called only when partition_data_ready(). - stdx::optional partition_tombstone() { - index_entry& e = current_partition_entry(); - auto pi = e.get_promoted_index_view(); - if (!pi) { - return stdx::nullopt; - } - return pi.get_deletion_time(); - } - - // Returns the key for current partition. - // Can be called only when partition_data_ready(). - // The result is valid as long as index_reader is valid. - key_view partition_key() { - index_entry& e = current_partition_entry(); - return e.get_key(); - } - - // Tells whether details about current partition can be accessed. - // If this returns false, you have to call read_partition_data(). - // - // Calling read_partition_data() may involve doing I/O. The reason - // why control over this is exposed and not done under the hood is that - // in some cases it only makes sense to access partition details from index - // if it is readily available, and if it is not, we're better off obtaining - // them by continuing reading from sstable. - bool partition_data_ready() const { - return static_cast(_current_list); - } - - // Ensures that partition_data_ready() returns true. - // Can be called only when !eof() - future<> read_partition_data() { - assert(!eof()); - if (partition_data_ready()) { - return make_ready_future<>(); - } - // The only case when _current_list may be missing is when the cursor is at the beginning - assert(_current_summary_idx == 0); - return advance_to_page(0); - } - - // Forwards the cursor to given position in current partition. - // - // Note that the index within partition, unlike the partition index, doesn't cover all keys. - // So this may forward the cursor to some position pos' which precedes pos, even though - // there exist rows with positions in the range [pos', pos]. - // - // Must be called for non-decreasing positions. - // Must be called only after advanced to some partition and !eof(). - future<> advance_to(position_in_partition_view pos) { - sstlog.trace("index {}: advance_to({}), current data_file_pos={}", this, pos, _data_file_position); - - if (!partition_data_ready()) { - return read_partition_data().then([this, pos] { - sstlog.trace("index {}: page done", this); - assert(partition_data_ready()); - return advance_to(pos); - }); - } - - const schema& s = *_sstable->_schema; - index_entry& e = current_partition_entry(); - promoted_index* pi = nullptr; - try { - pi = e.get_promoted_index(s); - } catch (...) 
{ - sstlog.error("Failed to get promoted index for sstable {}, page {}, index {}: {}", _sstable->get_filename(), - _current_summary_idx, _current_index_idx, std::current_exception()); - } - if (!pi) { - sstlog.trace("index {}: no promoted index", this); - return make_ready_future<>(); - } - - if (sstlog.is_enabled(seastar::log_level::trace)) { - sstlog.trace("index {}: promoted index:", this); - for (auto&& e : pi->entries) { - sstlog.trace(" {}-{}: +{} len={}", e.start, e.end, e.offset, e.width); - } - } - - auto cmp_with_start = [pos_cmp = position_in_partition::composite_less_compare(s)] - (position_in_partition_view pos, const promoted_index::entry& e) -> bool { - return pos_cmp(pos, e.start); - }; - - // Optimize short skips which typically land in the same block - if (_current_pi_idx >= pi->entries.size() || cmp_with_start(pos, pi->entries[_current_pi_idx])) { - sstlog.trace("index {}: position in current block", this); - return make_ready_future<>(); - } - - auto i = std::upper_bound(pi->entries.begin() + _current_pi_idx, pi->entries.end(), pos, cmp_with_start); - _current_pi_idx = std::distance(pi->entries.begin(), i); - if (i != pi->entries.begin()) { - --i; - } - _data_file_position = e.position() + i->offset; - _element = indexable_element::cell; - sstlog.trace("index {}: skipped to cell, _current_pi_idx={}, _data_file_position={}", this, _current_pi_idx, _data_file_position); - return make_ready_future<>(); - } - - // Forwards the cursor to a position which is greater than given position in current partition. - // - // Note that the index within partition, unlike the partition index, doesn't cover all keys. - // So this may not forward to the smallest position which is greater than pos. - // - // May advance to the next partition if it's not possible to find a suitable position inside - // current partition. - // - // Must be called only when !eof(). - future<> advance_past(position_in_partition_view pos) { - sstlog.trace("index {}: advance_past({}), current data_file_pos={}", this, pos, _data_file_position); - - if (!partition_data_ready()) { - return read_partition_data().then([this, pos] { - assert(partition_data_ready()); - return advance_past(pos); - }); - } - - const schema& s = *_sstable->_schema; - index_entry& e = current_partition_entry(); - promoted_index* pi = nullptr; - try { - pi = e.get_promoted_index(s); - } catch (...) 
{ - sstlog.error("Failed to get promoted index for sstable {}, page {}, index {}: {}", _sstable->get_filename(), - _current_summary_idx, _current_index_idx, std::current_exception()); - } - if (!pi || pi->entries.empty()) { - sstlog.trace("index {}: no promoted index", this); - return advance_to_next_partition(); - } - - auto cmp_with_start = [pos_cmp = position_in_partition::composite_less_compare(s)] - (position_in_partition_view pos, const promoted_index::entry& e) -> bool { - return pos_cmp(pos, e.start); - }; - - auto i = std::upper_bound(pi->entries.begin() + _current_pi_idx, pi->entries.end(), pos, cmp_with_start); - _current_pi_idx = std::distance(pi->entries.begin(), i); - if (i == pi->entries.end()) { - return advance_to_next_partition(); - } - - _data_file_position = e.position() + i->offset; - _element = indexable_element::cell; - sstlog.trace("index {}: skipped to cell, _current_pi_idx={}, _data_file_position={}", this, _current_pi_idx, _data_file_position); - return make_ready_future<>(); - } - - // Like advance_to(dht::ring_position_view), but returns information whether the key was found - future advance_and_check_if_present(dht::ring_position_view key) { - return advance_to(key).then([this, key] { - if (eof()) { - return make_ready_future(false); - } - return read_partition_data().then([this, key] { - index_comparator cmp(*_sstable->_schema); - return cmp(key, current_partition_entry()) == 0; - }); - }); - } - - // Moves the cursor to the beginning of next partition. - // Can be called only when !eof(). - future<> advance_to_next_partition() { - sstlog.trace("index {}: advance_to_next_partition()", this); - if (!_current_list) { - return advance_to_page(0).then([this] { - return advance_to_next_partition(); - }); - } - if (_current_index_idx + 1 < _current_list->size()) { - ++_current_index_idx; - _data_file_position = (*_current_list)[_current_index_idx].position(); - _element = indexable_element::partition; - return make_ready_future<>(); - } - auto& summary = _sstable->get_summary(); - if (_current_summary_idx + 1 < summary.header.size) { - return advance_to_page(_current_summary_idx + 1); - } - return advance_to_end(); - } - - // Positions the cursor on the first partition which is not smaller than pos (like std::lower_bound). - // Must be called for non-decreasing positions. - future<> advance_to(dht::ring_position_view pos) { - sstlog.trace("index {}: advance_to({}), _previous_summary_idx={}, _current_summary_idx={}", this, pos, _previous_summary_idx, _current_summary_idx); - - if (pos.is_min()) { - sstlog.trace("index {}: first entry", this); - return make_ready_future<>(); - } else if (pos.is_max()) { - return advance_to_end(); - } - - auto& summary = _sstable->get_summary(); - _previous_summary_idx = std::distance(std::begin(summary.entries), - std::lower_bound(summary.entries.begin() + _previous_summary_idx, summary.entries.end(), pos, index_comparator(*_sstable->_schema))); - - if (_previous_summary_idx == 0) { - sstlog.trace("index {}: first entry", this); - return make_ready_future<>(); - } - - auto summary_idx = _previous_summary_idx - 1; - - sstlog.trace("index {}: summary_idx={}", this, summary_idx); - - // Despite the requirement that the values of 'pos' in subsequent calls - // are increasing we still may encounter a situation when we try to read - // the previous bucket. - // For example, let's say we have index like this: - // summary: A K ... - // index: A C D F K M N O ... - // Now, we want to get positions for range [G, J]. 
We start with [G, - // summary look up will tel us to check the first bucket. However, there - // is no G in that bucket so we read the following one to get the - // position (see the advance_to_page() call below). After we've got it, it's time to - // get J] position. Again, summary points us to the first bucket and we - // hit an assert since the reader is already at the second bucket and we - // cannot go backward. - // The solution is this condition above. If our lookup requires reading - // the previous bucket we assume that the entry doesn't exist and return - // the position of the first one in the current index bucket. - if (summary_idx + 1 == _current_summary_idx) { - return make_ready_future<>(); - } - - return advance_to_page(summary_idx).then([this, pos, summary_idx] { - index_list& il = *_current_list; - sstlog.trace("index {}: old page index = {}", this, _current_index_idx); - auto i = std::lower_bound(il.begin() + _current_index_idx, il.end(), pos, index_comparator(*_sstable->_schema)); - if (i == il.end()) { - sstlog.trace("index {}: not found", this); - return advance_to_page(summary_idx + 1); - } - _current_index_idx = std::distance(il.begin(), i); - _current_pi_idx = 0; - _data_file_position = i->position(); - _element = indexable_element::partition; - sstlog.trace("index {}: new page index = {}, pos={}", this, _current_index_idx, _data_file_position); - return make_ready_future<>(); - }); - } - - // Returns position in the data file of the cursor. - // Returns non-decreasing positions. - // When eof(), returns data_file_end(). - uint64_t data_file_position() const { - return _data_file_position; - } - - // Returns the kind of sstable element the cursor is pointing at. - indexable_element element_kind() const { - return _element; - } - - // Returns position right after all partitions in the sstable - uint64_t data_file_end() const { - return _sstable->data_size(); - } - - bool eof() const { - return _data_file_position == data_file_end(); - } -private: - future<> close_reader() { - if (_reader) { - return _reader->_context.close(); - } - return make_ready_future<>(); - } -public: - future get_disk_read_range(const dht::partition_range& range) { - return advance_to_start(range).then([this, &range] () { - uint64_t start = data_file_position(); - return advance_to_end(range).then([this, &range, start] () { - uint64_t end = data_file_position(); - return sstable::disk_read_range(start, end); - }); - }); - } - - future<> close() { - return close_reader(); - } -}; - -} diff --git a/scylla/sstables/key.hh b/scylla/sstables/key.hh deleted file mode 100644 index fb5f83e..0000000 --- a/scylla/sstables/key.hh +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once -#include "bytes.hh" -#include "schema.hh" -#include "core/future.hh" -#include "database_fwd.hh" -#include "keys.hh" -#include "compound_compat.hh" -#include "dht/i_partitioner.hh" - -namespace sstables { - -class key_view { - bytes_view _bytes; -public: - explicit key_view(bytes_view b) : _bytes(b) {} - key_view() : _bytes() {} - - std::vector explode(const schema& s) const { - return composite_view(_bytes, s.partition_key_size() > 1).explode(); - } - - partition_key to_partition_key(const schema& s) const { - return partition_key::from_exploded_view(explode(s)); - } - - bool operator==(const key_view& k) const { return k._bytes == _bytes; } - bool operator!=(const key_view& k) const { return !(k == *this); } - - bool empty() { return _bytes.empty(); } - - explicit operator bytes_view() const { - return _bytes; - } - - int tri_compare(key_view other) const { - return compare_unsigned(_bytes, other._bytes); - } - - int tri_compare(const schema& s, partition_key_view other) const { - auto lf = other.legacy_form(s); - return lexicographical_tri_compare( - _bytes.begin(), _bytes.end(), lf.begin(), lf.end(), - [] (uint8_t b1, uint8_t b2) { return (int)b1 - b2; }); - } -}; - -// Our internal representation differs slightly (in the way it serializes) from Origin. -// In order to be able to achieve read and write compatibility for sstables - so they can -// be imported and exported - we need to always convert a key to this representation. -class key { -public: - enum class kind { - before_all_keys, - regular, - after_all_keys, - }; -private: - kind _kind; - bytes _bytes; - - static bool is_compound(const schema& s) { - return s.partition_key_size() > 1; - } -public: - key(bytes&& b) : _kind(kind::regular), _bytes(std::move(b)) {} - key(kind k) : _kind(k) {} - static key from_bytes(bytes b) { - return key(std::move(b)); - } - template - static key make_key(const schema& s, RangeOfSerializedComponents&& values) { - return key(composite::serialize_value(std::forward(values), is_compound(s)).release_bytes()); - } - static key from_deeply_exploded(const schema& s, const std::vector& v) { - return make_key(s, v); - } - static key from_exploded(const schema& s, std::vector& v) { - return make_key(s, v); - } - static key from_exploded(const schema& s, std::vector&& v) { - return make_key(s, std::move(v)); - } - // Unfortunately, the _bytes field for the partition_key are not public. We can't move. 
- static key from_partition_key(const schema& s, partition_key_view pk) { - return make_key(s, pk); - } - partition_key to_partition_key(const schema& s) const { - return partition_key::from_exploded_view(explode(s)); - } - - std::vector explode(const schema& s) const { - return composite_view(_bytes, is_compound(s)).explode(); - } - - int32_t tri_compare(key_view k) const { - if (_kind == kind::before_all_keys) { - return -1; - } - if (_kind == kind::after_all_keys) { - return 1; - } - return key_view(_bytes).tri_compare(k); - } - operator key_view() const { - return key_view(_bytes); - } - explicit operator bytes_view() const { - return _bytes; - } - const bytes& get_bytes() const { - return _bytes; - } - friend key minimum_key(); - friend key maximum_key(); -}; - -inline key minimum_key() { - return key(key::kind::before_all_keys); -}; - -inline key maximum_key() { - return key(key::kind::after_all_keys); -}; - -class decorated_key_view { - const dht::token& _token; - key_view _partition_key; -public: - decorated_key_view(const dht::token& token, key_view partition_key) noexcept - : _token(token), _partition_key(partition_key) { } - - const dht::token& token() const { - return _token; - } - - key_view key() const { - return _partition_key; - } -}; - -} diff --git a/scylla/sstables/leveled_manifest.hh b/scylla/sstables/leveled_manifest.hh deleted file mode 100644 index 279ec1a..0000000 --- a/scylla/sstables/leveled_manifest.hh +++ /dev/null @@ -1,699 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
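The three-way comparison contract used by `key_view::tri_compare` above (negative/zero/positive, bytes compared as unsigned) can be shown in isolation; `minimum_key()` and `maximum_key()` simply short-circuit it to -1 and 1. A small sketch with std::string standing in for bytes:

#include <cstdio>
#include <string>

// Lexicographic three-way compare over unsigned bytes, matching the
// tri_compare contract above.
static int tri_compare(const std::string& a, const std::string& b) {
    size_t n = a.size() < b.size() ? a.size() : b.size();
    for (size_t i = 0; i < n; i++) {
        int d = (unsigned char)a[i] - (unsigned char)b[i];
        if (d != 0) return d;
    }
    return (int)a.size() - (int)b.size();  // on a shared prefix, shorter sorts first
}

int main() {
    std::printf("%d %d %d\n",
                tri_compare("abc", "abd") < 0,   // 1: 'c' < 'd'
                tri_compare("ab", "abc") < 0,    // 1: prefix sorts first
                tri_compare("\xff", "a") > 0);   // 1: 0xff compared unsigned
}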
- */
-
-#pragma once
-
-#include "sstables.hh"
-#include "compaction.hh"
-#include "range.hh"
-#include "log.hh"
-
-class leveled_manifest {
-    static logging::logger logger;
-
-    schema_ptr _schema;
-    std::vector<std::list<sstables::shared_sstable>> _generations;
-    uint64_t _max_sstable_size_in_bytes;
-#if 0
-    private final SizeTieredCompactionStrategyOptions options;
-#endif
-
-    struct candidates_info {
-        std::vector<sstables::shared_sstable> candidates;
-        bool can_promote = true;
-    };
-public:
-    /**
-     * limit the number of L0 sstables we do at once, because compaction bloom filter creation
-     * uses a pessimistic estimate of how many keys overlap (none), so we risk wasting memory
-     * or even OOMing when compacting highly overlapping sstables
-     */
-    static constexpr int MAX_COMPACTING_L0 = 32;
-    /**
-     * If we go this many rounds without compacting
-     * in the highest level, we start bringing in sstables from
-     * that level into lower level compactions
-     */
-    static constexpr int NO_COMPACTION_LIMIT = 25;
-
-    static constexpr int MAX_LEVELS = 9; // log10(1000^3);
-
-    leveled_manifest(column_family& cfs, int max_sstable_size_in_MB)
-        : _schema(cfs.schema())
-        , _max_sstable_size_in_bytes(max_sstable_size_in_MB * 1024 * 1024)
-    {
-        // allocate enough generations for a PB of data, with a 1-MB sstable size. (Note that if maxSSTableSize is
-        // updated, we will still have sstables of the older, potentially smaller size. So don't make this
-        // dependent on maxSSTableSize.)
-        _generations.resize(MAX_LEVELS);
-    }
-
-    static leveled_manifest create(column_family& cfs, std::vector<sstables::shared_sstable>& sstables, int max_sstable_size_in_mb) {
-        leveled_manifest manifest = leveled_manifest(cfs, max_sstable_size_in_mb);
-
-        // ensure all SSTables are in the manifest
-        for (auto& sstable : sstables) {
-            // unconditionally add an sstable to the list of its level.
-            manifest.add(sstable);
-        }
-
-        return manifest;
-    }
-
-    void add(sstables::shared_sstable& sstable) {
-        uint32_t level = sstable->get_sstable_level();
-
-        if (level >= _generations.size()) {
-            throw std::runtime_error(sprint("Invalid level %u out of %ld", level, (_generations.size() - 1)));
-        }
-        logger.debug("Adding {} to L{}", sstable->get_filename(), level);
-        _generations[level].push_back(sstable);
-    }
-
-    // Return the first set of overlapping sstables for a given level.
-    // Assumes _generations[level] is already sorted by first key.
-    std::vector<sstables::shared_sstable> overlapping_sstables(int level) {
-        const schema& s = *_schema;
-        std::unordered_set<sstables::shared_sstable> result;
-        stdx::optional<sstables::shared_sstable> previous;
-        stdx::optional<dht::decorated_key> last; // keeps track of the highest last key in result.
-
-        for (auto& current : _generations[level]) {
-            auto current_first = current->get_first_decorated_key();
-            auto current_last = current->get_last_decorated_key();
-
-            if (previous && current_first.tri_compare(s, (*previous)->get_last_decorated_key()) <= 0) {
-                result.insert(*previous);
-                result.insert(current);
-            } else if (last && current_first.tri_compare(s, *last) <= 0) {
-                // current may also overlap with an sstable other than the previous one, if there is
-                // a large token-span sstable that comes earlier in the level.
-                result.insert(current);
-            } else if (!result.empty()) {
-                // the first overlapping set is returned as soon as current doesn't overlap with it
-                break;
-            }
-
-            if (!last || current_last.tri_compare(s, *last) > 0) {
-                last = std::move(current_last);
-            }
-            previous = current;
-        }
-        return std::vector<sstables::shared_sstable>(result.begin(), result.end());
-    }
-
-    /**
-     * Checks if adding the sstable creates an overlap in the level.
-     * @param sstable the sstable to add
-     * @return true if it is safe to add the sstable to the level.
-     */
-    bool can_add_sstable(sstables::shared_sstable& sstable) {
-        uint32_t level = sstable->get_sstable_level();
-        const schema& s = *_schema;
-
-        if (level == 0) {
-            return true;
-        }
-
-        auto copy_level = _generations[level];
-        copy_level.push_back(sstable);
-        copy_level.sort([&s] (auto& i, auto& j) {
-            return i->compare_by_first_key(*j) < 0;
-        });
-
-        const sstables::sstable* previous = nullptr;
-        for (auto& current : copy_level) {
-            if (previous != nullptr) {
-                auto current_first = current->get_first_decorated_key();
-                auto previous_last = previous->get_last_decorated_key();
-
-                if (current_first.tri_compare(s, previous_last) <= 0) {
-                    return false;
-                }
-            }
-            previous = &*current;
-        }
-
-        return true;
-    }
-
-    void send_back_to_L0(sstables::shared_sstable& sstable) {
-        remove(sstable);
-        _generations[0].push_back(sstable);
-        sstable->set_sstable_level(0);
-    }
-
-#if 0
-    private String toString(Collection<SSTableReader> sstables)
-    {
-        StringBuilder builder = new StringBuilder();
-        for (SSTableReader sstable : sstables)
-        {
-            builder.append(sstable.descriptor.cfname)
-                   .append('-')
-                   .append(sstable.descriptor.generation)
-                   .append("(L")
-                   .append(sstable.getSSTableLevel())
-                   .append("), ");
-        }
-        return builder.toString();
-    }
-#endif
-
-    static uint64_t max_bytes_for_level(int level, uint64_t max_sstable_size_in_bytes) {
-        if (level == 0) {
-            return 4L * max_sstable_size_in_bytes;
-        }
-        double bytes = pow(10, level) * max_sstable_size_in_bytes;
-        if (bytes > std::numeric_limits<int64_t>::max()) {
-            throw std::runtime_error(sprint("At most %ld bytes may be in a compaction level; your maxSSTableSize must be absurdly high to compute %f",
-                std::numeric_limits<int64_t>::max(), bytes));
-        }
-        uint64_t bytes_u64 = bytes;
-        return bytes_u64;
-    }
-
-    uint64_t max_bytes_for_level(int level) {
-        return max_bytes_for_level(level, _max_sstable_size_in_bytes);
-    }
-
-    /**
-     * @return highest-priority sstables to compact, and the level to compact them to.
-     * If no compactions are necessary, will return an empty descriptor.
-     */
-    sstables::compaction_descriptor get_compaction_candidates(const std::vector<stdx::optional<dht::decorated_key>>& last_compacted_keys,
-            std::vector<int>& compaction_counter) {
-#if 0
-        // during bootstrap we only do size tiering in L0 to make sure
-        // the streamed files can be placed in their original levels
-        if (StorageService.instance.isBootstrapMode())
-        {
-            List<SSTableReader> mostInteresting = getSSTablesForSTCS(getLevel(0));
-            if (!mostInteresting.isEmpty())
-            {
-                logger.info("Bootstrapping - doing STCS in L0");
-                return new CompactionCandidate(mostInteresting, 0, Long.MAX_VALUE);
-            }
-            return null;
-        }
-#endif
-        // LevelDB gives each level a score of how much data it contains vs its ideal amount, and
-        // compacts the level with the highest score. But this falls apart spectacularly once you
-        // get behind. Consider this set of levels:
-        // L0: 988 [ideal: 4]
-        // L1: 117 [ideal: 10]
-        // L2: 12  [ideal: 100]
-        //
-        // The problem is that L0 has a much higher score (almost 250) than L1 (11), so what we'll
-        // do is compact a batch of MAX_COMPACTING_L0 sstables with all 117 L1 sstables, and put the
-        // result (say, 120 sstables) in L1. Then we'll compact the next batch of MAX_COMPACTING_L0,
-        // and so forth. So we spend most of our i/o rewriting the L1 data with each batch.
-        //
-        // If we could just do *all* L0 a single time with L1, that would be ideal. But we can't
-        // -- see the javadoc for MAX_COMPACTING_L0.
-        //
-        // LevelDB's way around this is to simply block writes if L0 compaction falls behind.
-        // We don't have that luxury.
-        //
-        // So instead, we
-        // 1) force compacting higher levels first, which minimizes the i/o needed to compact
-        //    optimally, which gives us a long-term win, and
-        // 2) if L0 falls behind, we will size-tiered compact it to reduce read overhead until
-        //    we can catch up on the higher levels.
-        //
-        // This isn't a magic wand -- if you are consistently writing too fast for LCS to keep
-        // up, you're still screwed. But if instead you have intermittent bursts of activity,
-        // it can help a lot.
-        for (auto i = _generations.size() - 1; i > 0; i--) {
-            auto& sstables = get_level(i);
-            if (sstables.empty()) {
-                continue; // mostly this just avoids polluting the debug log with zero scores
-            }
-#if 0
-            // we want to calculate the score excluding compacting ones
-            Set<SSTableReader> sstablesInLevel = Sets.newHashSet(sstables);
-            Set<SSTableReader> remaining = Sets.difference(sstablesInLevel, cfs.getDataTracker().getCompacting());
-#endif
-            double score = (double) get_total_bytes(sstables) / (double) max_bytes_for_level(i);
-
-            logger.debug("Compaction score for level {} is {}", i, score);
-
-            if (score > 1.001) {
-                // before proceeding with a higher level, let's see if L0 is far enough behind to warrant STCS
-                // TODO: we shouldn't proceed with the size-tiered strategy if cassandra.disable_stcs_in_l0 is true.
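[Editor's note] The scoring arithmetic from the comment above can be checked in isolation. The sketch below is not part of the patch; the 160 MB sstable size is an arbitrary assumption (a uniform per-sstable size cancels out of each score):

#include <cmath>
#include <cstdint>
#include <cstdio>

// Ideal level capacity: L0 holds 4 sstables' worth; Ln holds 10^n sstables' worth.
static uint64_t level_target(int level, uint64_t max_sstable_bytes) {
    return level == 0 ? 4 * max_sstable_bytes
                      : uint64_t(std::pow(10, level)) * max_sstable_bytes;
}

int main() {
    constexpr uint64_t mb = 1024 * 1024, sstable = 160 * mb; // hypothetical sstable size
    const uint64_t counts[] = {988, 117, 12};                // sstables per level, from the comment
    for (int l = 0; l < 3; l++) {
        double score = double(counts[l] * sstable) / double(level_target(l, sstable));
        std::printf("L%d score: %.2f\n", l, score);          // L0 = 247.00, L1 = 11.70, L2 = 0.12
    }
}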
-                if (get_level_size(0) > MAX_COMPACTING_L0) {
-                    auto most_interesting = size_tiered_most_interesting_bucket(get_level(0));
-                    if (!most_interesting.empty()) {
-                        logger.debug("L0 is too far behind, performing size-tiering there first");
-                        return sstables::compaction_descriptor(std::move(most_interesting));
-                    }
-                }
-                // L0 is fine, proceed with this level
-                auto info = get_candidates_for(i, last_compacted_keys);
-                if (!info.candidates.empty()) {
-                    int next_level = get_next_level(info.candidates, info.can_promote);
-
-                    if (info.can_promote) {
-                        info.candidates = get_overlapping_starved_sstables(next_level, std::move(info.candidates), compaction_counter);
-                    }
-#if 0
-                    if (logger.isDebugEnabled())
-                        logger.debug("Compaction candidates for L{} are {}", i, toString(candidates));
-#endif
-                    return sstables::compaction_descriptor(std::move(info.candidates), next_level, _max_sstable_size_in_bytes);
-                } else {
-                    logger.debug("No compaction candidates for L{}", i);
-                }
-            }
-        }
-
-        // Higher levels are happy, time for a standard, non-STCS L0 compaction
-        if (get_level(0).empty()) {
-            return sstables::compaction_descriptor();
-        }
-
-        auto info = get_candidates_for(0, last_compacted_keys);
-        if (info.candidates.empty()) {
-            return sstables::compaction_descriptor();
-        }
-        auto next_level = get_next_level(info.candidates, info.can_promote);
-        return sstables::compaction_descriptor(std::move(info.candidates), next_level, _max_sstable_size_in_bytes);
-    }
-
-#if 0
-    private List<SSTableReader> getSSTablesForSTCS(Collection<SSTableReader> sstables)
-    {
-        Iterable<SSTableReader> candidates = cfs.getDataTracker().getUncompactingSSTables(sstables);
-        List<Pair<SSTableReader, Long>> pairs = SizeTieredCompactionStrategy.createSSTableAndLengthPairs(AbstractCompactionStrategy.filterSuspectSSTables(candidates));
-        List<List<SSTableReader>> buckets = SizeTieredCompactionStrategy.getBuckets(pairs,
-            options.bucketHigh,
-            options.bucketLow,
-            options.minSSTableSize);
-        return SizeTieredCompactionStrategy.mostInterestingBucket(buckets, 4, 32);
-    }
-#endif
-
-    /**
-     * If we do something that makes many levels contain too little data (cleanup, change sstable size) we will "never"
-     * compact the high levels.
-     *
-     * This method checks whether we have gone many compaction rounds without doing any high-level compaction; if so,
-     * we start bringing in one sstable from the highest level until that level is either empty or is doing compaction.
-     *
-     * @param targetLevel the level the candidates will be compacted into
-     * @param candidates the original sstables to compact
-     * @return
-     */
-    std::vector<sstables::shared_sstable>
-    get_overlapping_starved_sstables(int target_level, std::vector<sstables::shared_sstable>&& candidates, std::vector<int>& compaction_counter) {
-        for (int i = _generations.size() - 1; i > 0; i--) {
-            compaction_counter[i]++;
-        }
-        compaction_counter[target_level] = 0;
-
-        if (logger.level() == logging::log_level::debug) {
-            for (auto j = 0U; j < compaction_counter.size(); j++) {
-                logger.debug("CompactionCounter: {}: {}", j, compaction_counter[j]);
-            }
-        }
-
-        for (int i = _generations.size() - 1; i > 0; i--) {
-            if (get_level_size(i) > 0) {
-                if (compaction_counter[i] > NO_COMPACTION_LIMIT) {
-                    // we try to find an sstable that is fully contained within the boundaries we are compacting;
-                    // say we are compacting 3 sstables: 0->30 in L1 and 0->12, 12->33 in L2;
-                    // this means that we will not create an overlap in L2 if we add an sstable
-                    // contained within 0 -> 33 to the compaction
-                    stdx::optional<dht::decorated_key> max;
-                    stdx::optional<dht::decorated_key> min;
-                    for (auto& candidate : candidates) {
-                        auto& candidate_first = candidate->get_first_decorated_key();
-                        if (!min || candidate_first.tri_compare(*_schema, *min) < 0) {
-                            min = candidate_first;
-                        }
-                        auto& candidate_last = candidate->get_last_decorated_key();
-                        if (!max || candidate_last.tri_compare(*_schema, *max) > 0) {
-                            max = candidate_last;
-                        }
-                    }
-#if 0
-                    // NOTE: We don't need to filter out compacting sstables by now because the strategy only deals with
-                    // uncompacting sstables and parallel compaction is also disabled for lcs.
-                    Set<SSTableReader> compacting = cfs.getDataTracker().getCompacting();
-#endif
-                    auto boundaries = ::range::make(*min, *max);
-                    for (auto& sstable : get_level(i)) {
-                        auto r = ::range::make(sstable->get_first_decorated_key(), sstable->get_last_decorated_key());
-                        if (boundaries.contains(r, dht::ring_position_comparator(*_schema))) {
-                            logger.info("Adding high-level (L{}) {} to candidates", sstable->get_sstable_level(), sstable->get_filename());
-
-                            auto result = std::find_if(std::begin(candidates), std::end(candidates), [&sstable] (auto& candidate) {
-                                return sstable->generation() == candidate->generation();
-                            });
-                            if (result != std::end(candidates)) {
-                                continue;
-                            }
-                            candidates.push_back(sstable);
-                            return candidates;
-                        }
-                    }
-                }
-                return candidates;
-            }
-        }
-
-        return candidates;
-    }
-
-    size_t get_level_size(uint32_t level) {
-        return get_level(level).size();
-    }
-
-#if 0
-    public synchronized int[] getAllLevelSize()
-    {
-        int[] counts = new int[generations.length];
-        for (int i = 0; i < counts.length; i++)
-            counts[i] = getLevel(i).size();
-        return counts;
-    }
-
-    private void logDistribution()
-    {
-        if (logger.isDebugEnabled())
-        {
-            for (int i = 0; i < generations.length; i++)
-            {
-                if (!getLevel(i).isEmpty())
-                {
-                    logger.debug("L{} contains {} SSTables ({} bytes) in {}",
-                                 i, getLevel(i).size(), SSTableReader.getTotalBytes(getLevel(i)), this);
-                }
-            }
-        }
-    }
-#endif
-
-    uint32_t remove(sstables::shared_sstable& sstable) {
-        uint32_t level = sstable->get_sstable_level();
-        if (level >= _generations.size()) {
-            throw std::runtime_error("Invalid level");
-        }
-        _generations[level].remove(sstable);
-        return level;
-    }
-
-    template <typename T>
-    static std::vector<sstables::shared_sstable> overlapping(const schema& s, std::vector<sstables::shared_sstable>& candidates, T& others) {
-        assert(!candidates.empty());
-        /*
-         * Picking each sstable from others that overlaps one of the sstables of candidates is not enough,
-         * because you could have the following situation:
-         *   candidates = [ s1(a, c), s2(m, z) ]
-         *   others = [ s3(e, g) ]
-         * In that case, s3 overlaps none of s1 or s2, but if we compact s1 with s2, the resulting sstable will
-         * overlap s3, so we must return s3.
-         *
-         * Thus, the correct approach is to pick the sstables overlapping anything between the first key in all
-         * the candidate sstables, and the last (see the standalone sketch below).
-         */
-        auto it = candidates.begin();
-        auto& first_sstable = *it;
-        it++;
-        dht::token first = first_sstable->get_first_decorated_key()._token;
-        dht::token last = first_sstable->get_last_decorated_key()._token;
-        while (it != candidates.end()) {
-            auto& candidate_sstable = *it;
-            it++;
-            dht::token first_candidate = candidate_sstable->get_first_decorated_key()._token;
-            dht::token last_candidate = candidate_sstable->get_last_decorated_key()._token;
-
-            first = first <= first_candidate ? first : first_candidate;
-            last = last >= last_candidate ? last : last_candidate;
-        }
-        return overlapping(s, first, last, others);
-    }
-
-    template <typename T>
-    static std::vector<sstables::shared_sstable> overlapping(const schema& s, sstables::shared_sstable& sstable, T& others) {
-        return overlapping(s, sstable->get_first_decorated_key()._token, sstable->get_last_decorated_key()._token, others);
-    }
-
-    /**
-     * @return sstables from @param sstables that contain keys between @param start and @param end, inclusive.
-     */
-    template <typename T>
-    static std::vector<sstables::shared_sstable> overlapping(const schema& s, dht::token start, dht::token end, T& sstables) {
-        assert(start <= end);
-
-        std::vector<sstables::shared_sstable> overlapped;
-        auto range = ::range<dht::token>::make(start, end);
-
-        for (auto& candidate : sstables) {
-            auto candidate_range = ::range<dht::token>::make(candidate->get_first_decorated_key()._token, candidate->get_last_decorated_key()._token);
-
-            if (range.overlaps(candidate_range, dht::token_comparator())) {
-                overlapped.push_back(candidate);
-            }
-        }
-        return overlapped;
-    }
-
-#if 0
-    private static final Predicate<SSTableReader> suspectP = new Predicate<SSTableReader>()
-    {
-        public boolean apply(SSTableReader candidate)
-        {
-            return candidate.isMarkedSuspect();
-        }
-    };
-#endif
-
-    bool worth_promoting_L0_candidates(uint64_t candidates_total_size) const {
-        return candidates_total_size >= _max_sstable_size_in_bytes;
-    }
-
-    /**
-     * @return highest-priority sstables to compact for the given level.
-     * If no compactions are possible (because of concurrent compactions or because some sstables are blacklisted
-     * for prior failure), will return an empty list. Never returns null.
-     */
-    candidates_info get_candidates_for(int level, const std::vector<stdx::optional<dht::decorated_key>>& last_compacted_keys) {
-        const schema& s = *_schema;
-        assert(!get_level(level).empty());
-
-        logger.debug("Choosing candidates for L{}", level);
-
-        if (level == 0) {
-
-            // L0 is the dumping ground for new sstables which thus may overlap each other.
-            //
-            // We treat L0 compactions specially:
-            // 1a. add sstables to the candidate set until we have at least maxSSTableSizeInMB
-            // 1b. prefer choosing older sstables as candidates, to newer ones
-            // 1c. any L0 sstables that overlap a candidate, will also become candidates
-            // 2. At most MAX_COMPACTING_L0 sstables from L0 will be compacted at once
-            // 3. If total candidate size is less than maxSSTableSizeInMB, we won't bother compacting with L1,
-            //    and the result of the compaction will stay in L0 instead of being promoted (see promote())
-            //
-            // Note that we ignore the suspect-ness of L1 sstables here, since if an L1 sstable is suspect we're
-            // basically screwed, since we expect all or most L0 sstables to overlap with each L1 sstable.
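[Editor's note] A standalone sketch, not part of the patch, of the span-coalescing rule documented in overlapping() above. Single characters stand in for token bounds; everything here is illustrative only:

#include <algorithm>
#include <cassert>
#include <vector>

struct sst { char first, last; };                           // stand-in for an sstable's token bounds

static bool overlaps(sst a, char lo, char hi) {
    return a.first <= hi && a.last >= lo;
}

int main() {
    std::vector<sst> candidates = {{'a', 'c'}, {'m', 'z'}}; // s1, s2 from the comment
    sst s3{'e', 'g'};
    // Testing s3 against each candidate individually misses it:
    assert(!overlaps(s3, 'a', 'c') && !overlaps(s3, 'm', 'z'));
    // Coalescing the candidates into one span [a, z], as overlapping() does, catches it:
    char lo = candidates.front().first, hi = candidates.front().last;
    for (auto& c : candidates) {
        lo = std::min(lo, c.first);
        hi = std::max(hi, c.last);
    }
    assert(overlaps(s3, lo, hi));                           // the compacted s1+s2 would overlap s3
}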
- // So if an L1 sstable is suspect we can't do much besides try anyway and hope for the best. - auto candidates = boost::copy_range>(get_level(0)); - - if (candidates.size() > MAX_COMPACTING_L0) { - // limit to only the MAX_COMPACTING_L0 oldest candidates - sort_sstables_by_age(candidates); - candidates.resize(MAX_COMPACTING_L0); - } - - // leave everything in L0 if we didn't end up with a full sstable's worth of data - if (worth_promoting_L0_candidates(get_total_bytes(candidates))) { - // add sstables from L1 that overlap candidates - // if the overlapping ones are already busy in a compaction, leave it out. - // TODO try to find a set of L0 sstables that only overlaps with non-busy L1 sstables - auto l1overlapping = overlapping(*_schema, candidates, get_level(1)); - candidates.insert(candidates.end(), l1overlapping.begin(), l1overlapping.end()); - } - if (candidates.size() < 2) { - return {}; - } else { - return { candidates, true }; - } - } - - // for non-L0 compactions, pick up where we left off last time - std::list& sstables = get_level(level); - sstables.sort([&s] (auto& i, auto& j) { - return i->compare_by_first_key(*j) < 0; - }); - - // Restore invariant for current level, when a large token spanning sstable finds its - // way into a level higher than 0, due to resharding or refresh, by compacting first - // set of overlapping sstables. It means more than one compaction may be required for - // invariant to be restored. - auto overlapping_current_level = overlapping_sstables(level); - if (!overlapping_current_level.empty()) { - logger.info("Leveled compaction strategy is restoring invariant of level {} by compacting {} sstables on behalf of {}.{}", - level, overlapping_current_level.size(), s.ks_name(), s.cf_name()); - return { overlapping_current_level, false }; - } - - int start = 0; // handles case where the prior compaction touched the very last range - int idx = 0; - for (auto& sstable : sstables) { - if (uint32_t(level) >= last_compacted_keys.size()) { - throw std::runtime_error(sprint("Invalid level %u out of %ld", level, (last_compacted_keys.size() - 1))); - } - auto& sstable_first = sstable->get_first_decorated_key(); - if (!last_compacted_keys[level] || sstable_first.tri_compare(s, *last_compacted_keys[level]) > 0) { - start = idx; - break; - } - idx++; - } - - // look for a non-suspect keyspace to compact with, starting with where we left off last time, - // and wrapping back to the beginning of the generation if necessary - for (auto i = 0U; i < sstables.size(); i++) { - // get an iterator to the element of position pos from the list get_level(level). 
- auto pos = (start + i) % sstables.size(); - auto it = sstables.begin(); - std::advance(it, pos); - - auto& sstable = *it; - auto candidates = overlapping(*_schema, sstable, get_level(level + 1)); - - candidates.push_back(sstable); -#if 0 - if (Iterables.any(candidates, suspectP)) - continue; - if (Sets.intersection(candidates, compacting).isEmpty()) - return candidates; -#endif - return { candidates, true }; - } - - // all the sstables were suspect or overlapped with something suspect - return {}; - } - - void sort_sstables_by_age(std::vector& candidates) { - std::sort(candidates.begin(), candidates.end(), [] (auto& i, auto& j) { - return i->compare_by_max_timestamp(*j) < 0; - }); - } -#if 0 - @Override - public String toString() - { - return "Manifest@" + hashCode(); - } -#endif - uint32_t get_level_count() { - for (int i = _generations.size() - 1; i >= 0; i--) { - if (get_level(i).size() > 0) { - return i; - } - } - return 0; - } -#if 0 - public synchronized SortedSet getLevelSorted(int level, Comparator comparator) - { - return ImmutableSortedSet.copyOf(comparator, getLevel(level)); - } -#endif - std::list& get_level(uint32_t level) { - if (level >= _generations.size()) { - throw std::runtime_error("Invalid level"); - } - return _generations[level]; - } - - int64_t get_estimated_tasks() { - int64_t tasks = 0; - - for (int i = static_cast(_generations.size()) - 1; i >= 0; i--) { - const auto& sstables = get_level(i); - uint64_t total_bytes_for_this_level = get_total_bytes(sstables); - uint64_t max_bytes_for_this_level = max_bytes_for_level(i); - - if (total_bytes_for_this_level < max_bytes_for_this_level) { - continue; - } - // add to tasks an estimate about number of sstables that make this level go beyond its limit. - tasks += (total_bytes_for_this_level - max_bytes_for_this_level) / _max_sstable_size_in_bytes; - } - return tasks; - } - - int get_next_level(const std::vector& sstables, bool can_promote = true) { - int maximum_level = std::numeric_limits::min(); - int minimum_level = std::numeric_limits::max(); - auto total_bytes = get_total_bytes(sstables); - - for (auto& sstable : sstables) { - int sstable_level = sstable->get_sstable_level(); - maximum_level = std::max(sstable_level, maximum_level); - minimum_level = std::min(sstable_level, minimum_level); - } - - int new_level; - if (minimum_level == 0 && minimum_level == maximum_level && !worth_promoting_L0_candidates(total_bytes)) { - new_level = 0; - } else { - new_level = (minimum_level == maximum_level && can_promote) ? maximum_level + 1 : maximum_level; - assert(new_level > 0); - } - return new_level; - } - - template - static uint64_t get_total_bytes(const T& sstables) { - uint64_t sum = 0; - for (auto& sstable : sstables) { - sum += sstable->ondisk_data_size(); - } - return sum; - } -}; diff --git a/scylla/sstables/metadata_collector.hh b/scylla/sstables/metadata_collector.hh deleted file mode 100644 index e835270..0000000 --- a/scylla/sstables/metadata_collector.hh +++ /dev/null @@ -1,338 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "types.hh" -#include "utils/murmur_hash.hh" -#include "hyperloglog.hh" -#include "db/commitlog/replay_position.hh" -#include - -namespace sstables { - -static constexpr int TOMBSTONE_HISTOGRAM_BIN_SIZE = 100; - -class min_long_tracker { - uint64_t _default_value; - bool _is_set = false; - uint64_t _value; -public: - min_long_tracker() {} - min_long_tracker(uint64_t default_value) { - _default_value = default_value; - } - - void update(uint64_t value) { - if (!_is_set) { - _value = value; - _is_set = true; - } else { - if (value < _value) { - _value = value; - } - } - } - - uint64_t get() { - if (_is_set) { - return _value; - } - return _default_value; - } -}; - -class max_long_tracker { - uint64_t _default_value; - bool _is_set = false; - uint64_t _value; -public: - max_long_tracker() {} - max_long_tracker(uint64_t default_value) { - _default_value = default_value; - } - - void update(uint64_t value) { - if (!_is_set) { - _value = value; - _is_set = true; - } else { - if (value > _value) { - _value = value; - } - } - } - - uint64_t get() { - if (_is_set) { - return _value; - } - return _default_value; - } -}; - -class max_int_tracker { - int _default_value; - bool _is_set = false; - int _value; -public: - max_int_tracker() {} - max_int_tracker(int default_value) { - _default_value = default_value; - } - - void update(int value) { - if (!_is_set) { - _value = value; - _is_set = true; - } else { - if (value > _value) { - _value = value; - } - } - } - - int get() { - if (_is_set) { - return _value; - } - return _default_value; - } -}; - -/** - * ColumnStats holds information about the columns for one row inside sstable - */ -struct column_stats { - /** how many columns are there in the row */ - uint64_t column_count; - - uint64_t start_offset; - uint64_t row_size; - - /** the largest (client-supplied) timestamp in the row */ - min_long_tracker min_timestamp; - max_long_tracker max_timestamp; - max_int_tracker max_local_deletion_time; - /** histogram of tombstone drop time */ - streaming_histogram tombstone_histogram; - - bool has_legacy_counter_shards; - - column_stats() : - column_count(0), - start_offset(0), - row_size(0), - min_timestamp(min_long_tracker(std::numeric_limits::min())), - max_timestamp(max_long_tracker(std::numeric_limits::max())), - max_local_deletion_time(max_int_tracker(std::numeric_limits::max())), - 
tombstone_histogram(TOMBSTONE_HISTOGRAM_BIN_SIZE), - has_legacy_counter_shards(false) - { - } - - void reset() { - *this = column_stats(); - } - - void update_min_timestamp(uint64_t potential_min) { - min_timestamp.update(potential_min); - } - void update_max_timestamp(uint64_t potential_max) { - max_timestamp.update(potential_max); - } - void update_max_local_deletion_time(int potential_value) { - max_local_deletion_time.update(potential_value); - } - -}; - -class metadata_collector { -public: - static constexpr double NO_COMPRESSION_RATIO = -1.0; - - static hll::HyperLogLog hyperloglog(int p, int sp) { - // FIXME: hll::HyperLogLog doesn't support sparse format, so ignoring parameters by the time being. - return hll::HyperLogLog(); - } -private: - // EH of 150 can track a max value of 1697806495183, i.e., > 1.5PB - utils::estimated_histogram _estimated_row_size{150}; - // EH of 114 can track a max value of 2395318855, i.e., > 2B columns - utils::estimated_histogram _estimated_column_count{114}; - db::replay_position _replay_position; - uint64_t _min_timestamp = std::numeric_limits::max(); - uint64_t _max_timestamp = std::numeric_limits::min(); - uint64_t _repaired_at = 0; - int _max_local_deletion_time = std::numeric_limits::min(); - double _compression_ratio = NO_COMPRESSION_RATIO; - std::set _ancestors; - streaming_histogram _estimated_tombstone_drop_time{TOMBSTONE_HISTOGRAM_BIN_SIZE}; - int _sstable_level = 0; - std::vector _min_column_names; - std::vector _max_column_names; - bool _has_legacy_counter_shards = false; - - /** - * Default cardinality estimation method is to use HyperLogLog++. - * Parameter here(p=13, sp=25) should give reasonable estimation - * while lowering bytes required to hold information. - * See CASSANDRA-5906 for detail. - */ - hll::HyperLogLog _cardinality = hyperloglog(13, 25); -private: - /* - * Convert a vector of bytes into a disk array of disk_string. 
- */ - static void convert(disk_array>&to, std::vector&& from) { - for (auto i = 0U; i < from.size(); i++) { - if (!from[i]) { - break; - } - disk_string s; - s.value = std::move(from[i].value()); - to.elements.push_back(std::move(s)); - } - } -public: - void add_key(bytes_view key) { - long hashed = utils::murmur_hash::hash2_64(key, 0); - _cardinality.offer_hashed(hashed); - } - - void add_row_size(uint64_t row_size) { - _estimated_row_size.add(row_size); - } - - void add_column_count(uint64_t column_count) { - _estimated_column_count.add(column_count); - } - - void merge_tombstone_histogram(streaming_histogram& histogram) { - _estimated_tombstone_drop_time.merge(histogram); - } - - /** - * Ratio is compressed/uncompressed and it is - * if you have 1.x then compression isn't helping - */ - void add_compression_ratio(uint64_t compressed, uint64_t uncompressed) { - _compression_ratio = (double) compressed/uncompressed; - } - - void update_min_timestamp(uint64_t potential_min) { - _min_timestamp = std::min(_min_timestamp, potential_min); - } - - void update_max_timestamp(uint64_t potential_max) { - _max_timestamp = std::max(_max_timestamp, potential_max); - } - - void update_max_local_deletion_time(int max_local_deletion_time) { - _max_local_deletion_time = std::max(_max_local_deletion_time, max_local_deletion_time); - } - - void set_replay_position(const db::replay_position & rp) { - _replay_position = rp; - } - - void set_repaired_at(uint64_t repaired_at) { - _repaired_at = repaired_at; - } - - void add_ancestor(int generation) { - _ancestors.insert(generation); - } - - void sstable_level(int sstable_level) { - _sstable_level = sstable_level; - } - - std::vector& min_column_names() { - return _min_column_names; - } - - std::vector& max_column_names() { - return _max_column_names; - } - - void update_has_legacy_counter_shards(bool has_legacy_counter_shards) { - _has_legacy_counter_shards = _has_legacy_counter_shards || has_legacy_counter_shards; - } - - void update(const schema& s, column_stats&& stats) { - update_min_timestamp(stats.min_timestamp.get()); - update_max_timestamp(stats.max_timestamp.get()); - update_max_local_deletion_time(stats.max_local_deletion_time.get()); - add_row_size(stats.row_size); - add_column_count(stats.column_count); - merge_tombstone_histogram(stats.tombstone_histogram); - update_has_legacy_counter_shards(stats.has_legacy_counter_shards); - } - - void construct_compaction(compaction_metadata& m) { - if (!_ancestors.empty()) { - m.ancestors.elements = std::deque(_ancestors.begin(), _ancestors.end()); - } - auto cardinality = _cardinality.get_bytes(); - m.cardinality.elements = std::deque(cardinality.get(), cardinality.get() + cardinality.size()); - } - - void construct_stats(stats_metadata& m) { - m.estimated_row_size = std::move(_estimated_row_size); - m.estimated_column_count = std::move(_estimated_column_count); - m.position = _replay_position; - m.min_timestamp = _min_timestamp; - m.max_timestamp = _max_timestamp; - m.max_local_deletion_time = _max_local_deletion_time; - m.compression_ratio = _compression_ratio; - m.estimated_tombstone_drop_time = std::move(_estimated_tombstone_drop_time); - m.sstable_level = _sstable_level; - m.repaired_at = _repaired_at; - convert(m.min_column_names, std::move(_min_column_names)); - convert(m.max_column_names, std::move(_max_column_names)); - m.has_legacy_counter_shards = _has_legacy_counter_shards; - } -}; - -} - - diff --git a/scylla/sstables/partition.cc b/scylla/sstables/partition.cc deleted file mode 100644 index 
558a156..0000000
--- a/scylla/sstables/partition.cc
+++ /dev/null
@@ -1,1290 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-#include "mutation.hh"
-#include "sstables.hh"
-#include "types.hh"
-#include "core/future-util.hh"
-#include "key.hh"
-#include "keys.hh"
-#include "core/do_with.hh"
-#include "unimplemented.hh"
-#include "utils/move.hh"
-#include "dht/i_partitioner.hh"
-#include
-#include "index_reader.hh"
-#include "counters.hh"
-#include "utils/data_input.hh"
-#include "clustering_ranges_walker.hh"
-#include "binary_search.hh"
-
-namespace sstables {
-
-static inline bytes_view pop_back(std::vector<bytes_view>& vec) {
-    auto b = std::move(vec.back());
-    vec.pop_back();
-    return b;
-}
-
-class sstable_streamed_mutation;
-
-class mp_row_consumer : public row_consumer {
-public:
-    struct new_mutation {
-        partition_key key;
-        tombstone tomb;
-    };
-private:
-    schema_ptr _schema;
-    const io_priority_class& _pc;
-    const query::partition_slice& _slice;
-    bool _out_of_range = false;
-    stdx::optional<query::clustering_key_filter_ranges> _ck_ranges;
-    stdx::optional<clustering_ranges_walker> _ck_ranges_walker;
-    sstable_streamed_mutation* _sm;
-
-    bool _skip_partition = false;
-    // When set, the fragment pending in _in_progress should not be emitted.
-    bool _skip_in_progress = false;
-
-    // The value of _ck_ranges->lower_bound_counter() last time we tried to skip to _ck_ranges->lower_bound().
-    size_t _last_lower_bound_counter = 0;
-
-    // We don't have "end of clustering row" markers. So we know that the current
-    // row has ended once we get something (e.g. a live cell) that belongs to another
-    // one. If that happens, the sstable reader is interrupted (proceed::no), but we
-    // already have the whole row that just ended and a part of the new row.
-    // The finished row is moved to _ready so that the upper layer can retrieve it, and
-    // the part of the new row goes to _in_progress, which is where we will continue
-    // accumulating data once the sstable reader is resumed.
-    //
-    // _ready only holds fragments which are in the query range, but _in_progress
-    // not necessarily.
-    //
-    // _in_progress may be disengaged only before reading the first fragment of a partition
-    // or after all fragments of a partition were consumed. Fast-forwarding within a partition
-    // should not clear it; we rely on it being set to detect repeated tombstones.
-    mutation_fragment_opt _in_progress;
-    mutation_fragment_opt _ready;
-
-    stdx::optional<new_mutation> _mutation;
-    bool _is_mutation_end = true;
-    position_in_partition _fwd_end = position_in_partition::after_all_clustered_rows(); // Restricts the stream on top of _ck_ranges_walker.
-    streamed_mutation::forwarding _fwd;
-
-    // Because of #1203 we may encounter sstables with range tombstones
-    // placed earlier than expected. We fix the ordering by loading range tombstones
-    // initially into _range_tombstones, until the first row is encountered,
-    // and then merge the two streams in push_ready_fragments().
-    //
-    // _range_tombstones holds only tombstones which are relevant for the current ranges.
-    range_tombstone_stream _range_tombstones;
-    bool _first_row_encountered = false;
-public:
-    void set_streamed_mutation(sstable_streamed_mutation* sm) {
-        _sm = sm;
-    }
-    struct column {
-        bool is_static;
-        bytes_view col_name;
-        std::vector<bytes_view> clustering;
-        // see is_collection. collections have an extra element aside from the name.
-        // This will be non-zero size if this is a collection, and zero size otherwise.
-        bytes_view collection_extra_data;
-        bytes_view cell;
-        const column_definition *cdef;
-        bool is_present;
-
-        static constexpr size_t static_size = 2;
-
-        // For every normal column, we expect the clustering key, followed by the
-        // extra element for the column name.
-        //
-        // For a collection, some auxiliary data will be embedded into the
-        // column_name as seen by the row consumer. This means that if our
-        // exploded clustering key has more elements than expected, we are dealing
-        // with a collection.
-        bool is_collection(const schema& s) {
-            auto expected_normal = s.clustering_key_size() + 1;
-            // Note that we can have fewer than expected. That is the case for
-            // incomplete prefixes, for instance.
-            if (clustering.size() <= expected_normal) {
-                return false;
-            } else if (clustering.size() == (expected_normal + 1)) {
-                return true;
-            }
-            throw malformed_sstable_exception(sprint("Found %d clustering elements in column name. Was not expecting that!", clustering.size()));
-        }
-
-        static bool check_static(const schema& schema, bytes_view col) {
-            return composite_view(col, schema.is_compound()).is_static();
-        }
-
-        static bytes_view fix_static_name(const schema& schema, bytes_view col) {
-            return fix_static_name(col, check_static(schema, col));
-        }
-
-        static bytes_view fix_static_name(bytes_view col, bool is_static) {
-            if (is_static) {
-                col.remove_prefix(static_size);
-            }
-            return col;
-        }
-
-        std::vector<bytes_view> extract_clustering_key(const schema& schema) {
-            return composite_view(col_name, schema.is_compound()).explode();
-        }
-        column(const schema& schema, bytes_view col, api::timestamp_type timestamp)
-            : is_static(check_static(schema, col))
-            , col_name(fix_static_name(col, is_static))
-            , clustering(extract_clustering_key(schema))
-            , collection_extra_data(is_collection(schema) ? pop_back(clustering) : bytes()) // collections are not supported with COMPACT STORAGE, so this is fine
-            , cell(!schema.is_dense() ? pop_back(clustering) : (*(schema.regular_begin())).name()) // dense: cell name is not provided. It is the only regular column
-            , cdef(schema.get_column_definition(to_bytes(cell)))
-            , is_present(cdef && timestamp > cdef->dropped_at())
-        {
-
-            if (is_static) {
-                for (auto& e : clustering) {
-                    if (e.size() != 0) {
-                        throw malformed_sstable_exception("Static row has clustering key information. I didn't expect that!");
-                    }
-                }
-            }
-            if (is_present && is_static != cdef->is_static()) {
-                throw malformed_sstable_exception(seastar::format("Mismatch between {} cell and {} column definition",
-                    is_static ? "static" : "non-static", cdef->is_static() ? "static" : "non-static"));
-            }
-        }
-    };
-
-private:
-    // Notes for collection mutation:
-    //
-    // While we could in theory generate the mutation for the elements as they
-    // appear, that would be costly. We would need to keep deserializing and
-    // serializing them, either explicitly or through a merge.
- // - // The best way forward is to accumulate the collection data into a data - // structure, and later on serialize it fully when this (sstable) row ends. - class collection_mutation { - const column_definition *_cdef; - public: - collection_type_impl::mutation cm; - - // We need to get a copy of the prefix here, because the outer object may be short lived. - collection_mutation(const column_definition *cdef) - : _cdef(cdef) { } - - collection_mutation() : _cdef(nullptr) {} - - bool is_new_collection(const column_definition *c) { - if (!_cdef || ((_cdef->id != c->id) || (_cdef->kind != c->kind))) { - return true; - } - return false; - }; - - void flush(const schema& s, mutation_fragment& mf) { - if (!_cdef) { - return; - } - auto ctype = static_pointer_cast(_cdef->type); - auto ac = atomic_cell_or_collection::from_collection_mutation(ctype->serialize_mutation_form(cm)); - if (_cdef->is_static()) { - mf.as_mutable_static_row().set_cell(*_cdef, std::move(ac)); - } else { - mf.as_mutable_clustering_row().set_cell(*_cdef, std::move(ac)); - } - } - }; - std::experimental::optional _pending_collection = {}; - - collection_mutation& pending_collection(const column_definition *cdef) { - if (!_pending_collection || _pending_collection->is_new_collection(cdef)) { - flush_pending_collection(*_schema); - - if (!cdef->is_multi_cell()) { - throw malformed_sstable_exception("frozen set should behave like a cell\n"); - } - _pending_collection = collection_mutation(cdef); - } - return *_pending_collection; - } - - proceed push_ready_fragments_out_of_range(); - proceed push_ready_fragments_with_ready_set(); - - void update_pending_collection(const column_definition *cdef, bytes&& col, atomic_cell&& ac) { - pending_collection(cdef).cm.cells.emplace_back(std::move(col), std::move(ac)); - } - - void update_pending_collection(const column_definition *cdef, tombstone&& t) { - pending_collection(cdef).cm.tomb = std::move(t); - } - - void flush_pending_collection(const schema& s) { - if (_pending_collection) { - _pending_collection->flush(s, *_in_progress); - _pending_collection = {}; - } - } - - // Returns true if and only if the position is inside requested ranges. - // Assumes that this and the other advance_to() are called with monotonic positions. - // We rely on the fact that the first 'S' in SSTables stands for 'sorted' - // and the clustering row keys are always in an ascending order. - void advance_to(position_in_partition_view pos) { - position_in_partition::less_compare less(*_schema); - - if (!less(pos, _fwd_end)) { - _out_of_range = true; - _skip_in_progress = false; - } else { - _skip_in_progress = !_ck_ranges_walker->advance_to(pos); - _out_of_range |= _ck_ranges_walker->out_of_range(); - } - - sstlog.trace("mp_row_consumer {}: advance_to({}) => out_of_range={}, skip_in_progress={}", this, pos, _out_of_range, _skip_in_progress); - } - - // Assumes that this and other advance_to() overloads are called with monotonic positions. 
- void advance_to(const range_tombstone& rt) { - position_in_partition::less_compare less(*_schema); - auto&& start = rt.position(); - auto&& end = rt.end_position(); - - if (!less(start, _fwd_end)) { - _out_of_range = true; - _skip_in_progress = false; // It may become in range after next forwarding, so cannot drop it - } else { - _skip_in_progress = !_ck_ranges_walker->advance_to(start, end); - _out_of_range |= _ck_ranges_walker->out_of_range(); - } - - sstlog.trace("mp_row_consumer {}: advance_to({}) => out_of_range={}, skip_in_progress={}", this, rt, _out_of_range, _skip_in_progress); - } - - void advance_to(const mutation_fragment& mf) { - if (mf.is_range_tombstone()) { - advance_to(mf.as_range_tombstone()); - } else { - advance_to(mf.position()); - } - } - - void set_up_ck_ranges(const partition_key& pk) { - sstlog.trace("mp_row_consumer {}: set_up_ck_ranges({})", this, pk); - _ck_ranges = query::clustering_key_filter_ranges::get_ranges(*_schema, _slice, pk); - _ck_ranges_walker = clustering_ranges_walker(*_schema, _ck_ranges->ranges(), _schema->has_static_columns()); - _last_lower_bound_counter = 0; - _fwd_end = _fwd ? position_in_partition::before_all_clustered_rows() : position_in_partition::after_all_clustered_rows(); - _out_of_range = false; - _range_tombstones.reset(); - _first_row_encountered = false; - } -public: - mutation_opt mut; - - mp_row_consumer(const schema_ptr schema, - const query::partition_slice& slice, - const io_priority_class& pc, - streamed_mutation::forwarding fwd) - : _schema(schema) - , _pc(pc) - , _slice(slice) - , _fwd(fwd) - , _range_tombstones(*_schema) - { } - - mp_row_consumer(const schema_ptr schema, - const io_priority_class& pc, - streamed_mutation::forwarding fwd) - : mp_row_consumer(schema, query::full_slice, pc, fwd) { } - - virtual proceed consume_row_start(sstables::key_view key, sstables::deletion_time deltime) override { - if (!_is_mutation_end) { - return proceed::yes; - } - _mutation = new_mutation{partition_key::from_exploded(key.explode(*_schema)), tombstone(deltime)}; - setup_for_partition(_mutation->key); - return proceed::no; - } - - void setup_for_partition(const partition_key& pk) { - _is_mutation_end = false; - _skip_partition = false; - _skip_in_progress = false; - set_up_ck_ranges(pk); - } - - proceed flush() { - sstlog.trace("mp_row_consumer {}: flush(in_progress={}, ready={}, skip={})", this, _in_progress, _ready, _skip_in_progress); - flush_pending_collection(*_schema); - // If _ready is already set we have a bug: get_mutation_fragment() - // was not called, and below we will lose one clustering row! 
- assert(!_ready); - if (!_skip_in_progress) { - _ready = move_and_disengage(_in_progress); - return push_ready_fragments_with_ready_set(); - } else { - _in_progress = { }; - _ready = { }; - _skip_in_progress = false; - return proceed::yes; - } - } - - proceed flush_if_needed(range_tombstone&& rt) { - sstlog.trace("mp_row_consumer {}: flush_if_needed(in_progress={}, ready={}, skip={})", this, _in_progress, _ready, _skip_in_progress); - proceed ret = proceed::yes; - if (_in_progress) { - ret = flush(); - } - advance_to(rt); - _in_progress = mutation_fragment(std::move(rt)); - if (_out_of_range) { - ret = push_ready_fragments_out_of_range(); - } - if (needs_skip()) { - ret = proceed::no; - } - return ret; - } - - proceed flush_if_needed(bool is_static, position_in_partition&& pos) { - sstlog.trace("mp_row_consumer {}: flush_if_needed({})", this, pos); - - // Part of workaround for #1203 - _first_row_encountered = !is_static; - - position_in_partition::equal_compare eq(*_schema); - proceed ret = proceed::yes; - if (_in_progress && !eq(_in_progress->position(), pos)) { - ret = flush(); - } - if (!_in_progress) { - advance_to(pos); - if (is_static) { - _in_progress = mutation_fragment(static_row()); - } else { - _in_progress = mutation_fragment(clustering_row(std::move(pos.key()))); - } - if (_out_of_range) { - ret = push_ready_fragments_out_of_range(); - } - if (needs_skip()) { - ret = proceed::no; - } - } - return ret; - } - - proceed flush_if_needed(bool is_static, const std::vector& ecp) { - auto pos = [&] { - if (is_static) { - return position_in_partition(position_in_partition::static_row_tag_t()); - } else { - auto ck = clustering_key_prefix::from_exploded_view(ecp); - return position_in_partition(position_in_partition::clustering_row_tag_t(), std::move(ck)); - } - }(); - return flush_if_needed(is_static, std::move(pos)); - } - - proceed flush_if_needed(clustering_key_prefix&& ck) { - return flush_if_needed(false, position_in_partition(position_in_partition::clustering_row_tag_t(), std::move(ck))); - } - - atomic_cell make_counter_cell(int64_t timestamp, bytes_view value) { - static constexpr size_t shard_size = 32; - - data_input in(value); - - auto header_size = in.read(); - for (auto i = 0; i < header_size; i++) { - auto idx = in.read(); - if (idx >= 0) { - throw marshal_exception("encountered a local shard in a counter cell"); - } - } - auto shard_count = value.size() / shard_size; - if (shard_count != size_t(header_size)) { - throw marshal_exception("encountered remote shards in a counter cell"); - } - - std::vector shards; - shards.reserve(shard_count); - counter_cell_builder ccb(shard_count); - for (auto i = 0u; i < shard_count; i++) { - auto id_hi = in.read(); - auto id_lo = in.read(); - auto clock = in.read(); - auto value = in.read(); - ccb.add_shard(counter_shard(counter_id(utils::UUID(id_hi, id_lo)), value, clock)); - } - return ccb.build(timestamp); - } - - template - //requires requires(CreateCell create_cell, column col) { - // { create_cell(col) } -> void; - //} - proceed do_consume_cell(bytes_view col_name, int64_t timestamp, int32_t ttl, int32_t expiration, CreateCell&& create_cell) { - if (_skip_partition) { - return proceed::yes; - } - - struct column col(*_schema, col_name, timestamp); - - auto ret = flush_if_needed(col.is_static, col.clustering); - if (_skip_in_progress) { - return ret; - } - - if (col.cell.size() == 0) { - row_marker rm(timestamp, gc_clock::duration(ttl), gc_clock::time_point(gc_clock::duration(expiration))); - 
_in_progress->as_mutable_clustering_row().apply(std::move(rm)); - return ret; - } - - if (!col.is_present) { - return ret; - } - - create_cell(std::move(col)); - return ret; - } - - virtual proceed consume_counter_cell(bytes_view col_name, bytes_view value, int64_t timestamp) override { - return do_consume_cell(col_name, timestamp, 0, 0, [&] (auto&& col) { - auto ac = make_counter_cell(timestamp, value); - - if (col.is_static) { - _in_progress->as_mutable_static_row().set_cell(*(col.cdef), std::move(ac)); - } else { - _in_progress->as_mutable_clustering_row().set_cell(*(col.cdef), atomic_cell_or_collection(std::move(ac))); - } - }); - } - - atomic_cell make_atomic_cell(uint64_t timestamp, bytes_view value, uint32_t ttl, uint32_t expiration) { - if (ttl) { - return atomic_cell::make_live(timestamp, value, - gc_clock::time_point(gc_clock::duration(expiration)), gc_clock::duration(ttl)); - } else { - return atomic_cell::make_live(timestamp, value); - } - } - - virtual proceed consume_cell(bytes_view col_name, bytes_view value, int64_t timestamp, int32_t ttl, int32_t expiration) override { - return do_consume_cell(col_name, timestamp, ttl, expiration, [&] (auto&& col) { - auto ac = make_atomic_cell(timestamp, value, ttl, expiration); - - bool is_multi_cell = col.collection_extra_data.size(); - if (is_multi_cell != col.cdef->is_multi_cell()) { - return; - } - if (is_multi_cell) { - update_pending_collection(col.cdef, to_bytes(col.collection_extra_data), std::move(ac)); - return; - } - - if (col.is_static) { - _in_progress->as_mutable_static_row().set_cell(*(col.cdef), std::move(ac)); - return; - } - _in_progress->as_mutable_clustering_row().set_cell(*(col.cdef), atomic_cell_or_collection(std::move(ac))); - }); - } - - virtual proceed consume_deleted_cell(bytes_view col_name, sstables::deletion_time deltime) override { - if (_skip_partition) { - return proceed::yes; - } - - auto timestamp = deltime.marked_for_delete_at; - struct column col(*_schema, col_name, timestamp); - gc_clock::duration secs(deltime.local_deletion_time); - - return consume_deleted_cell(col, timestamp, gc_clock::time_point(secs)); - } - - proceed consume_deleted_cell(column &col, int64_t timestamp, gc_clock::time_point ttl) { - auto ret = flush_if_needed(col.is_static, col.clustering); - if (_skip_in_progress) { - return ret; - } - - if (col.cell.size() == 0) { - row_marker rm(tombstone(timestamp, ttl)); - _in_progress->as_mutable_clustering_row().apply(rm); - return ret; - } - if (!col.is_present) { - return ret; - } - - auto ac = atomic_cell::make_dead(timestamp, ttl); - - bool is_multi_cell = col.collection_extra_data.size(); - if (is_multi_cell != col.cdef->is_multi_cell()) { - return ret; - } - - if (is_multi_cell) { - update_pending_collection(col.cdef, to_bytes(col.collection_extra_data), std::move(ac)); - } else if (col.is_static) { - _in_progress->as_mutable_static_row().set_cell(*col.cdef, atomic_cell_or_collection(std::move(ac))); - } else { - _in_progress->as_mutable_clustering_row().set_cell(*col.cdef, atomic_cell_or_collection(std::move(ac))); - } - return ret; - } - virtual proceed consume_row_end() override { - if (_in_progress) { - flush(); - } - _is_mutation_end = true; - _out_of_range = true; - return proceed::no; - } - - virtual proceed consume_shadowable_row_tombstone(bytes_view col_name, sstables::deletion_time deltime) override { - if (_skip_partition) { - return proceed::yes; - } - auto key = composite_view(column::fix_static_name(*_schema, col_name)).explode(); - auto ck = 
clustering_key_prefix::from_exploded_view(key);
-        auto ret = flush_if_needed(std::move(ck));
-        if (!_skip_in_progress) {
-            _in_progress->as_mutable_clustering_row().apply(shadowable_tombstone(tombstone(deltime)));
-        }
-        return ret;
-    }
-
-    static bound_kind start_marker_to_bound_kind(bytes_view component) {
-        auto found = composite::eoc(component.back());
-        switch (found) {
-        // start_col may have composite_marker::none in sstables
-        // from older versions of Cassandra (see CASSANDRA-7593).
-        case composite::eoc::none:
-            return bound_kind::incl_start;
-        case composite::eoc::start:
-            return bound_kind::incl_start;
-        case composite::eoc::end:
-            return bound_kind::excl_start;
-        default:
-            throw malformed_sstable_exception(sprint("Unexpected start composite marker %d\n", uint16_t(uint8_t(found))));
-        }
-    }
-
-    static bound_kind end_marker_to_bound_kind(bytes_view component) {
-        auto found = composite::eoc(component.back());
-        switch (found) {
-        // end_col may have composite_marker::none in sstables
-        // from older versions of Cassandra (see CASSANDRA-7593).
-        case composite::eoc::none:
-            return bound_kind::incl_end;
-        case composite::eoc::start:
-            return bound_kind::excl_end;
-        case composite::eoc::end:
-            return bound_kind::incl_end;
-        default:
-            throw malformed_sstable_exception(sprint("Unexpected end composite marker %d\n", uint16_t(uint8_t(found))));
-        }
-    }
-
-    virtual proceed consume_range_tombstone(
-            bytes_view start_col, bytes_view end_col,
-            sstables::deletion_time deltime) override {
-
-        if (_skip_partition) {
-            return proceed::yes;
-        }
-
-        auto start = composite_view(column::fix_static_name(*_schema, start_col)).explode();
-
-        // Note how this is slightly different from the check in is_collection. Collection tombstones
-        // do not have extra data.
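[Editor's note] The two marker helpers above encode a small truth table. The sketch below is not from the patch; the enums are stand-ins mirroring composite::eoc and bound_kind, and merely restate the mapping so it can be asserted in one place:

#include <cassert>

enum class eoc { none = 0, start = -1, end = 1 };        // stand-in for composite::eoc
enum class bound { incl_start, excl_start, incl_end, excl_end };

static bound start_bound(eoc e) { return e == eoc::end ? bound::excl_start : bound::incl_start; }
static bound end_bound(eoc e)   { return e == eoc::start ? bound::excl_end : bound::incl_end; }

int main() {
    assert(start_bound(eoc::none)  == bound::incl_start); // pre-CASSANDRA-7593 sstables
    assert(start_bound(eoc::start) == bound::incl_start);
    assert(start_bound(eoc::end)   == bound::excl_start);
    assert(end_bound(eoc::none)    == bound::incl_end);
    assert(end_bound(eoc::start)   == bound::excl_end);   // e.g. a tombstone covering [k1, k5)
    assert(end_bound(eoc::end)     == bound::incl_end);
}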
- // - // Still, it is enough to check if we're dealing with a collection, since any other tombstone - // won't have a full clustering prefix (otherwise it isn't a range) - if (start.size() <= _schema->clustering_key_size()) { - auto start_ck = clustering_key_prefix::from_exploded_view(start); - auto start_kind = start_marker_to_bound_kind(start_col); - auto end = clustering_key_prefix::from_exploded_view(composite_view(column::fix_static_name(*_schema, end_col)).explode()); - auto end_kind = end_marker_to_bound_kind(end_col); - if (range_tombstone::is_single_clustering_row_tombstone(*_schema, start_ck, start_kind, end, end_kind)) { - auto ret = flush_if_needed(std::move(start_ck)); - if (!_skip_in_progress) { - _in_progress->as_mutable_clustering_row().apply(tombstone(deltime)); - } - return ret; - } else { - auto rt = range_tombstone(std::move(start_ck), start_kind, std::move(end), end_kind, tombstone(deltime)); - position_in_partition::less_compare less(*_schema); - auto rt_pos = rt.position(); - if (_in_progress && !less(_in_progress->position(), rt_pos)) { - return proceed::yes; // repeated tombstone, ignore - } - // Workaround for #1203 - if (!_first_row_encountered) { - if (_ck_ranges_walker->contains_tombstone(rt_pos, rt.end_position())) { - _range_tombstones.apply(std::move(rt)); - } - return proceed::yes; - } - return flush_if_needed(std::move(rt)); - } - } else { - auto&& column = pop_back(start); - auto cdef = _schema->get_column_definition(to_bytes(column)); - if (cdef && cdef->is_multi_cell() && deltime.marked_for_delete_at > cdef->dropped_at()) { - auto ret = flush_if_needed(cdef->is_static(), start); - if (!_skip_in_progress) { - update_pending_collection(cdef, tombstone(deltime)); - } - return ret; - } - } - return proceed::yes; - } - virtual const io_priority_class& io_priority() override { - return _pc; - } - - // Returns true if the consumer is positioned at partition boundary, - // meaning that after next read either get_mutation() will - // return engaged mutation or end of stream was reached. - bool is_mutation_end() const { - return _is_mutation_end; - } - - bool is_out_of_range() const { - return _out_of_range; - } - - stdx::optional get_mutation() { - return move_and_disengage(_mutation); - } - - // Pushes ready fragments into the streamed_mutation's buffer. - // Tries to push as much as possible, but respects buffer limits. - // Sets streamed_mutation::_end_of_range when there are no more fragments for the query range. - // Returns information whether the parser should continue to parse more - // input and produce more fragments or we have collected enough and should yield. - proceed push_ready_fragments(); - - void skip_partition() { - _pending_collection = { }; - _in_progress = { }; - _ready = { }; - - _skip_partition = true; - } - - virtual void reset(indexable_element el) override { - sstlog.trace("mp_row_consumer {}: reset({})", this, static_cast(el)); - _ready = {}; - if (el == indexable_element::partition) { - _pending_collection = {}; - _in_progress = {}; - _is_mutation_end = true; - _out_of_range = true; - } else { - // Do not reset _in_progress so that out-of-order tombstone detection works. - _is_mutation_end = false; - } - } - - // Changes current fragment range. - // - // When there are no more fragments for current range, - // is_out_of_range() will return true. - // - // The new range must not overlap with the previous range and - // must be after it. 
- // - future<> fast_forward_to(position_range); - - bool needs_skip() const { - return (_skip_in_progress || !_in_progress) - && _last_lower_bound_counter != _ck_ranges_walker->lower_bound_change_counter(); - } - - // Tries to fast forward the consuming context to the next position. - // Must be called outside consuming context. - future<> maybe_skip(); -}; - -struct sstable_data_source : public enable_lw_shared_from_this { - shared_sstable _sst; - mp_row_consumer _consumer; - bool _index_in_current_partition = false; // Whether _lh_index is in current partition - bool _will_likely_slice = false; - bool _read_enabled = true; - data_consume_context _context; - std::unique_ptr _lh_index; // For lower bound - std::unique_ptr _rh_index; // For upper bound - schema_ptr _schema; - stdx::optional _key; - - struct single_partition_tag {}; - - sstable_data_source(schema_ptr s, shared_sstable sst, mp_row_consumer&& consumer) - : _sst(std::move(sst)) - , _consumer(std::move(consumer)) - , _context(_sst->data_consume_rows(_consumer)) - , _schema(std::move(s)) - { } - - sstable_data_source(schema_ptr s, shared_sstable sst, mp_row_consumer&& consumer, sstable::disk_read_range toread, uint64_t last_end, - std::unique_ptr lh_index = {}, std::unique_ptr rh_index = {}) - : _sst(std::move(sst)) - , _consumer(std::move(consumer)) - , _read_enabled(bool(toread)) - , _context(_sst->data_consume_rows(_consumer, std::move(toread), last_end)) - , _lh_index(std::move(lh_index)) - , _rh_index(std::move(rh_index)) - , _schema(std::move(s)) - { } - - sstable_data_source(single_partition_tag, schema_ptr s, shared_sstable sst, mp_row_consumer&& consumer, - std::unique_ptr lh_index, std::unique_ptr rh_index) - : _sst(std::move(sst)) - , _consumer(std::move(consumer)) - , _read_enabled(lh_index->data_file_position() != rh_index->data_file_position()) - , _context(_sst->data_consume_single_partition(_consumer, - sstable::disk_read_range(lh_index->data_file_position(), rh_index->data_file_position()))) - , _lh_index(std::move(lh_index)) - , _rh_index(std::move(rh_index)) - , _schema(std::move(s)) - { } - - ~sstable_data_source() { - auto close = [] (std::unique_ptr& ptr) { - if (ptr) { - auto f = ptr->close(); - f.handle_exception([index = std::move(ptr)] (auto&&) { }); - } - }; - close(_lh_index); - close(_rh_index); - } - - index_reader& lh_index() { - if (!_lh_index) { - _lh_index = _sst->get_index_reader(_consumer.io_priority()); - } - return *_lh_index; - } - - static bool will_likely_slice(const query::partition_slice& slice) { - return (!slice.default_row_ranges().empty() && !slice.default_row_ranges()[0].is_full()) - || slice.get_specific_ranges(); - } -private: - future<> advance_to_next_partition(); - future read_from_index(); - future read_from_datafile(); -public: - // Assumes that we're currently positioned at partition boundary. - future read_partition(); - // Can be called from any position. 
- future read_next_partition(); - future<> fast_forward_to(const dht::partition_range&); -}; - -class sstable_streamed_mutation : public streamed_mutation::impl { - friend class mp_row_consumer; - lw_shared_ptr _ds; - tombstone _t; - position_in_partition::less_compare _cmp; - position_in_partition::equal_compare _eq; -public: - sstable_streamed_mutation(schema_ptr s, dht::decorated_key dk, tombstone t, lw_shared_ptr ds) - : streamed_mutation::impl(s, std::move(dk), t) - , _ds(std::move(ds)) - , _t(t) - , _cmp(*s) - , _eq(*s) - { - _ds->_consumer.set_streamed_mutation(this); - } - - sstable_streamed_mutation(sstable_streamed_mutation&&) = delete; - - virtual future<> fill_buffer() final override { - return do_until([this] { return !is_buffer_empty() || is_end_of_stream(); }, [this] { - _ds->_consumer.push_ready_fragments(); - if (is_buffer_full() || is_end_of_stream()) { - return make_ready_future<>(); - } - return _ds->_consumer.maybe_skip().then([this] { - return _ds->_context.read(); - }); - }); - } - - future<> fast_forward_to(position_range range) override { - _end_of_stream = false; - forward_buffer_to(range.start()); - return _ds->_consumer.fast_forward_to(std::move(range)); - } - - future<> advance_context(position_in_partition_view pos) { - if (pos.is_before_all_fragments(*_schema)) { - return make_ready_future<>(); - } - return [this] { - if (!_ds->_index_in_current_partition) { - _ds->_index_in_current_partition = true; - return _ds->lh_index().advance_to(_key); - } - return make_ready_future(); - }().then([this, pos] { - return _ds->lh_index().advance_to(pos).then([this] { - index_reader& idx = *_ds->_lh_index; - return _ds->_context.skip_to(idx.element_kind(), idx.data_file_position()); - }); - }); - } -}; - -row_consumer::proceed -mp_row_consumer::push_ready_fragments_with_ready_set() { - // We're merging two streams here, one is _range_tombstones - // and the other is the main fragment stream represented by - // _ready and _out_of_range (which means end of stream). - - while (!_sm->is_buffer_full()) { - auto mfo = _range_tombstones.get_next(*_ready); - if (mfo) { - _sm->push_mutation_fragment(std::move(*mfo)); - } else { - _sm->push_mutation_fragment(std::move(*_ready)); - _ready = {}; - return proceed(!_sm->is_buffer_full()); - } - } - return proceed::no; -} - -row_consumer::proceed -mp_row_consumer::push_ready_fragments_out_of_range() { - // Emit all range tombstones relevant to the current forwarding range first. 
-    while (!_sm->is_buffer_full()) {
-        auto mfo = _range_tombstones.get_next(_fwd_end);
-        if (!mfo) {
-            _sm->_end_of_stream = true;
-            break;
-        }
-        _sm->push_mutation_fragment(std::move(*mfo));
-    }
-    return proceed::no;
-}
-
-row_consumer::proceed
-mp_row_consumer::push_ready_fragments() {
-    if (_ready) {
-        return push_ready_fragments_with_ready_set();
-    }
-
-    if (_out_of_range) {
-        return push_ready_fragments_out_of_range();
-    }
-
-    return proceed::yes;
-}
-
-future<> mp_row_consumer::fast_forward_to(position_range r) {
-    sstlog.trace("mp_row_consumer {}: fast_forward_to({})", this, r);
-    _out_of_range = _is_mutation_end;
-    _fwd_end = std::move(r).end();
-
-    _range_tombstones.forward_to(r.start());
-
-    _ck_ranges_walker->trim_front(std::move(r).start());
-    if (_ck_ranges_walker->out_of_range()) {
-        _out_of_range = true;
-        _ready = {};
-        sstlog.trace("mp_row_consumer {}: no more ranges", this);
-        return make_ready_future<>();
-    }
-
-    auto start = _ck_ranges_walker->lower_bound();
-
-    if (_ready && !_ready->relevant_for_range(*_schema, start)) {
-        _ready = {};
-    }
-
-    if (_in_progress) {
-        advance_to(*_in_progress);
-        if (!_skip_in_progress) {
-            sstlog.trace("mp_row_consumer {}: _in_progress in range", this);
-            return make_ready_future<>();
-        }
-    }
-
-    if (_out_of_range) {
-        sstlog.trace("mp_row_consumer {}: _out_of_range=true", this);
-        return make_ready_future<>();
-    }
-
-    position_in_partition::less_compare less(*_schema);
-    if (!less(start, _fwd_end)) {
-        _out_of_range = true;
-        sstlog.trace("mp_row_consumer {}: no overlap with restrictions", this);
-        return make_ready_future<>();
-    }
-
-    sstlog.trace("mp_row_consumer {}: advance_context({})", this, start);
-    _last_lower_bound_counter = _ck_ranges_walker->lower_bound_change_counter();
-    return _sm->advance_context(start);
-}
-
-future<> mp_row_consumer::maybe_skip() {
-    if (!needs_skip()) {
-        return make_ready_future<>();
-    }
-    _last_lower_bound_counter = _ck_ranges_walker->lower_bound_change_counter();
-    auto pos = _ck_ranges_walker->lower_bound();
-    sstlog.trace("mp_row_consumer {}: advance_context({})", this, pos);
-    return _sm->advance_context(pos);
-}
-
-future<streamed_mutation_opt>
-sstables::sstable::read_row(schema_ptr schema,
-                            const sstables::key& key,
-                            const query::partition_slice& slice,
-                            const io_priority_class& pc,
-                            streamed_mutation::forwarding fwd)
-{
-    return do_with(dht::global_partitioner().decorate_key(*schema, key.to_partition_key(*schema)), [this, schema, &slice, &pc, fwd] (auto& dk) {
-        return this->read_row(schema, dk, slice, pc, fwd);
-    });
-}
-
-static inline void ensure_len(bytes_view v, size_t len) {
-    if (v.size() < len) {
-        throw malformed_sstable_exception(sprint("Expected {} bytes, but remaining is {}", len, v.size()));
-    }
-}
-
-template <typename T>
-static inline T read_be(const signed char* p) {
-    return ::read_be<T>(reinterpret_cast<const char*>(p));
-}
-
-template <typename T>
-static inline T consume_be(bytes_view& p) {
-    ensure_len(p, sizeof(T));
-    T i = read_be<T>(p.data());
-    p.remove_prefix(sizeof(T));
-    return i;
-}
-
-static inline bytes_view consume_bytes(bytes_view& p, size_t len) {
-    ensure_len(p, len);
-    auto ret = bytes_view(p.data(), len);
-    p.remove_prefix(len);
-    return ret;
-}
-
-promoted_index promoted_index_view::parse(const schema& s) const {
-    bytes_view data = _bytes;
-
-    sstables::deletion_time del_time;
-    del_time.local_deletion_time = consume_be<uint32_t>(data);
-    del_time.marked_for_delete_at = consume_be<uint64_t>(data);
-
-    auto num_blocks = consume_be<uint32_t>(data);
-    std::deque<promoted_index::entry> entries;
-    while (num_blocks--) {
-        uint16_t len = consume_be<uint16_t>(data);
-        auto start_ck = composite_view(consume_bytes(data, len), s.is_compound());
-        len = consume_be<uint16_t>(data);
-        auto end_ck = composite_view(consume_bytes(data, len), s.is_compound());
-        uint64_t offset = consume_be<uint64_t>(data);
-        uint64_t width = consume_be<uint64_t>(data);
-        entries.emplace_back(promoted_index::entry{start_ck, end_ck, offset, width});
-    }
-
-    return promoted_index{del_time, std::move(entries)};
-}
-
-sstables::deletion_time promoted_index_view::get_deletion_time() const {
-    bytes_view data = _bytes;
-    sstables::deletion_time del_time;
-    del_time.local_deletion_time = consume_be<uint32_t>(data);
-    del_time.marked_for_delete_at = consume_be<uint64_t>(data);
-    return del_time;
-}
-
-
-class mutation_reader::impl {
-private:
-    lw_shared_ptr<sstable_data_source> _ds;
-    std::function<future<lw_shared_ptr<sstable_data_source>> ()> _get_data_source;
-public:
-    impl(shared_sstable sst, schema_ptr schema, sstable::disk_read_range toread, uint64_t last_end,
-         const io_priority_class &pc,
-         streamed_mutation::forwarding fwd)
-        : _get_data_source([this, sst = std::move(sst), s = std::move(schema), toread, last_end, &pc, fwd] {
-            auto consumer = mp_row_consumer(s, query::full_slice, pc, fwd);
-            auto ds = make_lw_shared<sstable_data_source>(std::move(s), std::move(sst), std::move(consumer), std::move(toread), last_end);
-            return make_ready_future<lw_shared_ptr<sstable_data_source>>(std::move(ds));
-        }) { }
-    impl(shared_sstable sst, schema_ptr schema,
-         const io_priority_class &pc,
-         streamed_mutation::forwarding fwd)
-        : _get_data_source([this, sst = std::move(sst), s = std::move(schema), &pc, fwd] {
-            auto consumer = mp_row_consumer(s, query::full_slice, pc, fwd);
-            auto ds = make_lw_shared<sstable_data_source>(std::move(s), std::move(sst), std::move(consumer));
-            return make_ready_future<lw_shared_ptr<sstable_data_source>>(std::move(ds));
-        }) { }
-    impl(shared_sstable sst,
-         schema_ptr schema,
-         const dht::partition_range& pr,
-         const query::partition_slice& slice,
-         const io_priority_class& pc,
-         streamed_mutation::forwarding fwd,
-         ::mutation_reader::forwarding fwd_mr)
-        : _get_data_source([this, pr, sst = std::move(sst), s = std::move(schema), &pc, &slice, fwd, fwd_mr] () mutable {
-            auto lh_index = sst->get_index_reader(pc); // lh = left hand
-            auto rh_index = sst->get_index_reader(pc);
-            auto f = seastar::when_all_succeed(lh_index->advance_to_start(pr), rh_index->advance_to_end(pr));
-            return f.then([this, lh_index = std::move(lh_index), rh_index = std::move(rh_index), sst = std::move(sst), s = std::move(s), &pc, &slice, fwd, fwd_mr] () mutable {
-                sstable::disk_read_range drr{lh_index->data_file_position(),
-                                             rh_index->data_file_position()};
-                auto consumer = mp_row_consumer(s, slice, pc, fwd);
-                auto ds = make_lw_shared<sstable_data_source>(std::move(s), std::move(sst), std::move(consumer), drr, (fwd_mr ? sst->data_size() : drr.end), std::move(lh_index), std::move(rh_index));
-                ds->_index_in_current_partition = true;
-                ds->_will_likely_slice = sstable_data_source::will_likely_slice(slice);
-                return ds;
-            });
-        }) { }
-
-    // Reference to _consumer is passed to data_consume_rows() in the constructor so we must not allow move/copy
-    impl(impl&&) = delete;
-    impl(const impl&) = delete;
-
-    future<streamed_mutation_opt> read() {
-        if (_ds) {
-            return _ds->read_next_partition();
-        }
-        return (_get_data_source)().then([this] (lw_shared_ptr<sstable_data_source> ds) {
-            // Hold on to the sstable_data_source so the read can be
-            // resumed later.
-            _ds = std::move(ds);
-            return _ds->read_partition();
-        });
-    }
-
-    future<> fast_forward_to(const dht::partition_range& pr) {
-        if (_ds) {
-            return _ds->fast_forward_to(pr);
-        }
-        return (_get_data_source)().then([this, &pr] (lw_shared_ptr<sstable_data_source> ds) {
-            // Hold on to the sstable_data_source so the read can be
-            // resumed later.
-            _ds = std::move(ds);
-            return _ds->fast_forward_to(pr);
-        });
-    }
-};
-
-future<> sstable_data_source::fast_forward_to(const dht::partition_range& pr) {
-    assert(_lh_index);
-    assert(_rh_index);
-    auto f1 = _lh_index->advance_to_start(pr);
-    auto f2 = _rh_index->advance_to_end(pr);
-    return seastar::when_all_succeed(std::move(f1), std::move(f2)).then([this] {
-        auto start = _lh_index->data_file_position();
-        auto end = _rh_index->data_file_position();
-        if (start != end) {
-            _read_enabled = true;
-            _index_in_current_partition = true;
-            return _context.fast_forward_to(start, end);
-        }
-        _index_in_current_partition = false;
-        _read_enabled = false;
-        return make_ready_future<>();
-    });
-}
-
-future<> sstable_data_source::advance_to_next_partition() {
-    sstlog.trace("reader {}: advance_to_next_partition()", this);
-    auto& consumer = _consumer;
-    if (consumer.is_mutation_end()) {
-        sstlog.trace("reader {}: already at partition boundary", this);
-        _index_in_current_partition = false;
-        return make_ready_future<>();
-    }
-    return (_index_in_current_partition
-            ? _lh_index->advance_to_next_partition()
-            : lh_index().advance_to(dht::ring_position_view::for_after_key(*_key))).then([this] {
-        _index_in_current_partition = true;
-        return _context.skip_to(_lh_index->element_kind(), _lh_index->data_file_position());
-    });
-}
-
-future<streamed_mutation_opt> sstable_data_source::read_next_partition() {
-    sstlog.trace("reader {}: read next partition", this);
-    return advance_to_next_partition().then([this] {
-        return read_partition();
-    });
-}
-
-future<streamed_mutation_opt> sstable_data_source::read_partition() {
-    sstlog.trace("reader {}: reading partition", this);
-
-    if (!_consumer.is_mutation_end()) {
-        // FIXME: give more details from _context
-        throw malformed_sstable_exception("consumer not at partition boundary", _sst->get_filename());
-    }
-
-    if (!_read_enabled) {
-        return make_ready_future<streamed_mutation_opt>();
-    }
-
-    // It's better to obtain partition information from the index if we already have it.
-    // We can save on IO if the user will skip past the front of partition immediately.
-    //
-    // It is also better to pay the cost of reading the index if we know that we will
-    // need to use the index anyway soon.
-    //
-    if (_index_in_current_partition) {
-        if (_lh_index->eof()) {
-            sstlog.trace("reader {}: eof", this);
-            return make_ready_future<streamed_mutation_opt>(stdx::nullopt);
-        }
-        if (_lh_index->partition_data_ready()) {
-            return read_from_index();
-        }
-        if (_will_likely_slice) {
-            return _lh_index->read_partition_data().then([this] {
-                return read_from_index();
-            });
-        }
-    }
-
-    // FIXME: advance index to current partition if _will_likely_slice
-    return read_from_datafile();
-}
-
-future<streamed_mutation_opt> sstable_data_source::read_from_index() {
-    sstlog.trace("reader {}: read from index", this);
-    auto tomb = _lh_index->partition_tombstone();
-    if (!tomb) {
-        sstlog.trace("reader {}: no tombstone", this);
-        return read_from_datafile();
-    }
-    auto pk = _lh_index->partition_key().to_partition_key(*_schema);
-    _key = dht::global_partitioner().decorate_key(*_schema, std::move(pk));
-    auto sm = make_streamed_mutation(_schema, *_key, tombstone(*tomb), shared_from_this());
-    _consumer.setup_for_partition(_key->key());
-    return make_ready_future<streamed_mutation_opt>(std::move(sm));
-}
-
-future<streamed_mutation_opt> sstable_data_source::read_from_datafile() {
-    sstlog.trace("reader {}: read from data file", this);
-    return _context.read().then([this] {
-        auto& consumer = _consumer;
-        auto mut = consumer.get_mutation();
-        if (!mut) {
-            sstlog.trace("reader {}: eof", this);
-            return make_ready_future<streamed_mutation_opt>();
-        }
-        _key = dht::global_partitioner().decorate_key(*_schema, std::move(mut->key));
-        auto sm = make_streamed_mutation(_schema, *_key, mut->tomb, shared_from_this());
-        return make_ready_future<streamed_mutation_opt>(std::move(sm));
-    });
-}
-
-mutation_reader::~mutation_reader() = default;
-mutation_reader::mutation_reader(mutation_reader&&) = default;
-mutation_reader& mutation_reader::operator=(mutation_reader&&) = default;
-mutation_reader::mutation_reader(std::unique_ptr<impl> p)
-    : _pimpl(std::move(p)) { }
-future<streamed_mutation_opt> mutation_reader::read() {
-    return _pimpl->read();
-}
-future<> mutation_reader::fast_forward_to(const dht::partition_range& pr) {
-    return _pimpl->fast_forward_to(pr);
-}
-
-mutation_reader sstable::read_rows(schema_ptr schema, const io_priority_class& pc, streamed_mutation::forwarding fwd) {
-    return std::make_unique<mutation_reader::impl>(shared_from_this(), schema, pc, fwd);
-}
-
-static
-future<> advance_to_upper_bound(index_reader& ix, const schema& s, const query::partition_slice& slice, dht::ring_position_view key) {
-    auto& ranges = slice.row_ranges(s, *key.key());
-    if (ranges.empty()) {
-        return ix.advance_past(position_in_partition_view::for_static_row());
-    } else {
-        return ix.advance_past(position_in_partition_view::for_range_end(ranges[ranges.size() - 1]));
-    }
-}
-
-future<streamed_mutation_opt>
-sstables::sstable::read_row(schema_ptr schema,
-                            dht::ring_position_view key,
-                            const query::partition_slice& slice,
-                            const io_priority_class& pc,
-                            streamed_mutation::forwarding fwd)
-{
-    auto lh_index = get_index_reader(pc);
-    auto f = lh_index->advance_and_check_if_present(key);
-    return f.then([this, &slice, &pc, fwd, lh_index = std::move(lh_index), s = std::move(schema), key] (bool present) mutable {
-        if (!present) {
-            _filter_tracker.add_false_positive();
-            return make_ready_future<streamed_mutation_opt>(stdx::nullopt);
-        }
-
-        _filter_tracker.add_true_positive();
-
-        auto rh_index = std::make_unique<index_reader>(*lh_index);
-        auto f = advance_to_upper_bound(*rh_index, *_schema, slice, key);
-        return f.then([this, &slice, &pc, fwd, lh_index = std::move(lh_index), rh_index = std::move(rh_index), s = std::move(s)] () mutable {
-            auto consumer = mp_row_consumer(s, slice, pc, fwd);
-            auto ds = make_lw_shared<sstable_data_source>(sstable_data_source::single_partition_tag(), std::move(s),
-                    shared_from_this(), std::move(consumer), std::move(lh_index), std::move(rh_index));
-            ds->_will_likely_slice = sstable_data_source::will_likely_slice(slice);
-            ds->_index_in_current_partition = true;
-            return ds->read_partition().finally([ds]{});
-        });
-    });
-}
-
-mutation_reader
-sstable::read_range_rows(schema_ptr schema,
-                         const dht::partition_range& range,
-                         const query::partition_slice& slice,
-                         const io_priority_class& pc,
-                         streamed_mutation::forwarding fwd,
-                         ::mutation_reader::forwarding fwd_mr) {
-    return std::make_unique<mutation_reader::impl>(
-        shared_from_this(), std::move(schema), range, slice, pc, fwd, fwd_mr);
-}
-
-}
diff --git a/scylla/sstables/remove.hh b/scylla/sstables/remove.hh
deleted file mode 100644
index 9ed9aa7..0000000
--- a/scylla/sstables/remove.hh
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include
-#include
-
-namespace sstables {
-
-future<> remove_by_toc_name(sstring sstable_toc_name, const io_error_handler& error_handler = sstable_write_error_handler);
-
-}
-
-
diff --git a/scylla/sstables/row.cc b/scylla/sstables/row.cc
deleted file mode 100644
index 9d8111f..0000000
--- a/scylla/sstables/row.cc
+++ /dev/null
@@ -1,480 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "sstables.hh"
-#include "consumer.hh"
-
-namespace sstables {
-
-// data_consume_rows_context remembers the context that an ongoing
-// data_consume_rows() future is in.
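The deleted class that follows is a resumable push-parser: process_state() consumes whatever bytes happen to be buffered, and _state records exactly where parsing stopped, so the next buffer resumes mid-row (or even mid-integer, via the prestate machinery). A minimal, self-contained sketch of that technique; every name below is illustrative, not the deleted code's API:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    // Parses a stream of [u16 big-endian length][body] records that may be
    // split across chunks at any byte boundary. State survives between calls.
    class record_parser {
        enum class state { LEN, BODY } _state = state::LEN;
        uint8_t _len_buf[2];
        size_t _len_have = 0;   // bytes of the length prefix collected so far
        uint16_t _len = 0;
        std::string _body;      // body bytes collected so far
    public:
        void process(const uint8_t* p, size_t n, std::vector<std::string>& out) {
            while (n) {
                if (_state == state::LEN) {
                    _len_buf[_len_have++] = *p++; --n;
                    if (_len_have == 2) {
                        _len = uint16_t(_len_buf[0]) << 8 | _len_buf[1];
                        _len_have = 0;
                        _state = state::BODY;
                    }
                } else {
                    size_t take = std::min<size_t>(n, _len - _body.size());
                    _body.append(reinterpret_cast<const char*>(p), take);
                    p += take; n -= take;
                    if (_body.size() == _len) {
                        out.push_back(std::move(_body));
                        _body.clear();
                        _state = state::LEN;
                    }
                }
            }
        }
    };

    int main() {
        // "\x00\x03abc" split across two chunks still yields one record.
        record_parser rp;
        std::vector<std::string> out;
        const uint8_t a[] = {0x00, 0x03, 'a'};
        const uint8_t b[] = {'b', 'c'};
        rp.process(a, sizeof a, out);
        rp.process(b, sizeof b, out);
        std::cout << out.at(0) << "\n"; // prints "abc"
    }

The essential property mirrored here is that process() may be called with arbitrarily split input and never loses data between calls.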
-class data_consume_rows_context : public data_consumer::continuous_data_consumer { -private: - enum class state { - ROW_START, - ROW_KEY_BYTES, - DELETION_TIME, - DELETION_TIME_2, - DELETION_TIME_3, - ATOM_START, - ATOM_START_2, - ATOM_NAME_BYTES, - ATOM_MASK, - ATOM_MASK_2, - COUNTER_CELL, - COUNTER_CELL_2, - EXPIRING_CELL, - EXPIRING_CELL_2, - EXPIRING_CELL_3, - CELL, - CELL_2, - CELL_VALUE_BYTES, - CELL_VALUE_BYTES_2, - RANGE_TOMBSTONE, - RANGE_TOMBSTONE_2, - RANGE_TOMBSTONE_3, - RANGE_TOMBSTONE_4, - RANGE_TOMBSTONE_5, - STOP_THEN_ATOM_START, - } _state = state::ROW_START; - - row_consumer& _consumer; - - temporary_buffer _key; - temporary_buffer _val; - - // state for reading a cell - bool _deleted; - bool _counter; - uint32_t _ttl, _expiration; - - bool _shadowable; -public: - bool non_consuming() const { - return (((_state == state::DELETION_TIME_3) - || (_state == state::CELL_VALUE_BYTES_2) - || (_state == state::ATOM_START_2) - || (_state == state::ATOM_MASK_2) - || (_state == state::STOP_THEN_ATOM_START) - || (_state == state::COUNTER_CELL_2) - || (_state == state::EXPIRING_CELL_3)) && (_prestate == prestate::NONE)); - } - - // process() feeds the given data into the state machine. - // The consumer may request at any point (e.g., after reading a whole - // row) to stop the processing, in which case we trim the buffer to - // leave only the unprocessed part. The caller must handle calling - // process() again, and/or refilling the buffer, as needed. - row_consumer::proceed process_state(temporary_buffer& data) { -#if 0 - // Testing hack: call process() for tiny chunks separately, to verify - // that primitive types crossing input buffer are handled correctly. - constexpr size_t tiny_chunk = 1; // try various tiny sizes - if (data.size() > tiny_chunk) { - for (unsigned i = 0; i < data.size(); i += tiny_chunk) { - auto chunk_size = std::min(tiny_chunk, data.size() - i); - auto chunk = data.share(i, chunk_size); - if (process(chunk) == row_consumer::proceed::no) { - data.trim_front(i + chunk_size - chunk.size()); - return row_consumer::proceed::no; - } - } - data.trim(0); - return row_consumer::proceed::yes; - } -#endif - sstlog.trace("data_consume_row_context {}: state={}, size={}", this, static_cast(_state), data.size()); - switch (_state) { - case state::ROW_START: - // read 2-byte key length into _u16 - if (read_16(data) != read_status::ready) { - _state = state::ROW_KEY_BYTES; - break; - } - case state::ROW_KEY_BYTES: - // After previously reading 16-bit length, read key's bytes. - if (read_bytes(data, _u16, _key) != read_status::ready) { - _state = state::DELETION_TIME; - break; - } - case state::DELETION_TIME: - if (read_32(data) != read_status::ready) { - _state = state::DELETION_TIME_2; - break; - } - // fallthrough - case state::DELETION_TIME_2: - if (read_64(data) != read_status::ready) { - _state = state::DELETION_TIME_3; - break; - } - // fallthrough - case state::DELETION_TIME_3: { - deletion_time del; - del.local_deletion_time = _u32; - del.marked_for_delete_at = _u64; - auto ret = _consumer.consume_row_start(key_view(to_bytes_view(_key)), del); - // after calling the consume function, we can release the - // buffers we held for it. 
- _key.release(); - _state = state::ATOM_START; - if (ret == row_consumer::proceed::no) { - return row_consumer::proceed::no; - } - } - case state::ATOM_START: - if (read_16(data) == read_status::ready) { - if (_u16 == 0) { - // end of row marker - _state = state::ROW_START; - if (_consumer.consume_row_end() == - row_consumer::proceed::no) { - return row_consumer::proceed::no; - } - } else { - _state = state::ATOM_NAME_BYTES; - } - } else { - _state = state::ATOM_START_2; - } - break; - case state::ATOM_START_2: - if (_u16 == 0) { - // end of row marker - _state = state::ROW_START; - if (_consumer.consume_row_end() == - row_consumer::proceed::no) { - return row_consumer::proceed::no; - } - } else { - _state = state::ATOM_NAME_BYTES; - } - break; - case state::ATOM_NAME_BYTES: - if (read_bytes(data, _u16, _key) != read_status::ready) { - _state = state::ATOM_MASK; - break; - } - case state::ATOM_MASK: - if (read_8(data) != read_status::ready) { - _state = state::ATOM_MASK_2; - break; - } - // fallthrough - case state::ATOM_MASK_2: { - auto const mask = column_mask(_u8); - - if ((mask & (column_mask::range_tombstone | column_mask::shadowable)) != column_mask::none) { - _state = state::RANGE_TOMBSTONE; - _shadowable = (mask & column_mask::shadowable) != column_mask::none; - } else if ((mask & column_mask::counter) != column_mask::none) { - _deleted = false; - _counter = true; - _state = state::COUNTER_CELL; - } else if ((mask & column_mask::expiration) != column_mask::none) { - _deleted = false; - _counter = false; - _state = state::EXPIRING_CELL; - } else { - // FIXME: see ColumnSerializer.java:deserializeColumnBody - if ((mask & column_mask::counter_update) != column_mask::none) { - throw malformed_sstable_exception("FIXME COUNTER_UPDATE_MASK"); - } - _ttl = _expiration = 0; - _deleted = (mask & column_mask::deletion) != column_mask::none; - _counter = false; - _state = state::CELL; - } - break; - } - case state::COUNTER_CELL: - if (read_64(data) != read_status::ready) { - _state = state::COUNTER_CELL_2; - break; - } - // fallthrough - case state::COUNTER_CELL_2: - // _timestamp_of_last_deletion = _u64; - _state = state::CELL; - goto state_CELL; - case state::EXPIRING_CELL: - if (read_32(data) != read_status::ready) { - _state = state::EXPIRING_CELL_2; - break; - } - // fallthrough - case state::EXPIRING_CELL_2: - _ttl = _u32; - if (read_32(data) != read_status::ready) { - _state = state::EXPIRING_CELL_3; - break; - } - // fallthrough - case state::EXPIRING_CELL_3: - _expiration = _u32; - _state = state::CELL; - state_CELL: - case state::CELL: { - if (read_64(data) != read_status::ready) { - _state = state::CELL_2; - break; - } - } - case state::CELL_2: - if (read_32(data) != read_status::ready) { - _state = state::CELL_VALUE_BYTES; - break; - } - case state::CELL_VALUE_BYTES: - if (read_bytes(data, _u32, _val) == read_status::ready) { - // If the whole string is in our buffer, great, we don't - // need to copy, and can skip the CELL_VALUE_BYTES_2 state. 
- // - // finally pass it to the consumer: - row_consumer::proceed ret; - if (_deleted) { - if (_val.size() != 4) { - throw malformed_sstable_exception("deleted cell expects local_deletion_time value"); - } - deletion_time del; - del.local_deletion_time = consume_be(_val); - del.marked_for_delete_at = _u64; - ret = _consumer.consume_deleted_cell(to_bytes_view(_key), del); - } else if (_counter) { - ret = _consumer.consume_counter_cell(to_bytes_view(_key), - to_bytes_view(_val), _u64); - } else { - ret = _consumer.consume_cell(to_bytes_view(_key), - to_bytes_view(_val), _u64, _ttl, _expiration); - } - // after calling the consume function, we can release the - // buffers we held for it. - _key.release(); - _val.release(); - _state = state::ATOM_START; - if (ret == row_consumer::proceed::no) { - return row_consumer::proceed::no; - } - } else { - _state = state::CELL_VALUE_BYTES_2; - } - break; - case state::CELL_VALUE_BYTES_2: - { - row_consumer::proceed ret; - if (_deleted) { - if (_val.size() != 4) { - throw malformed_sstable_exception("deleted cell expects local_deletion_time value"); - } - deletion_time del; - del.local_deletion_time = consume_be(_val); - del.marked_for_delete_at = _u64; - ret = _consumer.consume_deleted_cell(to_bytes_view(_key), del); - } else if (_counter) { - ret = _consumer.consume_counter_cell(to_bytes_view(_key), - to_bytes_view(_val), _u64); - } else { - ret = _consumer.consume_cell(to_bytes_view(_key), - to_bytes_view(_val), _u64, _ttl, _expiration); - } - // after calling the consume function, we can release the - // buffers we held for it. - _key.release(); - _val.release(); - _state = state::ATOM_START; - if (ret == row_consumer::proceed::no) { - return row_consumer::proceed::no; - } - break; - } - case state::RANGE_TOMBSTONE: - if (read_16(data) != read_status::ready) { - _state = state::RANGE_TOMBSTONE_2; - break; - } - case state::RANGE_TOMBSTONE_2: - // read the end column into _val. - if (read_bytes(data, _u16, _val) != read_status::ready) { - _state = state::RANGE_TOMBSTONE_3; - break; - } - case state::RANGE_TOMBSTONE_3: - if (read_32(data) != read_status::ready) { - _state = state::RANGE_TOMBSTONE_4; - break; - } - case state::RANGE_TOMBSTONE_4: - if (read_64(data) != read_status::ready) { - _state = state::RANGE_TOMBSTONE_5; - break; - } - case state::RANGE_TOMBSTONE_5: - { - deletion_time del; - del.local_deletion_time = _u32; - del.marked_for_delete_at = _u64; - auto ret = _shadowable - ? _consumer.consume_shadowable_row_tombstone(to_bytes_view(_key), del) - : _consumer.consume_range_tombstone(to_bytes_view(_key), to_bytes_view(_val), del); - _key.release(); - _val.release(); - _state = state::ATOM_START; - if (ret == row_consumer::proceed::no) { - return row_consumer::proceed::no; - } - break; - } - case state::STOP_THEN_ATOM_START: - _state = state::ATOM_START; - return row_consumer::proceed::no; - default: - throw malformed_sstable_exception("unknown state"); - } - - return row_consumer::proceed::yes; - } - - data_consume_rows_context(row_consumer& consumer, - input_stream && input, uint64_t start, uint64_t maxlen) - : continuous_data_consumer(std::move(input), start, maxlen) - , _consumer(consumer) { - } - - void verify_end_state() { - // If reading a partial row (i.e., when we have a clustering row - // filter and using a promoted index), we may be in ATOM_START or ATOM_START_2 - // state instead of ROW_START. In that case we did not read the - // end-of-row marker and consume_row_end() was never called. 
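Referring back to the ATOM_MASK_2 dispatch above: cell kinds are distinguished by testing bit flags in an enum-class mask. A hedged sketch of that flag-testing pattern; the flag values below are illustrative only, the real ones live in the sstables type definitions:

    #include <cstdint>
    #include <cstdio>

    // Enum-class bitmask with the operators needed for flag tests.
    enum class column_mask : uint8_t {
        none            = 0x00,
        deletion        = 0x01,  // values are assumptions for this sketch
        expiration      = 0x02,
        counter         = 0x04,
        counter_update  = 0x08,
        range_tombstone = 0x10,
        shadowable      = 0x40,
    };

    inline column_mask operator&(column_mask a, column_mask b) {
        return column_mask(uint8_t(a) & uint8_t(b));
    }
    inline column_mask operator|(column_mask a, column_mask b) {
        return column_mask(uint8_t(a) | uint8_t(b));
    }

    int main() {
        auto mask = column_mask::range_tombstone | column_mask::shadowable;
        // Same shape as the dispatch in the deleted state machine:
        if ((mask & (column_mask::range_tombstone | column_mask::shadowable)) != column_mask::none) {
            std::puts("range tombstone (possibly shadowable)");
        }
    }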
- if (_state == state::ATOM_START || _state == state::ATOM_START_2) { - _consumer.consume_row_end(); - return; - } - if (_state != state::ROW_START || _prestate != prestate::NONE) { - throw malformed_sstable_exception("end of input, but not end of row"); - } - } - - void reset(indexable_element el) { - switch (el) { - case indexable_element::partition: - _state = state::ROW_START; - break; - case indexable_element::cell: - _state = state::ATOM_START; - break; - default: - assert(0); - } - _consumer.reset(el); - } -}; - -// data_consume_rows() and data_consume_rows_at_once() both can read just a -// single row or many rows. The difference is that data_consume_rows_at_once() -// is optimized to reading one or few rows (reading it all into memory), while -// data_consume_rows() uses a read buffer, so not all the rows need to fit -// memory in the same time (they are delivered to the consumer one by one). -class data_consume_context::impl { -private: - shared_sstable _sst; - std::unique_ptr _ctx; -public: - impl(shared_sstable sst, row_consumer& consumer, input_stream&& input, uint64_t start, uint64_t maxlen) - : _sst(std::move(sst)) - , _ctx(new data_consume_rows_context(consumer, std::move(input), start, maxlen)) - { } - ~impl() { - if (_ctx) { - auto f = _ctx->close(); - f.handle_exception([ctx = std::move(_ctx), sst = std::move(_sst)] (auto) { }); - } - } - future<> read() { - return _ctx->consume_input(*_ctx); - } - future<> fast_forward_to(uint64_t begin, uint64_t end) { - _ctx->reset(indexable_element::partition); - return _ctx->fast_forward_to(begin, end); - } - future<> skip_to(indexable_element el, uint64_t begin) { - sstlog.trace("data_consume_rows_context {}: skip_to({} -> {}, el={})", _ctx.get(), _ctx->position(), begin, static_cast(el)); - if (begin <= _ctx->position()) { - return make_ready_future<>(); - } - _ctx->reset(el); - return _ctx->skip_to(begin); - } -}; - -data_consume_context::~data_consume_context() = default; -data_consume_context::data_consume_context(data_consume_context&& o) noexcept - : _pimpl(std::move(o._pimpl)) -{ } -data_consume_context& data_consume_context::operator=(data_consume_context&& o) noexcept { - _pimpl = std::move(o._pimpl); - return *this; -} -data_consume_context::data_consume_context(std::unique_ptr p) : _pimpl(std::move(p)) { } -future<> data_consume_context::read() { - return _pimpl->read(); -} -future<> data_consume_context::fast_forward_to(uint64_t begin, uint64_t end) { - return _pimpl->fast_forward_to(begin, end); -} -future<> data_consume_context::skip_to(indexable_element el, uint64_t begin) { - return _pimpl->skip_to(el, begin); -} - -data_consume_context sstable::data_consume_rows( - row_consumer& consumer, sstable::disk_read_range toread, uint64_t last_end) { - // Although we were only asked to read until toread.end, we'll not limit - // the underlying file input stream to this end, but rather to last_end. - // This potentially enables read-ahead beyond end, until last_end, which - // can be beneficial if the user wants to fast_forward_to() on the - // returned context, and may make small skips. 
-    return std::make_unique<data_consume_context::impl>(shared_from_this(),
-            consumer, data_stream(toread.start, last_end - toread.start,
-                    consumer.io_priority(), _partition_range_history), toread.start, toread.end - toread.start);
-}
-
-data_consume_context sstable::data_consume_single_partition(
-        row_consumer& consumer, sstable::disk_read_range toread) {
-    return std::make_unique<data_consume_context::impl>(shared_from_this(),
-            consumer, data_stream(toread.start, toread.end - toread.start,
-                    consumer.io_priority(), _single_partition_history), toread.start, toread.end - toread.start);
-}
-
-
-data_consume_context sstable::data_consume_rows(row_consumer& consumer) {
-    return data_consume_rows(consumer, {0, data_size()}, data_size());
-}
-
-future<> sstable::data_consume_rows_at_once(row_consumer& consumer,
-        uint64_t start, uint64_t end) {
-    return data_read(start, end - start, consumer.io_priority()).then([&consumer]
-            (temporary_buffer<char> buf) {
-        data_consume_rows_context ctx(consumer, input_stream<char>(), 0, -1);
-        ctx.process(buf);
-        ctx.verify_end_state();
-    });
-}
-
-}
diff --git a/scylla/sstables/row.hh b/scylla/sstables/row.hh
deleted file mode 100644
index b70ff38..0000000
--- a/scylla/sstables/row.hh
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- *
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "bytes.hh"
-#include "key.hh"
-#include "core/temporary_buffer.hh"
-#include "consumer.hh"
-#include "sstables/types.hh"
-
-// sstables::data_consume_row feeds the contents of a single row into a
-// row_consumer object:
-//
-// * First, consume_row_start() is called, with some information about the
-//   whole row: The row's key, timestamp, etc.
-// * Next, consume_cell() is called once for every column.
-// * Finally, consume_row_end() is called. A consumer written for a single
-//   column will likely not want to do anything here.
-//
-// Important note: the row key, column name and column value, passed to the
-// consume_* functions, are passed as a "bytes_view" object, which points to
-// internal data held by the feeder. This internal data is only valid for the
-// duration of the single consume function it was passed to. If the object
-// wants to hold these strings longer, it must make a copy of the bytes_view's
-// contents. [Note, in reality, because our implementation reads the whole
-// row into one buffer, the byte_views remain valid until consume_row_end()
-// is called.]
-class row_consumer {
-public:
-    using proceed = data_consumer::proceed;
-
-    // Consume the row's key and deletion_time. The latter determines if the
-    // row is a tombstone, and if so, when it has been deleted.
-    // Note that the key is in serialized form, and should be deserialized
-    // (according to the schema) before use.
-    // As explained above, the key object is only valid during this call, and
-    // if the implementation wishes to save it, it must copy the *contents*.
- virtual proceed consume_row_start(sstables::key_view key, sstables::deletion_time deltime) = 0; - - // Consume one cell (column name and value). Both are serialized, and need - // to be deserialized according to the schema. - // When a cell is set with an expiration time, "ttl" is the time to live - // (in seconds) originally set for this cell, and "expiration" is the - // absolute time (in seconds since the UNIX epoch) when this cell will - // expire. Typical cells, not set to expire, will get expiration = 0. - virtual proceed consume_cell(bytes_view col_name, bytes_view value, - int64_t timestamp, - int32_t ttl, int32_t expiration) = 0; - - // Consume one counter cell. Column name and value are serialized, and need - // to be deserialized according to the schema. - virtual proceed consume_counter_cell(bytes_view col_name, bytes_view value, - int64_t timestamp) = 0; - - // Consume a deleted cell (i.e., a cell tombstone). - virtual proceed consume_deleted_cell(bytes_view col_name, sstables::deletion_time deltime) = 0; - - // Consume one row tombstone. - virtual proceed consume_shadowable_row_tombstone(bytes_view col_name, sstables::deletion_time deltime) = 0; - - // Consume one range tombstone. - virtual proceed consume_range_tombstone( - bytes_view start_col, bytes_view end_col, - sstables::deletion_time deltime) = 0; - - // Called at the end of the row, after all cells. - // Returns a flag saying whether the sstable consumer should stop now, or - // proceed consuming more data. - virtual proceed consume_row_end() = 0; - - // Called when the reader is fast forwarded to given element. - virtual void reset(sstables::indexable_element) = 0; - - // Under which priority class to place I/O coming from this consumer - virtual const io_priority_class& io_priority() = 0; - - virtual ~row_consumer() { } -}; diff --git a/scylla/sstables/shared_index_lists.hh b/scylla/sstables/shared_index_lists.hh deleted file mode 100644 index 9566d5a..0000000 --- a/scylla/sstables/shared_index_lists.hh +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include -#include -#include - -namespace sstables { - -using index_list = std::vector; - -// Associative cache of summary index -> index_list -// Entries stay around as long as there is any live external reference (list_ptr) to them. -// Supports asynchronous insertion, ensures that only one entry will be loaded. 
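The get_or_load() contract described above is a load-coalescing cache: the first caller for a key runs the loader, and every concurrent or later caller shares that one result. A self-contained model of the idea, with std::shared_future standing in for seastar's shared_promise; the names are hypothetical, and the real class's eviction-on-last-release is omitted for brevity:

    #include <cstdint>
    #include <functional>
    #include <future>
    #include <iostream>
    #include <map>
    #include <string>

    class shared_loader_cache {
        std::map<uint64_t, std::shared_future<std::string>> _entries;
    public:
        std::shared_future<std::string>
        get_or_load(uint64_t key, std::function<std::string(uint64_t)> loader) {
            auto it = _entries.find(key);
            if (it != _entries.end()) {
                return it->second;              // hit: share the existing load
            }
            auto fut = std::async(std::launch::deferred, loader, key).share();
            _entries.emplace(key, fut);         // miss: remember the pending load
            return fut;
        }
    };

    int main() {
        shared_loader_cache cache;
        int loads = 0;
        auto loader = [&loads] (uint64_t k) { ++loads; return "list-" + std::to_string(k); };
        auto a = cache.get_or_load(7, loader);
        auto b = cache.get_or_load(7, loader);  // same key: no second load
        std::cout << a.get() << " " << b.get() << " loads=" << loads << "\n"; // loads=1
    }

Deduplication hinges on inserting the pending future into the map before the load completes, so a second caller finds it and waits instead of loading again.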
-class shared_index_lists {
-public:
-    using key_type = uint64_t;
-    struct stats {
-        uint64_t hits = 0; // Number of times entry was found ready
-        uint64_t misses = 0; // Number of times entry was not found
-        uint64_t blocks = 0; // Number of times entry was not ready (>= misses)
-    };
-private:
-    class entry : public enable_lw_shared_from_this<entry> {
-    public:
-        key_type key;
-        index_list list;
-        shared_promise<> loaded;
-        shared_index_lists& parent;
-
-        entry(shared_index_lists& parent, key_type key)
-            : key(key), parent(parent)
-        { }
-        ~entry() {
-            parent._lists.erase(key);
-        }
-        bool operator==(const entry& e) const { return key == e.key; }
-        bool operator!=(const entry& e) const { return key != e.key; }
-    };
-    std::unordered_map<key_type, entry*> _lists;
-    static thread_local stats _shard_stats;
-public:
-    // Pointer to index_list
-    class list_ptr {
-        lw_shared_ptr<entry> _e;
-    public:
-        using element_type = index_list;
-        list_ptr() = default;
-        explicit list_ptr(lw_shared_ptr<entry> e) : _e(std::move(e)) {}
-        explicit operator bool() const { return static_cast<bool>(_e); }
-        index_list& operator*() { return _e->list; }
-        const index_list& operator*() const { return _e->list; }
-        index_list* operator->() { return &_e->list; }
-        const index_list* operator->() const { return &_e->list; }
-
-        index_list release() {
-            auto res = _e.owned() ? index_list(std::move(_e->list)) : index_list(_e->list);
-            _e = {};
-            return std::move(res);
-        }
-    };
-
-    shared_index_lists() = default;
-    shared_index_lists(shared_index_lists&&) = delete;
-    shared_index_lists(const shared_index_lists&) = delete;
-
-    // Returns a future which resolves with a shared pointer to index_list for given key.
-    // Always returns a valid pointer if succeeds. The pointer is never invalidated externally.
-    //
-    // If entry is missing, the loader is invoked. If list is already loading, this invocation
-    // will wait for prior loading to complete and use its result when it's done.
-    //
-    // The loader object does not survive deferring, so the caller must deal with its liveness.
-    template <typename Loader>
-    future<list_ptr> get_or_load(key_type key, Loader&& loader) {
-        auto i = _lists.find(key);
-        lw_shared_ptr<entry> e;
-        if (i != _lists.end()) {
-            e = i->second->shared_from_this();
-        } else {
-            ++_shard_stats.misses;
-            e = make_lw_shared<entry>(*this, key);
-            auto res = _lists.emplace(key, e.get());
-            assert(res.second);
-            loader(key).then_wrapped([e] (future<index_list>&& f) mutable {
-                if (f.failed()) {
-                    e->loaded.set_exception(f.get_exception());
-                } else {
-                    e->list = f.get0();
-                    e->loaded.set_value();
-                }
-            });
-        }
-        future<> f = e->loaded.get_shared_future();
-        if (!f.available()) {
-            ++_shard_stats.blocks;
-            return f.then([e] () mutable {
-                return list_ptr(std::move(e));
-            });
-        } else {
-            ++_shard_stats.hits;
-            return make_ready_future<list_ptr>(list_ptr(std::move(e)));
-        }
-    }
-
-    static const stats& shard_stats() { return _shard_stats; }
-};
-
-}
diff --git a/scylla/sstables/sstable_set.hh b/scylla/sstables/sstable_set.hh
deleted file mode 100644
index 572283e..0000000
--- a/scylla/sstables/sstable_set.hh
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (C) 2016 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "sstables.hh"
-#include "query-request.hh" // for partition_range; FIXME: move it out of there
-#include
-#include
-
-namespace sstables {
-
-class sstable_set_impl;
-class incremental_selector_impl;
-
-class sstable_set {
-    std::unique_ptr<sstable_set_impl> _impl;
-    // used to support column_family::get_sstable(), which wants to return an sstable_list
-    // that has a reference somewhere
-    lw_shared_ptr<sstable_list> _all;
-public:
-    ~sstable_set();
-    sstable_set(std::unique_ptr<sstable_set_impl> impl, lw_shared_ptr<sstable_list> all);
-    sstable_set(const sstable_set&);
-    sstable_set(sstable_set&&) noexcept;
-    sstable_set& operator=(const sstable_set&);
-    sstable_set& operator=(sstable_set&&) noexcept;
-    std::vector<shared_sstable> select(const dht::partition_range& range) const;
-    lw_shared_ptr<sstable_list> all() const { return _all; }
-    void insert(shared_sstable sst);
-    void erase(shared_sstable sst);
-
-    // Used to incrementally select sstables from sstable set using tokens.
-    // sstable set must be alive and cannot be modified while incremental
-    // selector is used.
-    class incremental_selector {
-        std::unique_ptr<incremental_selector_impl> _impl;
-        mutable stdx::optional<dht::token_range> _current_token_range;
-        mutable std::vector<shared_sstable> _current_sstables;
-    public:
-        ~incremental_selector();
-        incremental_selector(std::unique_ptr<incremental_selector_impl> impl);
-        incremental_selector(incremental_selector&&) noexcept;
-        const std::vector<shared_sstable>& select(const dht::token& t) const;
-    };
-    incremental_selector make_incremental_selector() const;
-};
-
-}
diff --git a/scylla/sstables/sstables.cc b/scylla/sstables/sstables.cc
deleted file mode 100644
index dde6a08..0000000
--- a/scylla/sstables/sstables.cc
+++ /dev/null
@@ -1,2867 +0,0 @@
-/*
- * Copyright (C) 2015 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */ - -#include "log.hh" -#include -#include -#include -#include "core/future.hh" -#include "core/future-util.hh" -#include "core/sstring.hh" -#include "core/fstream.hh" -#include "core/shared_ptr.hh" -#include "core/do_with.hh" -#include "core/thread.hh" -#include -#include -#include - -#include "types.hh" -#include "sstables.hh" -#include "compress.hh" -#include "unimplemented.hh" -#include "index_reader.hh" -#include "remove.hh" -#include "memtable.hh" -#include "range.hh" -#include "downsampling.hh" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "utils/phased_barrier.hh" -#include "range_tombstone_list.hh" -#include "counters.hh" -#include "binary_search.hh" - -#include "checked-file-impl.hh" -#include "service/storage_service.hh" - -thread_local disk_error_signal_type sstable_read_error; -thread_local disk_error_signal_type sstable_write_error; - -namespace sstables { - -logging::logger sstlog("sstable"); - -future new_sstable_component_file(const io_error_handler& error_handler, sstring name, open_flags flags) { - return open_checked_file_dma(error_handler, name, flags).handle_exception([name] (auto ep) { - sstlog.error("Could not create SSTable component {}. Found exception: {}", name, ep); - return make_exception_future(ep); - }); -} - -future new_sstable_component_file(const io_error_handler& error_handler, sstring name, open_flags flags, - file_open_options options) { - return open_checked_file_dma(error_handler, name, flags, options).handle_exception([name] (auto ep) { - sstlog.error("Could not create SSTable component {}. Found exception: {}", name, ep); - return make_exception_future(ep); - }); -} - -static utils::phased_barrier& background_jobs() { - static thread_local utils::phased_barrier gate; - return gate; -} - -future<> await_background_jobs() { - sstlog.debug("Waiting for background jobs"); - return background_jobs().advance_and_await().finally([] { - sstlog.debug("Waiting done"); - }); -} - -future<> await_background_jobs_on_all_shards() { - return smp::invoke_on_all([] { - return await_background_jobs(); - }); -} - -class random_access_reader { - std::unique_ptr> _in; - seastar::gate _close_gate; -protected: - virtual input_stream open_at(uint64_t pos) = 0; -public: - future> read_exactly(size_t n) { - return _in->read_exactly(n); - } - void seek(uint64_t pos) { - if (_in) { - seastar::with_gate(_close_gate, [in = std::move(_in)] () mutable { - auto fut = in->close(); - return fut.then([in = std::move(in)] {}); - }); - } - _in = std::make_unique>(open_at(pos)); - } - bool eof() { return _in->eof(); } - virtual future<> close() { - return _close_gate.close().then([this] { - return _in->close(); - }); - } - virtual ~random_access_reader() { } -}; - -class file_random_access_reader : public random_access_reader { - file _file; - uint64_t _file_size; - size_t _buffer_size; - unsigned _read_ahead; -public: - virtual input_stream open_at(uint64_t pos) override { - auto len = _file_size - pos; - file_input_stream_options options; - options.buffer_size = _buffer_size; - options.read_ahead = _read_ahead; - - return make_file_input_stream(_file, pos, len, std::move(options)); - } - explicit file_random_access_reader(file f, uint64_t file_size, size_t buffer_size = 8192, unsigned read_ahead = 4) - : _file(std::move(f)), _file_size(file_size), _buffer_size(buffer_size), _read_ahead(read_ahead) - { - seek(0); - } - virtual future<> close() override { - return random_access_reader::close().finally([this] { - return 
_file.close().handle_exception([save = _file] (auto ep) {
-                sstlog.warn("sstable close failed: {}", ep);
-                general_disk_error();
-            });
-        });
-    }
-};
-
-std::unordered_map<sstable::version_types, sstring, enum_hash<sstable::version_types>> sstable::_version_string = {
-    { sstable::version_types::ka, "ka" },
-    { sstable::version_types::la, "la" }
-};
-
-std::unordered_map<sstable::format_types, sstring, enum_hash<sstable::format_types>> sstable::_format_string = {
-    { sstable::format_types::big, "big" }
-};
-
-static const sstring TOC_SUFFIX = "TOC.txt";
-static const sstring TEMPORARY_TOC_SUFFIX = "TOC.txt.tmp";
-
-// FIXME: this should be version-dependent
-std::unordered_map<component_type, sstring, enum_hash<component_type>> sstable::_component_map = {
-    { component_type::Index, "Index.db"},
-    { component_type::CompressionInfo, "CompressionInfo.db" },
-    { component_type::Data, "Data.db" },
-    { component_type::TOC, TOC_SUFFIX },
-    { component_type::Summary, "Summary.db" },
-    { component_type::Digest, "Digest.sha1" },
-    { component_type::CRC, "CRC.db" },
-    { component_type::Filter, "Filter.db" },
-    { component_type::Statistics, "Statistics.db" },
-    { component_type::Scylla, "Scylla.db" },
-    { component_type::TemporaryTOC, TEMPORARY_TOC_SUFFIX },
-    { component_type::TemporaryStatistics, "Statistics.db.tmp" },
-};
-
-// This assumes that the mappings are small enough, and consulted infrequently
-// enough. If that changes, it would be advisable to create a full static
-// reverse mapping, even if it is done at runtime.
-template <typename Map>
-static typename Map::key_type reverse_map(const typename Map::mapped_type& value, Map& map) {
-    for (auto& pair : map) {
-        if (pair.second == value) {
-            return pair.first;
-        }
-    }
-    throw std::out_of_range("unable to reverse map");
-}
-
-// This should be used every time we use read_exactly directly.
-//
-// read_exactly is a much more convenient interface to use, because we'll
-// be parsing known quantities.
-//
-// However, anything other than the size we have asked for, is certainly a bug,
-// and we need to do something about it.
-static void check_buf_size(temporary_buffer& buf, size_t expected) { - if (buf.size() < expected) { - throw bufsize_mismatch_exception(buf.size(), expected); - } -} - -template -static void check_truncate_and_assign(T& to, const U from) { - static_assert(std::is_integral::value && std::is_integral::value, "T and U must be integral"); - to = from; - if (to != from) { - throw std::overflow_error("assigning U to T caused an overflow"); - } -} - -// Base parser, parses an integer type -template -typename std::enable_if_t::value, void> -read_integer(temporary_buffer& buf, T& i) { - auto *nr = reinterpret_cast *>(buf.get()); - i = net::ntoh(*nr); -} - -template -typename std::enable_if_t::value, future<>> -parse(random_access_reader& in, T& i) { - return in.read_exactly(sizeof(T)).then([&i] (auto buf) { - check_buf_size(buf, sizeof(T)); - - read_integer(buf, i); - return make_ready_future<>(); - }); -} - -template -inline typename std::enable_if_t::value, void> -write(file_writer& out, T i) { - auto *nr = reinterpret_cast *>(&i); - i = net::hton(*nr); - auto p = reinterpret_cast(&i); - out.write(p, sizeof(T)).get(); -} - -template -typename std::enable_if_t::value, future<>> -parse(random_access_reader& in, T& i) { - return parse(in, reinterpret_cast::type&>(i)); -} - -template -inline typename std::enable_if_t::value, void> -write(file_writer& out, T i) { - write(out, static_cast::type>(i)); -} - -future<> parse(random_access_reader& in, bool& i) { - return parse(in, reinterpret_cast(i)); -} - -inline void write(file_writer& out, bool i) { - write(out, static_cast(i)); -} - -template -static inline To convert(From f) { - static_assert(sizeof(To) == sizeof(From), "Sizes must match"); - union { - To to; - From from; - } conv; - - conv.from = f; - return conv.to; -} - -future<> parse(random_access_reader& in, double& d) { - return in.read_exactly(sizeof(double)).then([&d] (auto buf) { - check_buf_size(buf, sizeof(double)); - - auto *nr = reinterpret_cast *>(buf.get()); - d = convert(net::ntoh(*nr)); - return make_ready_future<>(); - }); -} - -inline void write(file_writer& out, double d) { - auto *nr = reinterpret_cast *>(&d); - auto tmp = net::hton(*nr); - auto p = reinterpret_cast(&tmp); - out.write(p, sizeof(unsigned long)).get(); -} - -template -future<> parse(random_access_reader& in, T& len, bytes& s) { - return in.read_exactly(len).then([&s, len] (auto buf) { - check_buf_size(buf, len); - // Likely a different type of char. Most bufs are unsigned, whereas the bytes type is signed. - s = bytes(reinterpret_cast(buf.get()), len); - }); -} - -inline void write(file_writer& out, const bytes& s) { - out.write(s).get(); -} - -inline void write(file_writer& out, bytes_view s) { - out.write(reinterpret_cast(s.data()), s.size()).get(); -} - -inline void write(file_writer& out, bytes_ostream s) { - for (bytes_view fragment : s) { - write(out, fragment); - } -} - -// All composite parsers must come after this -template -future<> parse(random_access_reader& in, First& first, Rest&&... rest) { - return parse(in, first).then([&in, &rest...] { - return parse(in, std::forward(rest)...); - }); -} - -template -inline void write(file_writer& out, const First& first, Rest&&... rest) { - write(out, first); - write(out, std::forward(rest)...); -} - -// Intended to be used for a type that describes itself through describe_type(). -template -typename std::enable_if_t::value && !std::is_enum::value, future<>> -parse(random_access_reader& in, T& t) { - return t.describe_type([&in] (auto&&... 
what) -> future<> { - return parse(in, what...); - }); -} - -template -inline typename std::enable_if_t::value && !std::is_enum::value, void> -write(file_writer& out, const T& t) { - // describe_type() is not const correct, so cheat here: - const_cast(t).describe_type([&out] (auto&&... what) -> void { - write(out, std::forward(what)...); - }); -} - -// For all types that take a size, we provide a template that takes the type -// alone, and another, separate one, that takes a size parameter as well, of -// type Size. This is because although most of the time the size and the data -// are contiguous, it is not always the case. So we want to have the -// flexibility of parsing them separately. -template -future<> parse(random_access_reader& in, disk_string& s) { - auto len = std::make_unique(); - auto f = parse(in, *len); - return f.then([&in, &s, len = std::move(len)] { - return parse(in, *len, s.value); - }); -} - -template -inline void write(file_writer& out, const disk_string& s) { - Size len = 0; - check_truncate_and_assign(len, s.value.size()); - write(out, len); - write(out, s.value); -} - -template -inline void write(file_writer& out, const disk_string_view& s) { - Size len; - check_truncate_and_assign(len, s.value.size()); - write(out, len, s.value); -} - -// We cannot simply read the whole array at once, because we don't know its -// full size. We know the number of elements, but if we are talking about -// disk_strings, for instance, we have no idea how much of the stream each -// element will take. -// -// Sometimes we do know the size, like the case of integers. There, all we have -// to do is to convert each member because they are all stored big endian. -// We'll offer a specialization for that case below. -template -typename std::enable_if_t::value, future<>> -parse(random_access_reader& in, Size& len, std::deque& arr) { - - auto count = make_lw_shared(0); - auto eoarr = [count, len] { return *count == len; }; - - return do_until(eoarr, [count, &in, &arr] { - return parse(in, arr[(*count)++]); - }); -} - -template -typename std::enable_if_t::value, future<>> -parse(random_access_reader& in, Size& len, std::deque& arr) { - auto done = make_lw_shared(0); - return repeat([&in, &len, &arr, done] { - auto now = std::min(len - *done, 100000 / sizeof(Members)); - return in.read_exactly(now * sizeof(Members)).then([&arr, len, now, done] (auto buf) { - check_buf_size(buf, now * sizeof(Members)); - - auto *nr = reinterpret_cast *>(buf.get()); - for (size_t i = 0; i < now; ++i) { - arr[*done + i] = net::ntoh(nr[i]); - } - *done += now; - return make_ready_future(*done == len ? 
stop_iteration::yes : stop_iteration::no); - }); - }); -} - -// We resize the array here, before we pass it to the integer / non-integer -// specializations -template -future<> parse(random_access_reader& in, disk_array& arr) { - auto len = make_lw_shared(); - auto f = parse(in, *len); - return f.then([&in, &arr, len] { - arr.elements.resize(*len); - return parse(in, *len, arr.elements); - }).finally([len] {}); -} - -template -inline typename std::enable_if_t::value, void> -write(file_writer& out, const std::deque& arr) { - for (auto& a : arr) { - write(out, a); - } -} - -template -inline typename std::enable_if_t::value, void> -write(file_writer& out, const std::deque& arr) { - std::vector tmp; - size_t per_loop = 100000 / sizeof(Members); - tmp.resize(per_loop); - size_t idx = 0; - while (idx != arr.size()) { - auto now = std::min(arr.size() - idx, per_loop); - // copy arr into tmp converting each entry into big-endian representation. - auto nr = arr.begin() + idx; - for (size_t i = 0; i < now; i++) { - tmp[i] = net::hton(nr[i]); - } - auto p = reinterpret_cast(tmp.data()); - auto bytes = now * sizeof(Members); - out.write(p, bytes).get(); - idx += now; - } -} - -template -inline void write(file_writer& out, const disk_array& arr) { - Size len = 0; - check_truncate_and_assign(len, arr.elements.size()); - write(out, len); - write(out, arr.elements); -} - -template -future<> parse(random_access_reader& in, Size& len, std::unordered_map& map) { - return do_with(Size(), [&in, len, &map] (Size& count) { - auto eos = [len, &count] { return len == count++; }; - return do_until(eos, [len, &in, &map] { - struct kv { - Key key; - Value value; - }; - - return do_with(kv(), [&in, &map] (auto& el) { - return parse(in, el.key, el.value).then([&el, &map] { - map.emplace(el.key, el.value); - }); - }); - }); - }); -} - -template -future<> parse(random_access_reader& in, disk_hash& h) { - auto w = std::make_unique(); - auto f = parse(in, *w); - return f.then([&in, &h, w = std::move(w)] { - return parse(in, *w, h.map); - }); -} - -template -inline void write(file_writer& out, const std::unordered_map& map) { - for (auto& val: map) { - write(out, val.first, val.second); - }; -} - -template -inline void write(file_writer& out, const disk_hash& h) { - Size len = 0; - check_truncate_and_assign(len, h.map.size()); - write(out, len); - write(out, h.map); -} - -// Abstract parser/sizer/writer for a single tagged member of a tagged union -template -struct single_tagged_union_member_serdes { - using value_type = typename DiskSetOfTaggedUnion::value_type; - virtual ~single_tagged_union_member_serdes() {} - virtual future<> do_parse(random_access_reader& in, value_type& v) const = 0; - virtual uint32_t do_size(const value_type& v) const = 0; - virtual void do_write(file_writer& out, const value_type& v) const = 0; -}; - -// Concrete parser for a single member of a tagged union; parses type "Member" -template -struct single_tagged_union_member_serdes_for final : single_tagged_union_member_serdes { - using base = single_tagged_union_member_serdes; - using value_type = typename base::value_type; - virtual future<> do_parse(random_access_reader& in, value_type& v) const { - v = Member(); - return parse(in, boost::get(v).value); - } - virtual uint32_t do_size(const value_type& v) const override { - return serialized_size(boost::get(v).value); - } - virtual void do_write(file_writer& out, const value_type& v) const override { - write(out, boost::get(v).value); - } -}; - -template -struct disk_set_of_tagged_union::serdes { 
- using disk_set = disk_set_of_tagged_union; - // We can't use unique_ptr, because we initialize from an std::intializer_list, which is not move compatible. - using serdes_map_type = std::unordered_map>, typename disk_set::hash_type>; - using value_type = typename disk_set::value_type; - serdes_map_type map = { - {Members::tag(), make_shared>()}... - }; - future<> lookup_and_parse(random_access_reader& in, TagType tag, uint32_t& size, disk_set& s, value_type& value) const { - auto i = map.find(tag); - if (i == map.end()) { - return in.read_exactly(size).discard_result(); - } else { - return i->second->do_parse(in, value).then([tag, &s, &value] () mutable { - s.data.emplace(tag, std::move(value)); - }); - } - } - uint32_t lookup_and_size(TagType tag, const value_type& value) const { - return map.at(tag)->do_size(value); - } - void lookup_and_write(file_writer& out, TagType tag, const value_type& value) const { - return map.at(tag)->do_write(out, value); - } -}; - -template -typename disk_set_of_tagged_union::serdes disk_set_of_tagged_union::s_serdes; - -template -future<> -parse(random_access_reader& in, disk_set_of_tagged_union& s) { - using disk_set = disk_set_of_tagged_union; - using key_type = typename disk_set::key_type; - using value_type = typename disk_set::value_type; - return do_with(0u, 0u, 0u, value_type{}, [&] (key_type& nr_elements, key_type& new_key, unsigned& new_size, value_type& new_value) { - return parse(in, nr_elements).then([&] { - auto rng = boost::irange(0, nr_elements); // do_for_each doesn't like an rvalue range - return do_for_each(rng.begin(), rng.end(), [&] (key_type ignore) { - return parse(in, new_key).then([&] { - return parse(in, new_size).then([&] { - return disk_set::s_serdes.lookup_and_parse(in, TagType(new_key), new_size, s, new_value); - }); - }); - }); - }); - }); -} - -template -void write(file_writer& out, const disk_set_of_tagged_union& s) { - using disk_set = disk_set_of_tagged_union; - write(out, uint32_t(s.data.size())); - for (auto&& kv : s.data) { - auto&& tag = kv.first; - auto&& value = kv.second; - write(out, tag); - write(out, uint32_t(disk_set::s_serdes.lookup_and_size(tag, value))); - disk_set::s_serdes.lookup_and_write(out, tag, value); - } -} - -future<> parse(random_access_reader& in, summary& s) { - using pos_type = typename decltype(summary::positions)::value_type; - - return parse(in, s.header.min_index_interval, - s.header.size, - s.header.memory_size, - s.header.sampling_level, - s.header.size_at_full_sampling).then([&in, &s] { - return in.read_exactly(s.header.size * sizeof(pos_type)).then([&in, &s] (auto buf) { - auto len = s.header.size * sizeof(pos_type); - check_buf_size(buf, len); - - s.entries.resize(s.header.size); - - auto *nr = reinterpret_cast(buf.get()); - s.positions = std::deque(nr, nr + s.header.size); - - // Since the keys in the index are not sized, we need to calculate - // the start position of the index i+1 to determine the boundaries - // of index i. The "memory_size" field in the header determines the - // total memory used by the map, so if we push it to the vector, we - // can guarantee that no conditionals are used, and we can always - // query the position of the "next" index. 
- s.positions.push_back(s.header.memory_size); - }).then([&in, &s] { - in.seek(sizeof(summary::header) + s.header.memory_size); - return parse(in, s.first_key, s.last_key); - }).then([&in, &s] { - - in.seek(s.positions[0] + sizeof(summary::header)); - - assert(s.positions.size() == (s.entries.size() + 1)); - - auto idx = make_lw_shared(0); - return do_for_each(s.entries.begin(), s.entries.end(), [idx, &in, &s] (auto& entry) { - auto pos = s.positions[(*idx)++]; - auto next = s.positions[*idx]; - - auto entrysize = next - pos; - - return in.read_exactly(entrysize).then([&entry, entrysize] (auto buf) { - check_buf_size(buf, entrysize); - - auto keysize = entrysize - 8; - entry.key = bytes(reinterpret_cast(buf.get()), keysize); - buf.trim_front(keysize); - // FIXME: This is a le read. We should make this explicit - entry.position = *(reinterpret_cast *>(buf.get())); - entry.token = dht::global_partitioner().get_token(entry.get_key()); - - return make_ready_future<>(); - }); - }).then([&s] { - // Delete last element which isn't part of the on-disk format. - s.positions.pop_back(); - }); - }); - }); -} - -inline void write(file_writer& out, const summary_entry& entry) { - // FIXME: summary entry is supposedly written in memory order, but that - // would prevent portability of summary file between machines of different - // endianness. We can treat it as little endian to preserve portability. - write(out, entry.key); - auto p = reinterpret_cast(&entry.position); - out.write(p, sizeof(uint64_t)).get(); -} - -inline void write(file_writer& out, const summary& s) { - // NOTE: positions and entries must be stored in NATIVE BYTE ORDER, not BIG-ENDIAN. - write(out, s.header.min_index_interval, - s.header.size, - s.header.memory_size, - s.header.sampling_level, - s.header.size_at_full_sampling); - for (auto&& e : s.positions) { - out.write(reinterpret_cast(&e), sizeof(e)).get(); - } - write(out, s.entries); - write(out, s.first_key, s.last_key); -} - -future sstable::read_summary_entry(size_t i) { - // The last one is the boundary marker - if (i >= (_components->summary.entries.size())) { - throw std::out_of_range(sprint("Invalid Summary index: %ld", i)); - } - - return make_ready_future(_components->summary.entries[i]); -} - -future<> parse(random_access_reader& in, deletion_time& d) { - return parse(in, d.local_deletion_time, d.marked_for_delete_at); -} - -template -future<> parse(random_access_reader& in, std::unique_ptr& p) { - p.reset(new Child); - return parse(in, *static_cast(p.get())); -} - -template -inline void write(file_writer& out, const std::unique_ptr& p) { - write(out, *static_cast(p.get())); -} - -future<> parse(random_access_reader& in, statistics& s) { - return parse(in, s.hash).then([&in, &s] { - return do_for_each(s.hash.map.begin(), s.hash.map.end(), [&in, &s] (auto val) mutable { - in.seek(val.second); - - switch (val.first) { - case metadata_type::Validation: - return parse(in, s.contents[val.first]); - case metadata_type::Compaction: - return parse(in, s.contents[val.first]); - case metadata_type::Stats: - return parse(in, s.contents[val.first]); - default: - sstlog.warn("Invalid metadata type at Statistics file: {} ", int(val.first)); - return make_ready_future<>(); - } - }); - }); -} - -inline void write(file_writer& out, const statistics& s) { - write(out, s.hash); - auto types = boost::copy_range>(s.hash.map | boost::adaptors::map_keys); - // use same sort order as seal_statistics - boost::sort(types); - for (auto t : types) { - s.contents.at(t)->write(out); - } -} - 
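The summary parser above leans on a sentinel: entries are variable-length and only their start positions are stored, so appending the total memory_size as one extra "position" makes size-of-entry-i = positions[i+1] - positions[i] hold for every entry, including the last, with no branch. A small worked example of the trick (data and names invented for illustration):

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        std::string memory = "ab" "cdef" "g";       // three packed, variable-length entries
        std::vector<size_t> positions = {0, 2, 6};  // start offset of each entry
        positions.push_back(memory.size());         // sentinel: total size, "start" of entry N

        for (size_t i = 0; i + 1 < positions.size(); ++i) {
            size_t len = positions[i + 1] - positions[i];  // no special case for the last entry
            std::cout << i << ": " << memory.substr(positions[i], len) << "\n";
        }
        positions.pop_back();  // like the parser above, drop the sentinel once done
    }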
-future<> parse(random_access_reader& in, utils::estimated_histogram& eh) { - auto len = std::make_unique<uint32_t>(); - - auto f = parse(in, *len); - return f.then([&in, &eh, len = std::move(len)] { - uint32_t length = *len; - - if (length == 0) { - throw malformed_sstable_exception("Estimated histogram with zero size found. Can't continue!"); - } - eh.bucket_offsets.resize(length - 1); - eh.buckets.resize(length); - - auto type_size = sizeof(uint64_t) * 2; - return in.read_exactly(length * type_size).then([&eh, length, type_size] (auto buf) { - check_buf_size(buf, length * type_size); - - auto *nr = reinterpret_cast<const net::packed<uint64_t> *>(buf.get()); - size_t j = 0; - for (size_t i = 0; i < length; ++i) { - eh.bucket_offsets[i == 0 ? 0 : i - 1] = net::ntoh(nr[j++]); - eh.buckets[i] = net::ntoh(nr[j++]); - } - return make_ready_future<>(); - }); - }); -} - -inline void write(file_writer& out, const utils::estimated_histogram& eh) { - uint32_t len = 0; - check_truncate_and_assign(len, eh.buckets.size()); - - write(out, len); - struct element { - uint64_t offsets; - uint64_t buckets; - }; - std::vector<element> elements; - elements.resize(eh.buckets.size()); - - auto *offsets_nr = reinterpret_cast<const net::packed<uint64_t> *>(eh.bucket_offsets.data()); - auto *buckets_nr = reinterpret_cast<const net::packed<uint64_t> *>(eh.buckets.data()); - for (size_t i = 0; i < eh.buckets.size(); i++) { - elements[i].offsets = net::hton(offsets_nr[i == 0 ? 0 : i - 1]); - elements[i].buckets = net::hton(buckets_nr[i]); - } - - auto p = reinterpret_cast<const char*>(elements.data()); - auto bytes = elements.size() * sizeof(element); - out.write(p, bytes).get(); -} - -// This is small enough, and well-defined. Easier to just read it all -// at once -future<> sstable::read_toc() { - if (_recognized_components.size()) { - return make_ready_future<>(); - } - - auto file_path = filename(sstable::component_type::TOC); - - sstlog.debug("Reading TOC file {} ", file_path); - - return open_checked_file_dma(_read_error_handler, file_path, open_flags::ro).then([this, file_path] (file f) { - auto bufptr = allocate_aligned_buffer<char>(4096, 4096); - auto buf = bufptr.get(); - - auto fut = f.dma_read(0, buf, 4096); - return std::move(fut).then([this, f = std::move(f), bufptr = std::move(bufptr), file_path] (size_t size) mutable { - // This file is supposed to be very small. Theoretically we should check its size, - // but if we so much as read a whole page from it, there is definitely something fishy - // going on - and this simplifies the code.
- if (size >= 4096) { - throw malformed_sstable_exception("SSTable too big: " + to_sstring(size) + " bytes", file_path); - } - - std::experimental::string_view buf(bufptr.get(), size); - std::vector<sstring> comps; - - boost::split(comps, buf, boost::is_any_of("\n")); - - for (auto& c: comps) { - // accept trailing newlines - if (c == "") { - continue; - } - try { - _recognized_components.insert(reverse_map(c, _component_map)); - } catch (std::out_of_range& oor) { - _unrecognized_components.push_back(c); - sstlog.info("Unrecognized TOC component was found: {} in sstable {}", c, file_path); - } - } - if (!_recognized_components.size()) { - throw malformed_sstable_exception("Empty TOC", file_path); - } - return f.close().finally([f] {}); - }); - }).then_wrapped([file_path] (future<> f) { - try { - f.get(); - } catch (std::system_error& e) { - if (e.code() == std::error_code(ENOENT, std::system_category())) { - throw malformed_sstable_exception(file_path + ": file not found"); - } - throw; - } - }); - -} - -void sstable::generate_toc(compressor c, double filter_fp_chance) { - // Creating table of components. - _recognized_components.insert(component_type::TOC); - _recognized_components.insert(component_type::Statistics); - _recognized_components.insert(component_type::Digest); - _recognized_components.insert(component_type::Index); - _recognized_components.insert(component_type::Summary); - _recognized_components.insert(component_type::Data); - if (filter_fp_chance != 1.0) { - _recognized_components.insert(component_type::Filter); - } - if (c == compressor::none) { - _recognized_components.insert(component_type::CRC); - } else { - _recognized_components.insert(component_type::CompressionInfo); - } - _recognized_components.insert(component_type::Scylla); -} - -void sstable::write_toc(const io_priority_class& pc) { - auto file_path = filename(sstable::component_type::TemporaryTOC); - - sstlog.debug("Writing TOC file {} ", file_path); - - // Writing TOC content to temporary file. - // If creation of the temporary TOC failed, it implies that a previous boot - // failed to delete a temporary sstable for this column family, or that a - // sstable is being created in parallel with the same generation. - file f = new_sstable_component_file(_write_error_handler, file_path, open_flags::wo | open_flags::create | open_flags::exclusive).get0(); - - bool toc_exists = file_exists(filename(sstable::component_type::TOC)).get0(); - if (toc_exists) { - // TOC will exist at this point if write_components() was called with - // the generation of a sstable that exists. - f.close().get(); - remove_file(file_path).get(); - throw std::runtime_error(sprint("SSTable write failed due to existence of TOC file for generation %ld of %s.%s", _generation, _schema->ks_name(), _schema->cf_name())); - } - - file_output_stream_options options; - options.buffer_size = 4096; - options.io_priority_class = pc; - auto w = file_writer(std::move(f), std::move(options)); - - for (auto&& key : _recognized_components) { - // A newline character is appended to the end of each component name. - auto value = _component_map[key] + "\n"; - bytes b = bytes(reinterpret_cast(value.c_str()), value.size()); - write(w, b); - } - w.flush().get(); - w.close().get(); - - // Flushing parent directory to guarantee that temporary TOC file reached - // the disk.
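// (On POSIX, creating or renaming a file only becomes durable once the
// containing directory itself is flushed; fsyncing the file alone does not
// persist its directory entry. A minimal sketch of the pattern the code
// below follows:
//
//   file dir = open_checked_directory(handler, dir_path).get0();
//   dir.flush().get();  // persist the new TemporaryTOC directory entry
//   dir.close().get();
// )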
- file dir_f = open_checked_directory(_write_error_handler, _dir).get0(); - sstable_write_io_check([&] { - dir_f.flush().get(); - dir_f.close().get(); - }); -} - -future<> sstable::seal_sstable() { - // SSTable sealing is about renaming temporary TOC file after guaranteeing - // that each component reached the disk safely. - return open_checked_directory(_write_error_handler, _dir).then([this] (file dir_f) { - // Guarantee that every component of this sstable reached the disk. - return sstable_write_io_check([&] { return dir_f.flush(); }).then([this] { - // Rename TOC because it's no longer temporary. - return sstable_write_io_check([&] { - return engine().rename_file(filename(sstable::component_type::TemporaryTOC), filename(sstable::component_type::TOC)); - }); - }).then([this, dir_f] () mutable { - // Guarantee that the changes above reached the disk. - return sstable_write_io_check([&] { return dir_f.flush(); }); - }).then([this, dir_f] () mutable { - return sstable_write_io_check([&] { return dir_f.close(); }); - }).then([this, dir_f] { - // If this point was reached, sstable should be safe in disk. - sstlog.debug("SSTable with generation {} of {}.{} was sealed successfully.", _generation, _schema->ks_name(), _schema->cf_name()); - }); - }); -} - -void write_crc(io_error_handler& error_handler, const sstring file_path, const checksum& c) { - sstlog.debug("Writing CRC file {} ", file_path); - - auto oflags = open_flags::wo | open_flags::create | open_flags::exclusive; - file f = new_sstable_component_file(error_handler, file_path, oflags).get0(); - - file_output_stream_options options; - options.buffer_size = 4096; - auto w = file_writer(std::move(f), std::move(options)); - write(w, c); - w.close().get(); -} - -// Digest file stores the full checksum of data file converted into a string. 
-void write_digest(io_error_handler& error_handler, const sstring file_path, uint32_t full_checksum) { - sstlog.debug("Writing Digest file {} ", file_path); - - auto oflags = open_flags::wo | open_flags::create | open_flags::exclusive; - auto f = new_sstable_component_file(error_handler, file_path, oflags).get0(); - - file_output_stream_options options; - options.buffer_size = 4096; - auto w = file_writer(std::move(f), std::move(options)); - - auto digest = to_sstring(full_checksum); - write(w, digest); - w.close().get(); -} - -thread_local std::array, downsampling::BASE_SAMPLING_LEVEL> downsampling::_sample_pattern_cache; -thread_local std::array, downsampling::BASE_SAMPLING_LEVEL> downsampling::_original_index_cache; - -future sstable::read_indexes(uint64_t summary_idx, const io_priority_class& pc) { - return do_with(get_index_reader(pc), [summary_idx] (auto& ir_ptr) { - return ir_ptr->get_index_entries(summary_idx).finally([&ir_ptr] { - return ir_ptr->close(); - }); - }); -} - -std::unique_ptr sstable::get_index_reader(const io_priority_class& pc) { - return std::make_unique(shared_from_this(), pc); -} - -template -future<> sstable::read_simple(T& component, const io_priority_class& pc) { - - auto file_path = filename(Type); - sstlog.debug(("Reading " + _component_map[Type] + " file {} ").c_str(), file_path); - return open_file_dma(file_path, open_flags::ro).then([this, &component] (file fi) { - auto fut = fi.size(); - return fut.then([this, &component, fi = std::move(fi)] (uint64_t size) { - auto f = make_checked_file(_read_error_handler, fi); - auto r = make_lw_shared(std::move(f), size, sstable_buffer_size); - auto fut = parse(*r, component); - return fut.finally([r] { - return r->close(); - }).then([r] {}); - }); - }).then_wrapped([this, file_path] (future<> f) { - try { - f.get(); - } catch (std::system_error& e) { - if (e.code() == std::error_code(ENOENT, std::system_category())) { - throw malformed_sstable_exception(file_path + ": file not found"); - } - throw; - } - }); -} - -template -void sstable::write_simple(const T& component, const io_priority_class& pc) { - auto file_path = filename(Type); - sstlog.debug(("Writing " + _component_map[Type] + " file {} ").c_str(), file_path); - file f = new_sstable_component_file(_write_error_handler, file_path, open_flags::wo | open_flags::create | open_flags::exclusive).get0(); - - file_output_stream_options options; - options.buffer_size = sstable_buffer_size; - options.io_priority_class = pc; - auto w = file_writer(std::move(f), std::move(options)); - write(w, component); - w.flush().get(); - w.close().get(); -} - -template future<> sstable::read_simple(sstables::filter& f, const io_priority_class& pc); -template void sstable::write_simple(const sstables::filter& f, const io_priority_class& pc); - -future<> sstable::read_compression(const io_priority_class& pc) { - // FIXME: If there is no compression, we should expect a CRC file to be present. 
- if (!has_component(sstable::component_type::CompressionInfo)) { - return make_ready_future<>(); - } - - return read_simple(_components->compression, pc); -} - -void sstable::write_compression(const io_priority_class& pc) { - if (!has_component(sstable::component_type::CompressionInfo)) { - return; - } - - write_simple(_components->compression, pc); -} - -void sstable::validate_min_max_metadata() { - auto entry = _components->statistics.contents.find(metadata_type::Stats); - if (entry == _components->statistics.contents.end()) { - throw std::runtime_error("Stats metadata not available"); - } - auto& p = entry->second; - if (!p) { - throw std::runtime_error("Statistics is malformed"); - } - - stats_metadata& s = *static_cast(p.get()); - auto is_composite_valid = [] (const bytes& b) { - auto v = composite_view(b); - try { - size_t s = 0; - for (auto& c : v.components()) { - s += c.first.size() + sizeof(composite::size_type) + sizeof(composite::eoc_type); - } - return s == b.size(); - } catch (marshal_exception&) { - return false; - } - }; - auto clear_incorrect_min_max_column_names = [&s] { - s.min_column_names.elements.clear(); - s.max_column_names.elements.clear(); - }; - auto& min_column_names = s.min_column_names.elements; - auto& max_column_names = s.max_column_names.elements; - - if (min_column_names.empty() && max_column_names.empty()) { - return; - } - - // The min/max metadata is wrong if: - // 1) it's not empty and schema defines no clustering key. - // 2) their size differ. - // 3) column name is stored instead of clustering value. - // 4) clustering component is stored as composite. - if ((!_schema->clustering_key_size() && (min_column_names.size() || max_column_names.size())) || - (min_column_names.size() != max_column_names.size())) { - clear_incorrect_min_max_column_names(); - return; - } - - for (auto i = 0U; i < min_column_names.size(); i++) { - if (_schema->get_column_definition(min_column_names[i].value) || _schema->get_column_definition(max_column_names[i].value)) { - clear_incorrect_min_max_column_names(); - break; - } - - if (_schema->is_compound() && _schema->clustering_key_size() > 1 && _schema->is_dense() && - (is_composite_valid(min_column_names[i].value) || is_composite_valid(max_column_names[i].value))) { - clear_incorrect_min_max_column_names(); - break; - } - } -} - -void sstable::set_clustering_components_ranges() { - if (!_schema->clustering_key_size()) { - return; - } - auto& min_column_names = get_stats_metadata().min_column_names.elements; - auto& max_column_names = get_stats_metadata().max_column_names.elements; - - auto s = std::min(min_column_names.size(), max_column_names.size()); - _clustering_components_ranges.reserve(s); - for (auto i = 0U; i < s; i++) { - auto r = nonwrapping_range({{ min_column_names[i].value, true }}, {{ max_column_names[i].value, true }}); - _clustering_components_ranges.push_back(std::move(r)); - } -} - -const std::vector>& sstable::clustering_components_ranges() const { - return _clustering_components_ranges; -} - -future<> sstable::read_statistics(const io_priority_class& pc) { - return read_simple(_components->statistics, pc); -} - -void sstable::write_statistics(const io_priority_class& pc) { - write_simple(_components->statistics, pc); -} - -void sstable::rewrite_statistics(const io_priority_class& pc) { - auto file_path = filename(component_type::TemporaryStatistics); - sstlog.debug("Rewriting statistics component of sstable {}", get_filename()); - file f = new_sstable_component_file(_write_error_handler, file_path, 
open_flags::wo | open_flags::create | open_flags::truncate).get0(); - - file_output_stream_options options; - options.buffer_size = sstable_buffer_size; - options.io_priority_class = pc; - auto w = file_writer(std::move(f), std::move(options)); - write(w, _components->statistics); - w.flush().get(); - w.close().get(); - // rename() guarantees atomicity when renaming a file into place. - sstable_write_io_check(rename_file, file_path, filename(component_type::Statistics)).get(); -} - -future<> sstable::read_summary(const io_priority_class& pc) { - if (_components->summary) { - return make_ready_future<>(); - } - - return read_toc().then([this, &pc] { - // We'll try to keep the main code path exception free, but if an exception does happen - // we can try to regenerate the Summary. - if (has_component(sstable::component_type::Summary)) { - return read_simple(_components->summary, pc).handle_exception([this, &pc] (auto ep) { - sstlog.warn("Couldn't read summary file {}: {}. Recreating it.", this->filename(component_type::Summary), ep); - return this->generate_summary(pc); - }); - } else { - return generate_summary(pc); - } - }); -} - -future<> sstable::open_data() { - return when_all(open_checked_file_dma(_read_error_handler, filename(component_type::Index), open_flags::ro), - open_checked_file_dma(_read_error_handler, filename(component_type::Data), open_flags::ro)) - .then([this] (auto files) { - _index_file = std::get(std::get<0>(files).get()); - _data_file = std::get(std::get<1>(files).get()); - return this->update_info_for_opened_data(); - }); -} - -future<> sstable::update_info_for_opened_data() { - return _data_file.stat().then([this] (struct stat st) { - if (this->has_component(sstable::component_type::CompressionInfo)) { - _components->compression.update(st.st_size); - } - _data_file_size = st.st_size; - _data_file_write_time = db_clock::from_time_t(st.st_mtime); - }).then([this] { - return _index_file.size().then([this] (auto size) { - _index_file_size = size; - }); - }).then([this] { - if (this->has_component(sstable::component_type::Filter)) { - return io_check([&] { - return engine().file_size(this->filename(sstable::component_type::Filter)); - }).then([this] (auto size) { - _filter_file_size = size; - }); - } - return make_ready_future<>(); - }).then([this] { - this->set_clustering_components_ranges(); - this->set_first_and_last_keys(); - - // Get disk usage for this sstable (includes all components). - _bytes_on_disk = 0; - return do_for_each(_recognized_components, [this] (component_type c) { - return this->sstable_write_io_check([&] { - return engine().file_size(this->filename(c)); - }).then([this] (uint64_t bytes) { - _bytes_on_disk += bytes; - }); - }); - }); -} - -future<> sstable::create_data() { - auto oflags = open_flags::wo | open_flags::create | open_flags::exclusive; - file_open_options opt; - opt.extent_allocation_size_hint = 32 << 20; - opt.sloppy_size = true; - return when_all(new_sstable_component_file(_write_error_handler, filename(component_type::Index), oflags, opt), - new_sstable_component_file(_write_error_handler, filename(component_type::Data), oflags, opt)).then([this] (auto files) { - // FIXME: If both files could not be created, the first get below will - // throw an exception, and second get() will not be attempted, and - // we'll get a warning about the second future being destructed - // without its exception being examined. 
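// (One way to avoid the ignored-exception warning, sketched with the same
// seastar::when_all_succeed() that sstable::load() below already uses, and
// assuming this Seastar version forwards the resolved values to then():
//
//   return seastar::when_all_succeed(
//       new_sstable_component_file(_write_error_handler, filename(component_type::Index), oflags, opt),
//       new_sstable_component_file(_write_error_handler, filename(component_type::Data), oflags, opt))
//     .then([this] (file index_file, file data_file) {
//       _index_file = std::move(index_file);
//       _data_file = std::move(data_file);
//   });
//
// when_all_succeed() propagates the first failure and discards the sibling
// future cleanly, so neither exception goes unexamined.)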
- _index_file = std::get(std::get<0>(files).get()); - _data_file = std::get(std::get<1>(files).get()); - }); -} - -// This interface is only used during tests, snapshot loading and early initialization. -// No need to set tunable priorities for it. -future<> sstable::load(const io_priority_class& pc) { - return read_toc().then([this, &pc] { - return seastar::when_all_succeed( - read_statistics(pc), - read_compression(pc), - read_scylla_metadata(pc), - read_filter(pc), - read_summary(pc)).then([this] { - validate_min_max_metadata(); - set_clustering_components_ranges(); - return open_data(); - }); - }); -} - -future<> sstable::load(sstables::foreign_sstable_open_info info) { - return read_toc().then([this, info = std::move(info)] () mutable { - _components = std::move(info.components); - _data_file = make_checked_file(_read_error_handler, info.data.to_file()); - _index_file = make_checked_file(_read_error_handler, info.index.to_file()); - validate_min_max_metadata(); - return update_info_for_opened_data(); - }); -} - -future sstable::load_shared_components(const schema_ptr& s, sstring dir, int generation, version_types v, format_types f, - const io_priority_class& pc) { - auto sst = make_lw_shared(s, dir, generation, v, f); - return sst->load(pc).then([sst] () mutable { - auto shards = sst->get_shards_for_this_sstable(); - auto info = sstable_open_info{make_lw_shared(std::move(*sst->_components)), - std::move(shards), std::move(sst->_data_file), std::move(sst->_index_file)}; - return make_ready_future(std::move(info)); - }); -} - -future sstable::get_open_info() & { - return _components.copy().then([this] (auto c) mutable { - return foreign_sstable_open_info{std::move(c), this->get_shards_for_this_sstable(), _data_file.dup(), _index_file.dup(), - _generation, _version, _format}; - }); -} - -static composite::eoc bound_kind_to_start_marker(bound_kind start_kind) { - return start_kind == bound_kind::excl_start - ? composite::eoc::end - : composite::eoc::start; -} - -static composite::eoc bound_kind_to_end_marker(bound_kind end_kind) { - return end_kind == bound_kind::excl_end - ? composite::eoc::start - : composite::eoc::end; -} - -static void output_promoted_index_entry(bytes_ostream& promoted_index, - const bytes& first_col, - const bytes& last_col, - uint64_t offset, uint64_t width) { - char s[2]; - write_be(s, uint16_t(first_col.size())); - promoted_index.write(s, 2); - promoted_index.write(first_col); - write_be(s, uint16_t(last_col.size())); - promoted_index.write(s, 2); - promoted_index.write(last_col); - char q[8]; - write_be(q, uint64_t(offset)); - promoted_index.write(q, 8); - write_be(q, uint64_t(width)); - promoted_index.write(q, 8); -} - -// FIXME: use this in write_column_name() instead of repeating the code -static bytes serialize_colname(const composite& clustering_key, - const std::vector& column_names, composite::eoc marker) { - auto c = composite::from_exploded(column_names, marker); - auto ck_bview = bytes_view(clustering_key); - // The marker is not a component, so if the last component is empty (IOW, - // only serializes to the marker), then we just replace the key's last byte - // with the marker. 
If the component, however, is not empty, then the - marker should be at the end of it, and we just join them together as we - do for any normal component - if (c.size() == 1) { - ck_bview.remove_suffix(1); - } - size_t sz = ck_bview.size() + c.size(); - if (sz > std::numeric_limits<uint16_t>::max()) { - throw std::runtime_error(sprint("Column name too large (%d > %d)", sz, std::numeric_limits<uint16_t>::max())); - } - bytes colname(bytes::initialized_later(), sz); - std::copy(ck_bview.begin(), ck_bview.end(), colname.begin()); - std::copy(c.get_bytes().begin(), c.get_bytes().end(), colname.begin() + ck_bview.size()); - return colname; -} - -// Call maybe_flush_pi_block() before writing the given sstable atom to the -// output. This may start a new promoted-index block depending on how much -// data we've already written since the start of the current block. Starting -// a new block involves both outputting the range of the old block to the -// index file, and outputting again the currently-open range tombstones to -// the data file. -// TODO: currently, maybe_flush_pi_block serializes the column name on every -// call, saving it in _pi_write.block_last_colname which we need for closing -// each block, as well as for closing the last block. We could instead save -// just the unprocessed arguments, and serialize them only when needed at the -// end of the block. For this we would need this function to take rvalue -// references (so data is moved in), and need not to use vector of bytes_view -// (which might be gone later). -void sstable::maybe_flush_pi_block(file_writer& out, - const composite& clustering_key, - const std::vector<bytes_view>& column_names, - composite::eoc marker) { - bytes colname = serialize_colname(clustering_key, column_names, marker); - if (_pi_write.block_first_colname.empty()) { - // This is the first column in the partition, or first column since we - // closed a promoted-index block. Remember its name and position - - // we'll need to write it to the promoted index. - _pi_write.block_start_offset = out.offset(); - _pi_write.block_next_start_offset = out.offset() + _pi_write.desired_block_size; - _pi_write.block_first_colname = colname; - _pi_write.block_last_colname = std::move(colname); - } else if (out.offset() >= _pi_write.block_next_start_offset) { - // If we wrote enough bytes to the partition since we output a sample - // to the promoted index, output one now and start a new one. - output_promoted_index_entry(_pi_write.data, - _pi_write.block_first_colname, - _pi_write.block_last_colname, - _pi_write.block_start_offset - _c_stats.start_offset, - out.offset() - _pi_write.block_start_offset); - _pi_write.numblocks++; - _pi_write.block_start_offset = out.offset(); - // Because the new block can be read without the previous blocks, we - // need to repeat the range tombstones which are still open. - // Note that block_start_offset is before outputting those (so the new - // block includes them), but we set block_next_start_offset after - so - // even if we wrote a lot of open tombstones, we still get a full - // block size of new data.
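// (For scale: desired_block_size comes from cfg.promoted_index_block_size or
// column_index_size_in_kb * 1024 - 64 KiB with the stock configuration - so
// a partition gets roughly one promoted-index sample per 64 KiB of row data
// written since block_start_offset.)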
- if (!clustering_key.empty()) { - auto& rts = _pi_write.tombstone_accumulator->range_tombstones_for_row( - clustering_key_prefix::from_range(clustering_key.values())); - for (const auto& rt : rts) { - auto start = composite::from_clustering_element(*_pi_write.schemap, rt.start); - auto end = composite::from_clustering_element(*_pi_write.schemap, rt.end); - write_range_tombstone(out, - start, bound_kind_to_start_marker(rt.start_kind), - end, bound_kind_to_end_marker(rt.end_kind), - {}, rt.tomb); - } - } - _pi_write.block_next_start_offset = out.offset() + _pi_write.desired_block_size; - _pi_write.block_first_colname = colname; - _pi_write.block_last_colname = std::move(colname); - } else { - // Keep track of the last column in the partition - we'll need it to close - // the last block in the promoted index, unfortunately. - _pi_write.block_last_colname = std::move(colname); - } -} - -void sstable::write_column_name(file_writer& out, const composite& clustering_key, const std::vector<bytes_view>& column_names, composite::eoc marker) { - // was defined in the schema, for example. - auto c = composite::from_exploded(column_names, marker); - auto ck_bview = bytes_view(clustering_key); - - // The marker is not a component, so if the last component is empty (IOW, - // only serializes to the marker), then we just replace the key's last byte - // with the marker. If the component, however, is not empty, then the - // marker should be at the end of it, and we just join them together as we - // do for any normal component - if (c.size() == 1) { - ck_bview.remove_suffix(1); - } - size_t sz = ck_bview.size() + c.size(); - if (sz > std::numeric_limits<uint16_t>::max()) { - throw std::runtime_error(sprint("Column name too large (%d > %d)", sz, std::numeric_limits<uint16_t>::max())); - } - uint16_t sz16 = sz; - write(out, sz16, ck_bview, c); -} - -void sstable::write_column_name(file_writer& out, bytes_view column_names) { - size_t sz = column_names.size(); - if (sz > std::numeric_limits<uint16_t>::max()) { - throw std::runtime_error(sprint("Column name too large (%d > %d)", sz, std::numeric_limits<uint16_t>::max())); - } - uint16_t sz16 = sz; - write(out, sz16, column_names); -} - - -static inline void update_cell_stats(column_stats& c_stats, uint64_t timestamp) { - c_stats.update_min_timestamp(timestamp); - c_stats.update_max_timestamp(timestamp); - c_stats.column_count++; -} - -// Intended to write all cell components that follow column name.
-void sstable::write_cell(file_writer& out, atomic_cell_view cell, const column_definition& cdef) { - uint64_t timestamp = cell.timestamp(); - - update_cell_stats(_c_stats, timestamp); - - if (cell.is_dead(_now)) { - // tombstone cell - - column_mask mask = column_mask::deletion; - uint32_t deletion_time_size = sizeof(uint32_t); - uint32_t deletion_time = cell.deletion_time().time_since_epoch().count(); - - _c_stats.update_max_local_deletion_time(deletion_time); - _c_stats.tombstone_histogram.update(deletion_time); - - write(out, mask, timestamp, deletion_time_size, deletion_time); - } else if (cdef.is_counter()) { - // counter cell - assert(!cell.is_counter_update()); - - column_mask mask = column_mask::counter; - write(out, mask, int64_t(0), timestamp); - - counter_cell_view ccv(cell); - auto shard_count = ccv.shard_count(); - - static constexpr auto header_entry_size = sizeof(int16_t); - static constexpr auto counter_shard_size = 32u; // counter_id: 16 + clock: 8 + value: 8 - auto total_size = sizeof(int16_t) + shard_count * (header_entry_size + counter_shard_size); - - write(out, int32_t(total_size), int16_t(shard_count)); - for (auto i = 0u; i < shard_count; i++) { - write(out, std::numeric_limits::min() + i); - } - for (auto&& s : ccv.shards()) { - auto uuid = s.id().to_uuid(); - write(out, int64_t(uuid.get_most_significant_bits()), - int64_t(uuid.get_least_significant_bits()), - int64_t(s.logical_clock()), int64_t(s.value())); - } - - _c_stats.update_max_local_deletion_time(std::numeric_limits::max()); - } else if (cell.is_live_and_has_ttl()) { - // expiring cell - - column_mask mask = column_mask::expiration; - uint32_t ttl = cell.ttl().count(); - uint32_t expiration = cell.expiry().time_since_epoch().count(); - disk_string_view cell_value { cell.value() }; - - _c_stats.update_max_local_deletion_time(expiration); - - write(out, mask, ttl, expiration, timestamp, cell_value); - } else { - // regular cell - - column_mask mask = column_mask::none; - disk_string_view cell_value { cell.value() }; - - _c_stats.update_max_local_deletion_time(std::numeric_limits::max()); - - write(out, mask, timestamp, cell_value); - } -} - -void sstable::write_row_marker(file_writer& out, const row_marker& marker, const composite& clustering_key) { - if (marker.is_missing()) { - return; - } - - // Write row mark cell to the beginning of clustered row. 
- write_column_name(out, clustering_key, { bytes_view() }); - uint64_t timestamp = marker.timestamp(); - uint32_t value_length = 0; - - update_cell_stats(_c_stats, timestamp); - - if (marker.is_dead(_now)) { - column_mask mask = column_mask::deletion; - uint32_t deletion_time_size = sizeof(uint32_t); - uint32_t deletion_time = marker.deletion_time().time_since_epoch().count(); - - _c_stats.tombstone_histogram.update(deletion_time); - - write(out, mask, timestamp, deletion_time_size, deletion_time); - } else if (marker.is_expiring()) { - column_mask mask = column_mask::expiration; - uint32_t ttl = marker.ttl().count(); - uint32_t expiration = marker.expiry().time_since_epoch().count(); - write(out, mask, ttl, expiration, timestamp, value_length); - } else { - column_mask mask = column_mask::none; - write(out, mask, timestamp, value_length); - } -} - -void sstable::write_deletion_time(file_writer& out, const tombstone t) { - uint64_t timestamp = t.timestamp; - uint32_t deletion_time = t.deletion_time.time_since_epoch().count(); - - update_cell_stats(_c_stats, timestamp); - _c_stats.update_max_local_deletion_time(deletion_time); - _c_stats.tombstone_histogram.update(deletion_time); - - write(out, deletion_time, timestamp); -} - -void sstable::write_row_tombstone(file_writer& out, const composite& key, const row_tombstone t) { - if (!t) { - return; - } - - auto write_tombstone = [&] (tombstone t, column_mask mask) { - write_column_name(out, key, {}, composite::eoc::start); - write(out, mask); - write_column_name(out, key, {}, composite::eoc::end); - write_deletion_time(out, t); - }; - - write_tombstone(t.regular(), column_mask::range_tombstone); - if (t.is_shadowable()) { - write_tombstone(t.shadowable().tomb(), column_mask::shadowable); - } -} - -void sstable::write_range_tombstone(file_writer& out, - const composite& start, - composite::eoc start_marker, - const composite& end, - composite::eoc end_marker, - std::vector suffix, - const tombstone t) { - if (!t) { - return; - } - - write_column_name(out, start, suffix, start_marker); - column_mask mask = column_mask::range_tombstone; - write(out, mask); - write_column_name(out, end, suffix, end_marker); - write_deletion_time(out, t); -} - -void sstable::write_collection(file_writer& out, const composite& clustering_key, const column_definition& cdef, collection_mutation_view collection) { - - auto t = static_pointer_cast(cdef.type); - auto mview = t->deserialize_mutation_form(collection); - const bytes& column_name = cdef.name(); - write_range_tombstone(out, clustering_key, clustering_key, { bytes_view(column_name) }, mview.tomb); - for (auto& cp: mview.cells) { - maybe_flush_pi_block(out, clustering_key, { column_name, cp.first }); - write_column_name(out, clustering_key, { column_name, cp.first }); - write_cell(out, cp.second, cdef); - } -} - -// This function is about writing a clustered_row to data file according to SSTables format. -// clustered_row contains a set of cells sharing the same clustering key. -void sstable::write_clustered_row(file_writer& out, const schema& schema, const clustering_row& clustered_row) { - auto clustering_key = composite::from_clustering_element(schema, clustered_row.key()); - - if (schema.is_compound() && !schema.is_dense()) { - maybe_flush_pi_block(out, clustering_key, { bytes_view() }); - write_row_marker(out, clustered_row.marker(), clustering_key); - } - // Before writing cells, range tombstone must be written if the row has any (deletable_row::t). 
- if (clustered_row.tomb()) { - maybe_flush_pi_block(out, clustering_key, {}); - write_row_tombstone(out, clustering_key, clustered_row.tomb()); - // Because we currently may break a partition to promoted-index blocks - // in the middle of a clustered row, we also need to track the current - // row's tombstone - not just range tombstones - which may affect the - // beginning of a new block. - // TODO: consider starting a new block only between rows, so the - // following code can be dropped: - _pi_write.tombstone_accumulator->apply(range_tombstone( - clustered_row.key(), bound_kind::incl_start, - clustered_row.key(), bound_kind::incl_end, clustered_row.tomb().tomb())); - } - - if (schema.clustering_key_size()) { - column_name_helper::min_max_components(schema, _collector.min_column_names(), _collector.max_column_names(), - clustered_row.key().components()); - } - - // Write all cells of a partition's row. - clustered_row.cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& c) { - auto&& column_definition = schema.regular_column_at(id); - // non atomic cell isn't supported yet. atomic cell maps to a single thrift cell. - // non atomic cell maps to multiple thrift cells, e.g. collection. - if (!column_definition.is_atomic()) { - write_collection(out, clustering_key, column_definition, c.as_collection_mutation()); - return; - } - assert(column_definition.is_regular()); - atomic_cell_view cell = c.as_atomic_cell(); - const bytes& column_name = column_definition.name(); - - if (schema.is_compound()) { - if (schema.is_dense()) { - maybe_flush_pi_block(out, composite(), { bytes_view(clustering_key) }); - write_column_name(out, bytes_view(clustering_key)); - } else { - maybe_flush_pi_block(out, clustering_key, { bytes_view(column_name) }); - write_column_name(out, clustering_key, { bytes_view(column_name) }); - } - } else { - if (schema.is_dense()) { - maybe_flush_pi_block(out, composite(), { bytes_view(clustered_row.key().get_component(schema, 0)) }); - write_column_name(out, bytes_view(clustered_row.key().get_component(schema, 0))); - } else { - maybe_flush_pi_block(out, composite(), { bytes_view(column_name) }); - write_column_name(out, bytes_view(column_name)); - } - } - write_cell(out, cell, column_definition); - }); -} - -void sstable::write_static_row(file_writer& out, const schema& schema, const row& static_row) { - static_row.for_each_cell([&] (column_id id, const atomic_cell_or_collection& c) { - auto&& column_definition = schema.static_column_at(id); - if (!column_definition.is_atomic()) { - auto sp = composite::static_prefix(schema); - write_collection(out, sp, column_definition, c.as_collection_mutation()); - return; - } - assert(column_definition.is_static()); - const auto& column_name = column_definition.name(); - if (schema.is_compound()) { - auto sp = composite::static_prefix(schema); - maybe_flush_pi_block(out, sp, { bytes_view(column_name) }); - write_column_name(out, sp, { bytes_view(column_name) }); - } else { - assert(!schema.is_dense()); - maybe_flush_pi_block(out, composite(), { bytes_view(column_name) }); - write_column_name(out, bytes_view(column_name)); - } - atomic_cell_view cell = c.as_atomic_cell(); - write_cell(out, cell, column_definition); - }); -} - -static void write_index_header(file_writer& out, disk_string_view& key, uint64_t pos) { - write(out, key, pos); -} - -static void write_index_promoted(file_writer& out, bytes_ostream& promoted_index, - deletion_time deltime, uint32_t numblocks) { - uint32_t promoted_index_size =
promoted_index.size(); - if (promoted_index_size) { - promoted_index_size += 16 /* deltime + numblocks */; - write(out, promoted_index_size, deltime, numblocks, promoted_index); - } else { - write(out, promoted_index_size); - } -} - -static void prepare_summary(summary& s, uint64_t expected_partition_count, uint32_t min_index_interval) { - assert(expected_partition_count >= 1); - - s.header.min_index_interval = min_index_interval; - s.header.sampling_level = downsampling::BASE_SAMPLING_LEVEL; - uint64_t max_expected_entries = - (expected_partition_count / min_index_interval) + - !!(expected_partition_count % min_index_interval); - // FIXME: handle case where max_expected_entries is greater than max value stored by uint32_t. - if (max_expected_entries > std::numeric_limits::max()) { - throw malformed_sstable_exception("Current sampling level (" + to_sstring(downsampling::BASE_SAMPLING_LEVEL) + ") not enough to generate summary."); - } - - s.keys_written = 0; - s.header.memory_size = 0; -} - -static void seal_summary(summary& s, - std::experimental::optional&& first_key, - std::experimental::optional&& last_key) { - s.header.size = s.entries.size(); - s.header.size_at_full_sampling = s.header.size; - - s.header.memory_size = s.header.size * sizeof(uint32_t); - for (auto& e: s.entries) { - s.positions.push_back(s.header.memory_size); - s.header.memory_size += e.key.size() + sizeof(e.position); - } - assert(first_key); // assume non-empty sstable - s.first_key.value = first_key->get_bytes(); - - if (last_key) { - s.last_key.value = last_key->get_bytes(); - } else { - // An empty last_mutation indicates we had just one partition - s.last_key.value = s.first_key.value; - } -} - -static void prepare_compression(compression& c, const schema& schema) { - const auto& cp = schema.get_compressor_params(); - c.set_compressor(cp.get_compressor()); - c.chunk_len = cp.chunk_length(); - c.data_len = 0; - // FIXME: crc_check_chance can be configured by the user. - // probability to verify the checksum of a compressed chunk we read. - // defaults to 1.0. - c.options.elements.push_back({"crc_check_chance", "1.0"}); - c.init_full_checksum(); -} - -static void maybe_add_summary_entry(summary& s, const dht::token& token, bytes_view key, uint64_t offset) { - // Maybe add summary entry into in-memory representation of summary file. 
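// Example: with min_index_interval == 128, keys number 0, 128, 256, ... get
// summary entries, so the summary holds roughly one entry per 128 partitions:
//
//   if ((s.keys_written++ % 128) == 0) { /* sample this key */ }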
- if ((s.keys_written++ % s.header.min_index_interval) == 0) { - s.entries.push_back({ token, bytes(key.data(), key.size()), offset }); - } -} - -static -void -populate_statistics_offsets(statistics& s) { - // copy into a sorted vector to guarantee consistent order - auto types = boost::copy_range>(s.contents | boost::adaptors::map_keys); - boost::sort(types); - - // populate the hash with garbage so we can calculate its size - for (auto t : types) { - s.hash.map[t] = -1; - } - - auto offset = serialized_size(s.hash); - for (auto t : types) { - s.hash.map[t] = offset; - offset += s.contents[t]->serialized_size(); - } -} - -static -sharding_metadata -create_sharding_metadata(schema_ptr schema, const dht::decorated_key& first_key, const dht::decorated_key& last_key, shard_id shard) { - auto prange = dht::partition_range::make(dht::ring_position(first_key), dht::ring_position(last_key)); - auto sm = sharding_metadata(); - for (auto&& range : dht::split_range_to_single_shard(*schema, prange, shard)) { - if (true) { // keep indentation - // we know left/right are not infinite - auto&& left = range.start()->value(); - auto&& right = range.end()->value(); - auto&& left_token = left.token(); - auto left_exclusive = !left.has_key() && left.bound() == dht::ring_position::token_bound::end; - auto&& right_token = right.token(); - auto right_exclusive = !right.has_key() && right.bound() == dht::ring_position::token_bound::start; - sm.token_ranges.elements.push_back(disk_token_range{ - {left_exclusive, to_bytes(bytes_view(left_token._data))}, - {right_exclusive, to_bytes(bytes_view(right_token._data))}}); - } - } - return sm; -} - - -// In the beginning of the statistics file, there is a disk_hash used to -// map each metadata type to its correspondent position in the file. -static void seal_statistics(statistics& s, metadata_collector& collector, - const sstring partitioner, double bloom_filter_fp_chance, schema_ptr schema, - const dht::decorated_key& first_key, const dht::decorated_key& last_key) { - validation_metadata validation; - compaction_metadata compaction; - stats_metadata stats; - - validation.partitioner.value = to_bytes(partitioner); - validation.filter_chance = bloom_filter_fp_chance; - s.contents[metadata_type::Validation] = std::make_unique(std::move(validation)); - - collector.construct_compaction(compaction); - s.contents[metadata_type::Compaction] = std::make_unique(std::move(compaction)); - - collector.construct_stats(stats); - s.contents[metadata_type::Stats] = std::make_unique(std::move(stats)); - - populate_statistics_offsets(s); -} - -// Returns offset into data component. -size_t components_writer::get_offset() { - if (_sst.has_component(sstable::component_type::CompressionInfo)) { - // Variable returned by compressed_file_length() is constantly updated by compressed output stream. - return _sst._components->compression.compressed_file_length(); - } else { - return _out.offset(); - } -} - -file_writer components_writer::index_file_writer(sstable& sst, const io_priority_class& pc) { - file_output_stream_options options; - options.buffer_size = sst.sstable_buffer_size; - options.io_priority_class = pc; - options.write_behind = 10; - return file_writer(std::move(sst._index_file), std::move(options)); -} - -// Get the currently loaded configuration, or the default configuration in -// case none has been loaded (this happens, for example, in unit tests). 
-static const db::config& get_config() { - if (service::get_storage_service().local_is_initialized() && - service::get_local_storage_service().db().local_is_initialized()) { - return service::get_local_storage_service().db().local().get_config(); - } else { - static db::config default_config; - return default_config; - } -} - -components_writer::components_writer(sstable& sst, const schema& s, file_writer& out, - uint64_t estimated_partitions, - const sstable_writer_config& cfg, - const io_priority_class& pc) - : _sst(sst) - , _schema(s) - , _out(out) - , _index(index_file_writer(sst, pc)) - , _index_needs_close(true) - , _max_sstable_size(cfg.max_sstable_size) - , _tombstone_written(false) -{ - _sst._components->filter = utils::i_filter::get_filter(estimated_partitions, _schema.bloom_filter_fp_chance()); - _sst._pi_write.desired_block_size = cfg.promoted_index_block_size.value_or(get_config().column_index_size_in_kb() * 1024); - - prepare_summary(_sst._components->summary, estimated_partitions, _schema.min_index_interval()); - - // FIXME: we may need to set repaired_at stats at this point. -} - -void components_writer::consume_new_partition(const dht::decorated_key& dk) { - // Set current index of data to later compute row size. - _sst._c_stats.start_offset = _out.offset(); - - _partition_key = key::from_partition_key(_schema, dk.key()); - - maybe_add_summary_entry(_sst._components->summary, dk.token(), bytes_view(*_partition_key), _index.offset()); - _sst._components->filter->add(bytes_view(*_partition_key)); - _sst._collector.add_key(bytes_view(*_partition_key)); - - auto p_key = disk_string_view(); - p_key.value = bytes_view(*_partition_key); - - // Write index file entry from partition key into index file. - // Write an index entry minus the "promoted index" (sample of columns) - // part. We can only write that after processing the entire partition - // and collecting the sample of columns. - write_index_header(_index, p_key, _out.offset()); - _sst._pi_write.data = {}; - _sst._pi_write.numblocks = 0; - _sst._pi_write.deltime.local_deletion_time = std::numeric_limits::max(); - _sst._pi_write.deltime.marked_for_delete_at = std::numeric_limits::min(); - _sst._pi_write.block_start_offset = _out.offset(); - _sst._pi_write.tombstone_accumulator = range_tombstone_accumulator(_schema, false); - _sst._pi_write.schemap = &_schema; // sadly we need this - - // Write partition key into data file. - write(_out, p_key); - - _tombstone_written = false; -} - -void components_writer::consume(tombstone t) { - deletion_time d; - - if (t) { - d.local_deletion_time = t.deletion_time.time_since_epoch().count(); - d.marked_for_delete_at = t.timestamp; - - _sst._c_stats.tombstone_histogram.update(d.local_deletion_time); - _sst._c_stats.update_max_local_deletion_time(d.local_deletion_time); - _sst._c_stats.update_min_timestamp(d.marked_for_delete_at); - _sst._c_stats.update_max_timestamp(d.marked_for_delete_at); - } else { - // Default values for live, undeleted rows. - d.local_deletion_time = std::numeric_limits::max(); - d.marked_for_delete_at = std::numeric_limits::min(); - } - write(_out, d); - _tombstone_written = true; - // TODO: need to verify we don't do this twice? 
- _sst._pi_write.deltime = d; -} - -stop_iteration components_writer::consume(static_row&& sr) { - ensure_tombstone_is_written(); - _sst.write_static_row(_out, _schema, sr.cells()); - return stop_iteration::no; -} - -stop_iteration components_writer::consume(clustering_row&& cr) { - ensure_tombstone_is_written(); - _sst.write_clustered_row(_out, _schema, cr); - return stop_iteration::no; -} - -stop_iteration components_writer::consume(range_tombstone&& rt) { - ensure_tombstone_is_written(); - // Remember the range tombstone so when we need to open a new promoted - // index block, we can figure out which ranges are still open and need - // to be repeated in the data file. Note that apply() also drops ranges - // already closed by rt.start, so the accumulator doesn't grow boundless. - _sst._pi_write.tombstone_accumulator->apply(rt); - auto start = composite::from_clustering_element(_schema, std::move(rt.start)); - auto start_marker = bound_kind_to_start_marker(rt.start_kind); - auto end = composite::from_clustering_element(_schema, std::move(rt.end)); - auto end_marker = bound_kind_to_end_marker(rt.end_kind); - _sst.maybe_flush_pi_block(_out, start, {}, start_marker); - _sst.write_range_tombstone(_out, std::move(start), start_marker, std::move(end), end_marker, {}, rt.tomb); - return stop_iteration::no; -} - -stop_iteration components_writer::consume_end_of_partition() { - // If there is an incomplete block in the promoted index, write it too. - // However, if the _promoted_index is still empty, don't add a single - // chunk - better not output a promoted index at all in this case. - if (!_sst._pi_write.data.empty() && !_sst._pi_write.block_first_colname.empty()) { - output_promoted_index_entry(_sst._pi_write.data, - _sst._pi_write.block_first_colname, - _sst._pi_write.block_last_colname, - _sst._pi_write.block_start_offset - _sst._c_stats.start_offset, - _out.offset() - _sst._pi_write.block_start_offset); - _sst._pi_write.numblocks++; - } - write_index_promoted(_index, _sst._pi_write.data, _sst._pi_write.deltime, - _sst._pi_write.numblocks); - _sst._pi_write.data = {}; - _sst._pi_write.block_first_colname = {}; - - ensure_tombstone_is_written(); - int16_t end_of_row = 0; - write(_out, end_of_row); - - // compute size of the current row. - _sst._c_stats.row_size = _out.offset() - _sst._c_stats.start_offset; - // update is about merging column_stats with the data being stored by collector. - _sst._collector.update(_schema, std::move(_sst._c_stats)); - _sst._c_stats.reset(); - - if (!_first_key) { - _first_key = *_partition_key; - } - _last_key = std::move(*_partition_key); - - return get_offset() < _max_sstable_size ? stop_iteration::no : stop_iteration::yes; -} - -void components_writer::consume_end_of_stream() { - seal_summary(_sst._components->summary, std::move(_first_key), std::move(_last_key)); // what if there is only one partition? what if it is empty? 
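// (seal_summary() above already answers both questions: with a single
// partition, last_key stays disengaged and s.last_key is copied from
// s.first_key; with no partitions at all, first_key is disengaged and the
// assert(first_key) in seal_summary() fires.)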
- - _index_needs_close = false; - _index.close().get(); - - if (_sst.has_component(sstable::component_type::CompressionInfo)) { - _sst._collector.add_compression_ratio(_sst._components->compression.compressed_file_length(), _sst._components->compression.uncompressed_file_length()); - } - - _sst.set_first_and_last_keys(); - seal_statistics(_sst._components->statistics, _sst._collector, dht::global_partitioner().name(), _schema.bloom_filter_fp_chance(), - _sst._schema, _sst.get_first_decorated_key(), _sst.get_last_decorated_key()); -} - -components_writer::~components_writer() { - if (_index_needs_close) { - try { - _index.close().get(); - } catch (...) { - sstlog.error("components_writer failed to close file: {}", std::current_exception()); - } - } -} - -future<> -sstable::read_scylla_metadata(const io_priority_class& pc) { - if (_components->scylla_metadata) { - return make_ready_future<>(); - } - return read_toc().then([this, &pc] { - _components->scylla_metadata.emplace(); // engaged optional means we won't try to re-read this again - if (!has_component(component_type::Scylla)) { - return make_ready_future<>(); - } - return read_simple(*_components->scylla_metadata, pc); - }); -} - -void -sstable::write_scylla_metadata(const io_priority_class& pc, shard_id shard) { - auto&& first_key = get_first_decorated_key(); - auto&& last_key = get_last_decorated_key(); - auto sm = create_sharding_metadata(_schema, first_key, last_key, shard); - _components->scylla_metadata.emplace(); - _components->scylla_metadata->data.set(std::move(sm)); - - write_simple(*_components->scylla_metadata, pc); -} - -void sstable_writer::prepare_file_writer() -{ - file_output_stream_options options; - options.io_priority_class = _pc; - options.buffer_size = _sst.sstable_buffer_size; - options.write_behind = 10; - - if (!_compression_enabled) { - _writer = std::make_unique(std::move(_sst._data_file), std::move(options), true); - } else { - prepare_compression(_sst._components->compression, _schema); - _writer = std::make_unique(make_compressed_file_output_stream(std::move(_sst._data_file), std::move(options), &_sst._components->compression)); - } -} - -void sstable_writer::finish_file_writer() -{ - auto writer = std::move(_writer); - writer->close().get(); - - if (!_compression_enabled) { - auto chksum_wr = static_cast(writer.get()); - write_digest(_sst._write_error_handler, _sst.filename(sstable::component_type::Digest), chksum_wr->full_checksum()); - write_crc(_sst._write_error_handler, _sst.filename(sstable::component_type::CRC), chksum_wr->finalize_checksum()); - } else { - write_digest(_sst._write_error_handler, _sst.filename(sstable::component_type::Digest), _sst._components->compression.full_checksum()); - } -} - -sstable_writer::~sstable_writer() { - if (_writer) { - try { - _writer->close().get(); - } catch (...) 
{ - sstlog.error("sstable_writer failed to close file: {}", std::current_exception()); - } - } -} - -sstable_writer::sstable_writer(sstable& sst, const schema& s, uint64_t estimated_partitions, - const sstable_writer_config& cfg, const io_priority_class& pc, shard_id shard) - : _sst(sst) - , _schema(s) - , _pc(pc) - , _backup(cfg.backup) - , _leave_unsealed(cfg.leave_unsealed) - , _shard(shard) -{ - _sst.generate_toc(_schema.get_compressor_params().get_compressor(), _schema.bloom_filter_fp_chance()); - _sst.write_toc(_pc); - _sst.create_data().get(); - _compression_enabled = !_sst.has_component(sstable::component_type::CRC); - prepare_file_writer(); - _components_writer.emplace(_sst, _schema, *_writer, estimated_partitions, cfg, _pc); -} - -void sstable_writer::consume_end_of_stream() -{ - _components_writer->consume_end_of_stream(); - _components_writer = stdx::nullopt; - finish_file_writer(); - _sst.write_summary(_pc); - _sst.write_filter(_pc); - _sst.write_statistics(_pc); - _sst.write_compression(_pc); - _sst.write_scylla_metadata(_pc, _shard); - - if (!_leave_unsealed) { - _sst.seal_sstable(_backup).get(); - } -} - -future<> sstable::seal_sstable(bool backup) -{ - return seal_sstable().then([this, backup] { - if (backup) { - auto dir = get_dir() + "/backups/"; - return sstable_write_io_check(touch_directory, dir).then([this, dir] { - return create_links(dir); - }); - } - return make_ready_future<>(); - }); -} - -sstable_writer sstable::get_writer(const schema& s, uint64_t estimated_partitions, const sstable_writer_config& cfg, const io_priority_class& pc, shard_id shard) -{ - return sstable_writer(*this, s, estimated_partitions, cfg, pc, shard); -} - -future<> sstable::write_components(::mutation_reader mr, - uint64_t estimated_partitions, schema_ptr schema, const sstable_writer_config& cfg, const io_priority_class& pc) { - if (cfg.replay_position) { - _collector.set_replay_position(cfg.replay_position.value()); - } - seastar::thread_attributes attr; - attr.scheduling_group = cfg.thread_scheduling_group; - return seastar::async(std::move(attr), [this, mr = std::move(mr), estimated_partitions, schema = std::move(schema), cfg, &pc] () mutable { - auto wr = get_writer(*schema, estimated_partitions, cfg, pc); - consume_flattened_in_thread(mr, wr); - }); -} - -future<> sstable::generate_summary(const io_priority_class& pc) { - if (_components->summary) { - return make_ready_future<>(); - } - - sstlog.info("Summary file {} not found. Generating Summary...", filename(sstable::component_type::Summary)); - class summary_generator { - summary& _summary; - public: - std::experimental::optional first_key, last_key; - - summary_generator(summary& s) : _summary(s) {} - bool should_continue() { - return true; - } - void consume_entry(index_entry&& ie, uint64_t offset) { - auto token = dht::global_partitioner().get_token(ie.get_key()); - maybe_add_summary_entry(_summary, token, ie.get_key_bytes(), offset); - if (!first_key) { - first_key = key(to_bytes(ie.get_key_bytes())); - } else { - last_key = key(to_bytes(ie.get_key_bytes())); - } - } - }; - - return open_checked_file_dma(_read_error_handler, filename(component_type::Index), open_flags::ro).then([this, &pc] (file index_file) { - return do_with(std::move(index_file), [this, &pc] (file index_file) { - return index_file.size().then([this, &pc, index_file] (auto size) { - // an upper bound. Surely to be less than this. 
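// (Each index entry holds at least a 2-byte key length, the key bytes, an
// 8-byte position and a 4-byte promoted-index length, so it always occupies
// more than sizeof(uint64_t) bytes; dividing the index file size by 8
// therefore over-estimates the partition count, which is safe because
// prepare_summary() only uses the estimate for its overflow check.)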
- auto estimated_partitions = size / sizeof(uint64_t); - prepare_summary(_components->summary, estimated_partitions, _schema->min_index_interval()); - - file_input_stream_options options; - options.buffer_size = sstable_buffer_size; - options.io_priority_class = pc; - auto stream = make_file_input_stream(index_file, 0, size, std::move(options)); - return do_with(summary_generator(_components->summary), [this, &pc, stream = std::move(stream), size] (summary_generator& s) mutable { - auto ctx = make_lw_shared>(s, std::move(stream), 0, size); - return ctx->consume_input(*ctx).finally([ctx] { - return ctx->close(); - }).then([this, ctx, &s] { - seal_summary(_components->summary, std::move(s.first_key), std::move(s.last_key)); - }); - }); - }).then([index_file] () mutable { - return index_file.close().handle_exception([] (auto ep) { - sstlog.warn("sstable close index_file failed: {}", ep); - general_disk_error(); - }); - }); - }); - }); -} - -uint64_t sstable::data_size() const { - if (has_component(sstable::component_type::CompressionInfo)) { - return _components->compression.data_len; - } - return _data_file_size; -} - -uint64_t sstable::ondisk_data_size() const { - return _data_file_size; -} - -uint64_t sstable::bytes_on_disk() { - assert(_bytes_on_disk > 0); - return _bytes_on_disk; -} - -const bool sstable::has_component(component_type f) const { - return _recognized_components.count(f); -} - -const sstring sstable::filename(component_type f) const { - return filename(_dir, _schema->ks_name(), _schema->cf_name(), _version, _generation, _format, f); -} - -std::vector sstable::component_filenames() const { - std::vector res; - for (auto c : _component_map | boost::adaptors::map_keys) { - if (has_component(c)) { - res.emplace_back(filename(c)); - } - } - return res; -} - -sstring sstable::toc_filename() const { - return filename(component_type::TOC); -} - -const sstring sstable::filename(sstring dir, sstring ks, sstring cf, version_types version, int64_t generation, - format_types format, component_type component) { - - static std::unordered_map, enum_hash> strmap = { - { sstable::version_types::ka, [] (entry_descriptor d) { - return d.ks + "-" + d.cf + "-" + _version_string.at(d.version) + "-" + to_sstring(d.generation) + "-" + _component_map.at(d.component); } - }, - { sstable::version_types::la, [] (entry_descriptor d) { - return _version_string.at(d.version) + "-" + to_sstring(d.generation) + "-" + _format_string.at(d.format) + "-" + _component_map.at(d.component); } - } - }; - - return dir + "/" + strmap[version](entry_descriptor(ks, cf, version, generation, format, component)); -} - -const sstring sstable::filename(sstring dir, sstring ks, sstring cf, version_types version, int64_t generation, - format_types format, sstring component) { - static std::unordered_map> fmtmap = { - { sstable::version_types::ka, "{0}-{1}-{2}-{3}-{5}" }, - { sstable::version_types::la, "{2}-{3}-{4}-{5}" } - }; - - return dir + "/" + seastar::format(fmtmap[version], ks, cf, _version_string.at(version), to_sstring(generation), _format_string.at(format), component); -} - -std::vector> sstable::all_components() const { - std::vector> all; - all.reserve(_recognized_components.size() + _unrecognized_components.size()); - for (auto& c : _recognized_components) { - all.push_back(std::make_pair(c, _component_map.at(c))); - } - for (auto& c : _unrecognized_components) { - all.push_back(std::make_pair(component_type::Unknown, c)); - } - return all; -} - -future<> sstable::create_links(sstring dir, int64_t generation) 
const { - // TemporaryTOC is always first, TOC is always last - auto dst = sstable::filename(dir, _schema->ks_name(), _schema->cf_name(), _version, generation, _format, component_type::TemporaryTOC); - return sstable_write_io_check(::link_file, filename(component_type::TOC), dst).then([this, dir] { - return sstable_write_io_check(sync_directory, dir); - }).then([this, dir, generation] { - // FIXME: Should clean already-created links if we failed midway. - return parallel_for_each(all_components(), [this, dir, generation] (auto p) { - if (p.first == component_type::TOC) { - return make_ready_future<>(); - } - auto src = sstable::filename(_dir, _schema->ks_name(), _schema->cf_name(), _version, _generation, _format, p.second); - auto dst = sstable::filename(dir, _schema->ks_name(), _schema->cf_name(), _version, generation, _format, p.second); - return this->sstable_write_io_check(::link_file, std::move(src), std::move(dst)); - }); - }).then([this, dir] { - return sstable_write_io_check(sync_directory, dir); - }).then([dir, this, generation] { - auto src = sstable::filename(dir, _schema->ks_name(), _schema->cf_name(), _version, generation, _format, component_type::TemporaryTOC); - auto dst = sstable::filename(dir, _schema->ks_name(), _schema->cf_name(), _version, generation, _format, component_type::TOC); - return sstable_write_io_check([&] { - return engine().rename_file(src, dst); - }); - }).then([this, dir] { - return sstable_write_io_check(sync_directory, dir); - }); -} - -future<> sstable::set_generation(int64_t new_generation) { - return create_links(_dir, new_generation).then([this] { - return remove_file(filename(component_type::TOC)).then([this] { - return sstable_write_io_check(sync_directory, _dir); - }).then([this] { - return parallel_for_each(all_components(), [this] (auto p) { - if (p.first == component_type::TOC) { - return make_ready_future<>(); - } - return remove_file(sstable::filename(_dir, _schema->ks_name(), _schema->cf_name(), _version, _generation, _format, p.second)); - }); - }); - }).then([this, new_generation] { - return sync_directory(_dir).then([this, new_generation] { - _generation = new_generation; - }); - }); -} - -entry_descriptor entry_descriptor::make_descriptor(sstring fname) { - static std::regex la("la-(\\d+)-(\\w+)-(.*)"); - static std::regex ka("(\\w+)-(\\w+)-ka-(\\d+)-(.*)"); - - std::smatch match; - - sstable::version_types version; - - sstring generation; - sstring format; - sstring component; - sstring ks; - sstring cf; - - std::string s(fname); - if (std::regex_match(s, match, la)) { - sstring ks = ""; - sstring cf = ""; - version = sstable::version_types::la; - generation = match[1].str(); - format = sstring(match[2].str()); - component = sstring(match[3].str()); - } else if (std::regex_match(s, match, ka)) { - ks = match[1].str(); - cf = match[2].str(); - version = sstable::version_types::ka; - format = sstring("big"); - generation = match[3].str(); - component = sstring(match[4].str()); - } else { - throw malformed_sstable_exception(sprint("invalid version for file %s. 
Name doesn't match any known version.", fname)); - } - return entry_descriptor(ks, cf, version, boost::lexical_cast(generation), sstable::format_from_sstring(format), sstable::component_from_sstring(component)); -} - -sstable::version_types sstable::version_from_sstring(sstring &s) { - return reverse_map(s, _version_string); -} - -sstable::format_types sstable::format_from_sstring(sstring &s) { - return reverse_map(s, _format_string); -} - -sstable::component_type sstable::component_from_sstring(sstring &s) { - try { - return reverse_map(s, _component_map); - } catch (std::out_of_range&) { - return component_type::Unknown; - } -} - -input_stream sstable::data_stream(uint64_t pos, size_t len, const io_priority_class& pc, lw_shared_ptr history) { - file_input_stream_options options; - options.buffer_size = sstable_buffer_size; - options.io_priority_class = pc; - options.read_ahead = 4; - options.dynamic_adjustments = std::move(history); - if (_components->compression) { - return make_compressed_file_input_stream(_data_file, &_components->compression, - pos, len, std::move(options)); - } else { - return make_file_input_stream(_data_file, pos, len, std::move(options)); - } -} - -future> sstable::data_read(uint64_t pos, size_t len, const io_priority_class& pc) { - return do_with(data_stream(pos, len, pc, { }), [len] (auto& stream) { - return stream.read_exactly(len).finally([&stream] { - return stream.close(); - }); - }); -} - -void sstable::set_first_and_last_keys() { - if (_first && _last) { - return; - } - auto decorate_key = [this] (const char *m, const bytes& value) { - if (value.empty()) { - throw std::runtime_error(sprint("%s key of summary of %s is empty", m, get_filename())); - } - auto pk = key::from_bytes(value).to_partition_key(*_schema); - return dht::global_partitioner().decorate_key(*_schema, std::move(pk)); - }; - _first = decorate_key("first", _components->summary.first_key.value); - _last = decorate_key("last", _components->summary.last_key.value); -} - -const partition_key& sstable::get_first_partition_key() const { - return get_first_decorated_key().key(); - } - -const partition_key& sstable::get_last_partition_key() const { - return get_last_decorated_key().key(); -} - -const dht::decorated_key& sstable::get_first_decorated_key() const { - if (!_first) { - throw std::runtime_error(sprint("first key of %s wasn't set", get_filename())); - } - return *_first; -} - -const dht::decorated_key& sstable::get_last_decorated_key() const { - if (!_last) { - throw std::runtime_error(sprint("last key of %s wasn't set", get_filename())); - } - return *_last; -} - -int sstable::compare_by_first_key(const sstable& other) const { - return get_first_decorated_key().tri_compare(*_schema, other.get_first_decorated_key()); -} - -double sstable::get_compression_ratio() const { - if (this->has_component(sstable::component_type::CompressionInfo)) { - return double(_components->compression.compressed_file_length()) / _components->compression.uncompressed_file_length(); - } else { - return metadata_collector::NO_COMPRESSION_RATIO; - } -} - -std::unordered_set sstable::ancestors() const { - const compaction_metadata& cm = get_compaction_metadata(); - return boost::copy_range>(cm.ancestors.elements); -} - -void sstable::set_sstable_level(uint32_t new_level) { - auto entry = _components->statistics.contents.find(metadata_type::Stats); - if (entry == _components->statistics.contents.end()) { - return; - } - auto& p = entry->second; - if (!p) { - throw std::runtime_error("Statistics is malformed"); - } - 
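Returning to make_descriptor() above: the two regexes encode the two on-disk naming schemes. "la" names omit keyspace and table and carry an explicit format; "ka" names embed keyspace and table and imply the "big" format. A standalone demonstration using the same patterns (the sample filenames are invented):

#include <iostream>
#include <regex>
#include <string>

int main() {
    // Same patterns as entry_descriptor::make_descriptor() above.
    std::regex la("la-(\\d+)-(\\w+)-(.*)");
    std::regex ka("(\\w+)-(\\w+)-ka-(\\d+)-(.*)");

    std::smatch m;
    std::string f1 = "la-42-big-Data.db";
    if (std::regex_match(f1, m, la)) {
        // generation=42, format=big, component=Data.db; ks/cf are not
        // encoded in "la" names and stay empty.
        std::cout << m[1] << ' ' << m[2] << ' ' << m[3] << '\n';
    }
    std::string f2 = "ks1-cf1-ka-7-Data.db";
    if (std::regex_match(f2, m, ka)) {
        // ks=ks1, cf=cf1, generation=7, component=Data.db; the format
        // is implied ("big").
        std::cout << m[1] << ' ' << m[2] << ' ' << m[3] << ' ' << m[4] << '\n';
    }
}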
stats_metadata& s = *static_cast(p.get()); - sstlog.debug("set level of {} with generation {} from {} to {}", get_filename(), _generation, s.sstable_level, new_level); - s.sstable_level = new_level; -} - -future<> sstable::mutate_sstable_level(uint32_t new_level) { - if (!has_component(component_type::Statistics)) { - return make_ready_future<>(); - } - - auto entry = _components->statistics.contents.find(metadata_type::Stats); - if (entry == _components->statistics.contents.end()) { - return make_ready_future<>(); - } - - auto& p = entry->second; - if (!p) { - throw std::runtime_error("Statistics is malformed"); - } - stats_metadata& s = *static_cast(p.get()); - if (s.sstable_level == new_level) { - return make_ready_future<>(); - } - - s.sstable_level = new_level; - // Technically we don't have to write the whole file again. But the assumption that - // we will always write sequentially is a powerful one, and this does not merit an - // exception. - return seastar::async([this] { - // This is not part of the standard memtable flush path, but there is no reason - // to come up with a class just for that. It is used by the snapshot/restore mechanism - // which comprises mostly hard link creation and this operation at the end + this operation, - // and also (eventually) by some compaction strategy. In any of the cases, it won't be high - // priority enough so we will use the default priority - rewrite_statistics(default_priority_class()); - }); -} - -int sstable::compare_by_max_timestamp(const sstable& other) const { - auto ts1 = get_stats_metadata().max_timestamp; - auto ts2 = other.get_stats_metadata().max_timestamp; - return (ts1 > ts2 ? 1 : (ts1 == ts2 ? 0 : -1)); -} - -sstable::~sstable() { - if (_index_file) { - _index_file.close().handle_exception([save = _index_file, op = background_jobs().start()] (auto ep) { - sstlog.warn("sstable close index_file failed: {}", ep); - general_disk_error(); - }); - } - if (_data_file) { - _data_file.close().handle_exception([save = _data_file, op = background_jobs().start()] (auto ep) { - sstlog.warn("sstable close data_file failed: {}", ep); - general_disk_error(); - }); - } - - if (_marked_for_deletion) { - // We need to delete the on-disk files for this table. Since this is a - // destructor, we can't wait for this to finish, or return any errors, - // but just need to do our best. If a deletion fails for some reason we - // log and ignore this failure, because on startup we'll again try to - // clean up unused sstables, and because we'll never reuse the same - // generation number anyway. - try { - delete_atomically({sstable_to_delete(filename(component_type::TOC), _shared)}).handle_exception( - [op = background_jobs().start()] (std::exception_ptr eptr) { - try { - std::rethrow_exception(eptr); - } catch (atomic_deletion_cancelled&) { - sstlog.debug("Exception when deleting sstable file: {}", eptr); - } catch (...) { - sstlog.warn("Exception when deleting sstable file: {}", eptr); - } - }); - } catch (...) 
{ - sstlog.warn("Exception when deleting sstable file: {}", std::current_exception()); - } - - } -} - -sstring -dirname(sstring fname) { - return boost::filesystem::canonical(std::string(fname)).parent_path().string(); -} - -future<> -fsync_directory(const io_error_handler& error_handler, sstring fname) { - return ::sstable_io_check(error_handler, [&] { - return open_checked_directory(error_handler, dirname(fname)).then([] (file f) { - return do_with(std::move(f), [] (file& f) { - return f.flush().then([&f] { - return f.close(); - }); - }); - }); - }); -} - -future<> -remove_by_toc_name(sstring sstable_toc_name, const io_error_handler& error_handler) { - return seastar::async([sstable_toc_name, &error_handler] () mutable { - sstring prefix = sstable_toc_name.substr(0, sstable_toc_name.size() - TOC_SUFFIX.size()); - auto new_toc_name = prefix + TEMPORARY_TOC_SUFFIX; - sstring dir; - - if (sstable_io_check(error_handler, file_exists, sstable_toc_name).get0()) { - dir = dirname(sstable_toc_name); - sstable_io_check(error_handler, rename_file, sstable_toc_name, new_toc_name).get(); - fsync_directory(error_handler, dir).get(); - } else if (sstable_io_check(error_handler, file_exists, new_toc_name).get0()) { - dir = dirname(new_toc_name); - } else { - sstlog.warn("Unable to delete {} because it doesn't exist.", sstable_toc_name); - return; - } - - auto toc_file = open_checked_file_dma(error_handler, new_toc_name, open_flags::ro).get0(); - auto in = make_file_input_stream(toc_file); - auto size = toc_file.size().get0(); - auto text = in.read_exactly(size).get0(); - in.close().get(); - std::vector components; - sstring all(text.begin(), text.end()); - boost::split(components, all, boost::is_any_of("\n")); - parallel_for_each(components, [prefix, &error_handler] (sstring component) mutable { - if (component.empty()) { - // eof - return make_ready_future<>(); - } - if (component == TOC_SUFFIX) { - // already deleted - return make_ready_future<>(); - } - auto fname = prefix + component; - return sstable_io_check(error_handler, remove_file, prefix + component).then_wrapped([fname = std::move(fname)] (future<> f) { - // forgive ENOENT, since the component may not have been written; - try { - f.get(); - } catch (std::system_error& e) { - if (!is_system_error_errno(ENOENT)) { - throw; - } - sstlog.debug("Forgiving ENOENT when deleting file {}", fname); - } - return make_ready_future<>(); - }); - }).get(); - fsync_directory(error_handler, dir).get(); - sstable_io_check(error_handler, remove_file, new_toc_name).get(); - }); -} - -future<> -sstable::remove_sstable_with_temp_toc(sstring ks, sstring cf, sstring dir, int64_t generation, version_types v, format_types f) { - return seastar::async([ks, cf, dir, generation, v, f] { - const io_error_handler& error_handler = sstable_write_error_handler; - auto toc = sstable_io_check(error_handler, file_exists, filename(dir, ks, cf, v, generation, f, component_type::TOC)).get0(); - // assert that toc doesn't exist for sstable with temporary toc. - assert(toc == false); - - auto tmptoc = sstable_io_check(error_handler, file_exists, filename(dir, ks, cf, v, generation, f, component_type::TemporaryTOC)).get0(); - // assert that temporary toc exists for this sstable. 
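For contrast with this boot-time cleanup, remove_by_toc_name() above is the forward direction of the same protocol. A synchronous sketch of it using only std::filesystem (no seastar; error handling and directory fsyncs omitted; the "TOC.txt"/"TOC.txt.tmp" suffixes are assumptions, not taken from this patch):

#include <filesystem>
#include <fstream>
#include <string>
#include <system_error>

namespace fs = std::filesystem;

// Renaming TOC -> TemporaryTOC first marks the sstable as "being deleted",
// so a crash mid-way leaves a state that remove_sstable_with_temp_toc()
// can finish cleaning on the next boot.
void remove_by_toc_sketch(const fs::path& toc_path, const std::string& prefix) {
    fs::path tmp = prefix + "TOC.txt.tmp";  // assumed TEMPORARY_TOC_SUFFIX
    fs::rename(toc_path, tmp);              // 1. mark deletion as in progress
    std::ifstream in(tmp);
    for (std::string component; std::getline(in, component); ) {
        if (component.empty() || component == "TOC.txt") {
            continue;                       // skip blank lines and the TOC itself
        }
        std::error_code ec;
        fs::remove(prefix + component, ec); // 2. forgive ENOENT, like the original
    }
    fs::remove(tmp);                        // 3. TemporaryTOC goes last
}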
-        assert(tmptoc == true);
-
-        sstlog.warn("Deleting components of sstable from {}.{} of generation {} that has a temporary TOC", ks, cf, generation);
-
-        for (auto& entry : sstable::_component_map) {
-            // Skipping TemporaryTOC because it must be the last component to
-            // be deleted, and unordered map doesn't guarantee ordering.
-            // This is needed because we may end up with a partial delete in
-            // the event of a power failure.
-            // If TemporaryTOC is deleted prematurely and scylla crashes,
-            // the subsequent boot would fail because of that generation
-            // missing a TOC.
-            if (entry.first == component_type::TemporaryTOC) {
-                continue;
-            }
-
-            auto file_path = filename(dir, ks, cf, v, generation, f, entry.first);
-            // Skip a component that doesn't exist.
-            auto exists = sstable_io_check(error_handler, file_exists, file_path).get0();
-            if (!exists) {
-                continue;
-            }
-            sstable_io_check(error_handler, remove_file, file_path).get();
-        }
-        fsync_directory(error_handler, dir).get();
-        // Remove the temporary TOC file.
-        sstable_io_check(error_handler, remove_file, filename(dir, ks, cf, v, generation, f, component_type::TemporaryTOC)).get();
-        // Fsync the column family dir to guarantee that the deletion completed.
-        fsync_directory(error_handler, dir).get();
-    });
-}
-
-future<range<partition_key>>
-sstable::get_sstable_key_range(const schema& s) {
-    auto fut = read_summary(default_priority_class());
-    return std::move(fut).then([this, &s] () mutable {
-        this->set_first_and_last_keys();
-        return make_ready_future<range<partition_key>>(range<partition_key>::make(get_first_partition_key(), get_last_partition_key()));
-    });
-}
-
-future<std::vector<unsigned>>
-sstable::get_owning_shards_from_unloaded() {
-    return when_all(read_summary(default_priority_class()), read_scylla_metadata(default_priority_class())).then(
-            [this] (std::tuple<future<>, future<>> rets) {
-        std::get<0>(rets).get();
-        std::get<1>(rets).get();
-        set_first_and_last_keys();
-        return get_shards_for_this_sstable();
-    });
-}
-
-/**
- * Returns a pair of positions [p1, p2) in the summary file corresponding to entries
- * covered by the specified range, or a disengaged optional if no such pair exists.
- */
-stdx::optional<std::pair<uint64_t, uint64_t>> sstable::get_sample_indexes_for_range(const dht::token_range& range) {
-    auto entries_size = _components->summary.entries.size();
-    auto search = [this](bool before, const dht::token& token) {
-        auto kind = before ? key::kind::before_all_keys : key::kind::after_all_keys;
-        key k(kind);
-        // Binary search will never return positive values here, because the
-        // probe key never compares equal to an entry; (result + 1) * -1
-        // turns the encoded miss back into the insertion index.
-        return uint64_t((binary_search(_components->summary.entries, k, token) + 1) * -1);
-    };
-    uint64_t left = 0;
-    if (range.start()) {
-        left = search(range.start()->is_inclusive(), range.start()->value());
-        if (left == entries_size) {
-            // left is past the end of the sampling.
-            return stdx::nullopt;
-        }
-    }
-    uint64_t right = entries_size;
-    if (range.end()) {
-        right = search(!range.end()->is_inclusive(), range.end()->value());
-        if (right == 0) {
-            // The first key is strictly greater than the right bound.
- return stdx::nullopt; - } - } - if (left < right) { - return stdx::optional>(stdx::in_place_t(), left, right); - } - return stdx::nullopt; -} - -std::vector sstable::get_key_samples(const schema& s, const dht::token_range& range) { - auto index_range = get_sample_indexes_for_range(range); - std::vector res; - if (index_range) { - for (auto idx = index_range->first; idx < index_range->second; ++idx) { - auto pkey = _components->summary.entries[idx].get_key().to_partition_key(s); - res.push_back(dht::global_partitioner().decorate_key(s, std::move(pkey))); - } - } - return res; -} - -uint64_t sstable::estimated_keys_for_range(const dht::token_range& range) { - auto sample_index_range = get_sample_indexes_for_range(range); - uint64_t sample_key_count = sample_index_range ? sample_index_range->second - sample_index_range->first : 0; - // adjust for the current sampling level - uint64_t estimated_keys = sample_key_count * ((downsampling::BASE_SAMPLING_LEVEL * _components->summary.header.min_index_interval) / _components->summary.header.sampling_level); - return std::max(uint64_t(1), estimated_keys); -} - -std::vector -sstable::get_shards_for_this_sstable() const { - std::unordered_set shards; - dht::partition_range_vector token_ranges; - const auto* sm = _components->scylla_metadata - ? _components->scylla_metadata->data.get() - : nullptr; - if (!sm) { - token_ranges.push_back(dht::partition_range::make( - dht::ring_position::starting_at(get_first_decorated_key().token()), - dht::ring_position::ending_at(get_last_decorated_key().token()))); - } else { - auto disk_token_range_to_ring_position_range = [] (const disk_token_range& dtr) { - auto t1 = dht::token(dht::token::kind::key, managed_bytes(bytes_view(dtr.left.token))); - auto t2 = dht::token(dht::token::kind::key, managed_bytes(bytes_view(dtr.right.token))); - return dht::partition_range::make( - (dtr.left.exclusive ? dht::ring_position::ending_at : dht::ring_position::starting_at)(std::move(t1)), - (dtr.right.exclusive ? dht::ring_position::starting_at : dht::ring_position::ending_at)(std::move(t2))); - }; - token_ranges = boost::copy_range( - sm->token_ranges.elements - | boost::adaptors::transformed(disk_token_range_to_ring_position_range)); - } - auto sharder = dht::ring_position_range_vector_sharder(std::move(token_ranges)); - auto rpras = sharder.next(*_schema); - while (rpras) { - shards.insert(rpras->shard); - rpras = sharder.next(*_schema); - } - return boost::copy_range>(shards); -} - -utils::hashed_key sstable::make_hashed_key(const schema& s, const partition_key& key) { - return utils::make_hashed_key(static_cast(key::from_partition_key(s, key))); -} - -std::ostream& -operator<<(std::ostream& os, const sstable_to_delete& std) { - return os << std.name << "(" << (std.shared ? 
"shared" : "unshared") << ")"; -} - -future<> -delete_sstables(std::vector tocs) { - // FIXME: this needs to be done atomically (using a log file of sstables we intend to delete) - return parallel_for_each(tocs, [] (sstring name) { - return remove_by_toc_name(name); - }); -} - -static thread_local atomic_deletion_manager g_atomic_deletion_manager(smp::count, delete_sstables); - -future<> -delete_atomically(std::vector ssts) { - auto shard = engine().cpu_id(); - return smp::submit_to(0, [=] { - return g_atomic_deletion_manager.delete_atomically(ssts, shard); - }); -} - -future<> -delete_atomically(std::vector ssts) { - std::vector sstables_to_delete_atomically; - for (auto&& sst : ssts) { - sstables_to_delete_atomically.push_back({sst->toc_filename(), sst->is_shared()}); - } - return delete_atomically(std::move(sstables_to_delete_atomically)); -} - -void cancel_atomic_deletions() { - g_atomic_deletion_manager.cancel_atomic_deletions(); -} - -atomic_deletion_cancelled::atomic_deletion_cancelled(std::vector names) - : _msg(sprint("atomic deletions cancelled; not deleting %s", names)) { -} - -const char* -atomic_deletion_cancelled::what() const noexcept { - return _msg.c_str(); -} - -thread_local shared_index_lists::stats shared_index_lists::_shard_stats; -static thread_local seastar::metrics::metric_groups metrics; - -future<> init_metrics() { - return seastar::smp::invoke_on_all([] { - namespace sm = seastar::metrics; - metrics.add_group("sstables", { - sm::make_derive("index_page_hits", [] { return shared_index_lists::shard_stats().hits; }, - sm::description("Index page requests which could be satisfied without waiting")), - sm::make_derive("index_page_misses", [] { return shared_index_lists::shard_stats().misses; }, - sm::description("Index page requests which initiated a read from disk")), - sm::make_derive("index_page_blocks", [] { return shared_index_lists::shard_stats().blocks; }, - sm::description("Index page requests which needed to wait due to page not being loaded yet")), - }); - }); -} - -struct range_reader_adaptor final : public ::mutation_reader::impl { - sstables::shared_sstable _sst; - sstables::mutation_reader _rd; -public: - range_reader_adaptor(sstables::shared_sstable sst, sstables::mutation_reader rd) - : _sst(std::move(sst)), _rd(std::move(rd)) {} - virtual future operator()() override { - return _rd.read(); - } - virtual future<> fast_forward_to(const dht::partition_range& pr) override { - return _rd.fast_forward_to(pr); - } -}; - -struct single_partition_reader_adaptor final : public ::mutation_reader::impl { - sstables::shared_sstable _sst; - schema_ptr _s; - dht::ring_position_view _key; - const query::partition_slice& _slice; - const io_priority_class& _pc; - streamed_mutation::forwarding _fwd; -public: - single_partition_reader_adaptor(sstables::shared_sstable sst, schema_ptr s, dht::ring_position_view key, - const query::partition_slice& slice, const io_priority_class& pc, streamed_mutation::forwarding fwd) - : _sst(sst), _s(s), _key(key), _slice(slice), _pc(pc), _fwd(fwd) - { } - virtual future operator()() override { - if (!_sst) { - return make_ready_future(stdx::nullopt); - } - auto sst = std::move(_sst); - return sst->read_row(_s, _key, _slice, _pc, _fwd); - } - virtual future<> fast_forward_to(const dht::partition_range& pr) override { - throw std::bad_function_call(); - } -}; - -mutation_source sstable::as_mutation_source() { - return mutation_source([sst = shared_from_this()] (schema_ptr s, - const dht::partition_range& range, - const 
query::partition_slice& slice, - const io_priority_class& pc, - tracing::trace_state_ptr trace_ptr, - streamed_mutation::forwarding fwd, - ::mutation_reader::forwarding fwd_mr) mutable { - // CAVEAT: if as_mutation_source() is called on a single partition - // we want to optimize and read exactly this partition. As a - // consequence, fast_forward_to() will *NOT* work on the result, - // regardless of what the fwd_mr parameter says. - if (range.is_singular() && range.start()->value().has_key()) { - const dht::ring_position& pos = range.start()->value(); - return make_mutation_reader(sst, s, pos, slice, pc, fwd); - } else { - return make_mutation_reader(sst, sst->read_range_rows(s, range, slice, pc, fwd, fwd_mr)); - } - }); -} - - -} diff --git a/scylla/sstables/sstables.hh b/scylla/sstables/sstables.hh deleted file mode 100644 index 9d01f2f..0000000 --- a/scylla/sstables/sstables.hh +++ /dev/null @@ -1,858 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - * - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "core/file.hh" -#include "core/fstream.hh" -#include "core/future.hh" -#include "core/sstring.hh" -#include "core/enum.hh" -#include "core/shared_ptr.hh" -#include "core/distributed.hh" -#include -#include -#include "types.hh" -#include "clustering_key_filter.hh" -#include "core/enum.hh" -#include "compress.hh" -#include "row.hh" -#include "dht/i_partitioner.hh" -#include "schema.hh" -#include "mutation.hh" -#include "utils/i_filter.hh" -#include "core/stream.hh" -#include "writer.hh" -#include "metadata_collector.hh" -#include "filter.hh" -#include "exceptions.hh" -#include "mutation_reader.hh" -#include "query-request.hh" -#include "compound_compat.hh" -#include "disk-error-handler.hh" -#include "atomic_deletion.hh" -#include "sstables/shared_index_lists.hh" -#include "db/commitlog/replay_position.hh" - -namespace seastar { -class thread_scheduling_group; -} - -namespace sstables { - -extern logging::logger sstlog; - -// data_consume_context is an object returned by sstable::data_consume_rows() -// which allows knowing when the consumer stops reading, and starting it again -// (e.g., when the consumer wants to stop after every sstable row). -// -// The read() method initiates reading into the consumer, and continues to -// read and feed data into the consumer until one of the consumer's callbacks -// requests to stop, or until we reach the end of the data range originally -// requested. read() returns a future which completes when reading stopped. -// If we're at the end-of-file, the read may complete without reading anything -// so it's the consumer class's task to check if anything was consumed. -// Note: -// The caller MUST ensure that between calling read() on this object, -// and the time the returned future is completed, the object lives on. 
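In practice that lifetime rule is what do_with() is for. A hypothetical consumption loop under those assumptions (runs inside a seastar::thread so .get() may block; the row_consumer subclass and its done() flag are inventions for this sketch):

do_with(sst->data_consume_rows(consumer), [&consumer] (data_consume_context& ctx) {
    return repeat([&ctx, &consumer] {
        // Each read() feeds the consumer until it asks to pause or data ends.
        return ctx.read().then([&consumer] {
            return consumer.done() ? stop_iteration::yes : stop_iteration::no;
        });
    });
}).get();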
-// Moreover, the sstable object used for the sstable::data_consume_rows() -// call which created this data_consume_context, must also be kept alive. -class data_consume_context { - class impl; - std::unique_ptr _pimpl; - // This object can only be constructed by sstable::data_consume_rows() - data_consume_context(std::unique_ptr); - friend class sstable; -public: - future<> read(); - future<> fast_forward_to(uint64_t begin, uint64_t end); - future<> skip_to(indexable_element, uint64_t begin); - uint64_t position() const; - // Define (as defaults) the destructor and move operations in the source - // file, so here we don't need to know the incomplete impl type. - ~data_consume_context(); - data_consume_context(data_consume_context&&) noexcept; - data_consume_context& operator=(data_consume_context&&) noexcept; -}; - -// mutation_reader is an object returned by sstable::read_rows() et al. which -// allows getting each sstable row in sequence, in mutation format. -// -// The read() method reads the next mutation, returning a disengaged optional -// on EOF. As usual for future-returning functions, a caller which starts a -// read() MUST ensure that the mutation_reader object continues to live until -// the returned future is fulfilled. Moreover, the sstable whose read_rows() -// method was used to open this mutation_reader must also live between the -// time read() is called and its future ends. -// As soon as the future returned by read() completes, the object may safely -// be deleted. In other words, when the read() future is fulfilled, we can -// be sure there are no background tasks still scheduled. -class mutation_reader { - class impl; - std::unique_ptr _pimpl; - // This object can only be constructed by sstable::read_rows() et al. - mutation_reader(std::unique_ptr); - friend class sstable; -public: - future read(); - future<> fast_forward_to(const dht::partition_range&); - // Define (as defaults) the destructor and move operations in the source - // file, so here we don't need to know the incomplete impl type. 
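Correspondingly, a hypothetical loop over a whole sstable with this class (again inside a seastar::thread; process() is a placeholder for caller logic):

auto rd = sst->read_rows(schema);
while (auto smo = rd.read().get0()) {   // disengaged optional signals EOF
    process(std::move(*smo));           // rd must stay alive across reads
}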
- ~mutation_reader(); - mutation_reader(mutation_reader&&); - mutation_reader& operator=(mutation_reader&&); -}; - -class key; -class sstable_writer; -struct foreign_sstable_open_info; -struct sstable_open_info; - -class index_reader; - -struct sstable_writer_config { - std::experimental::optional promoted_index_block_size; - uint64_t max_sstable_size = std::numeric_limits::max(); - bool backup = false; - bool leave_unsealed = false; - stdx::optional replay_position; - seastar::thread_scheduling_group* thread_scheduling_group = nullptr; -}; - -class sstable : public enable_lw_shared_from_this { -public: - enum class component_type { - Index, - CompressionInfo, - Data, - TOC, - Summary, - Digest, - CRC, - Filter, - Statistics, - TemporaryTOC, - TemporaryStatistics, - Scylla, - Unknown, - }; - enum class version_types { ka, la }; - enum class format_types { big }; - static const size_t default_buffer_size = 128*1024; -public: - sstable(schema_ptr schema, sstring dir, int64_t generation, version_types v, format_types f, gc_clock::time_point now = gc_clock::now(), - io_error_handler_gen error_handler_gen = default_io_error_handler_gen(), size_t buffer_size = default_buffer_size) - : sstable_buffer_size(buffer_size) - , _schema(std::move(schema)) - , _dir(std::move(dir)) - , _generation(generation) - , _version(v) - , _format(f) - , _now(now) - , _read_error_handler(error_handler_gen(sstable_read_error)) - , _write_error_handler(error_handler_gen(sstable_write_error)) - { } - sstable& operator=(const sstable&) = delete; - sstable(const sstable&) = delete; - sstable(sstable&&) = default; - - ~sstable(); - - // Read one or few rows at the given byte range from the data file, - // feeding them into the consumer. This function reads the entire given - // byte range at once into memory, so it should not be used for iterating - // over all the rows in the data file (see the next function for that. - // The function returns a future which completes after all the data has - // been fed into the consumer. The caller needs to ensure the "consumer" - // object lives until then (e.g., using the do_with() idiom). - future<> data_consume_rows_at_once(row_consumer& consumer, uint64_t pos, uint64_t end); - - // disk_read_range describes a byte ranges covering part of an sstable - // row that we need to read from disk. Usually this is the whole byte - // range covering a single sstable row, but in very large rows we might - // want to only read a subset of the atoms which we know contains the - // columns we are looking for. - struct disk_read_range { - // TODO: this should become a vector of ranges - uint64_t start; - uint64_t end; - - disk_read_range() : start(0), end(0) {} - disk_read_range(uint64_t start, uint64_t end) : - start(start), end(end) { } - explicit operator bool() const { - return start != end; - } - }; - - // data_consume_rows() iterates over rows in the data file from - // a particular range, feeding them into the consumer. The iteration is - // done as efficiently as possible - reading only the data file (not the - // summary or index files) and reading data in batches. - // - // The consumer object may request the iteration to stop before reaching - // the end of the requested data range (e.g. stop after each sstable row). - // A context object is returned which allows to resume this consumption: - // This context's read() method requests that consumption begins, and - // returns a future which will be resolved when it ends (because the - // consumer asked to stop, or the data range ended). 
Only after the
-    // returned future is resolved, may read() be called again to consume
-    // more.
-    // The caller must ensure (e.g., using do_with()) that the context object,
-    // as well as the sstable, remains alive as long as a read() is in
-    // progress (i.e., returned a future which hasn't completed yet).
-    //
-    // The "toread" range specifies the range we want to read initially.
-    // However, the object returned by the read, a data_consume_context, also
-    // provides a fast_forward_to(start,end) method which allows resetting
-    // the reader to a new range. To allow that, we also have a "last_end"
-    // byte which should be the last end to which fast_forward_to is
-    // eventually allowed. If last_end==end, fast_forward_to is not allowed
-    // at all; if last_end==file_size, fast_forward_to is allowed until the
-    // end of the file; and it can be something in between if we know that we
-    // are planning to skip parts, but eventually read until last_end.
-    // When last_end==end, we guarantee that the read will only read the
-    // desired byte range from disk. However, when last_end > end, we may
-    // read beyond end in anticipation of a small skip via fast_forward_to.
-    // The amount of this excess read is controlled by read-ahead
-    // heuristics which learn from the usefulness of previous read-aheads.
-    data_consume_context data_consume_rows(row_consumer& consumer, disk_read_range toread, uint64_t last_end);
-
-    data_consume_context data_consume_single_partition(row_consumer& consumer, disk_read_range toread);
-
-    // Like data_consume_rows() with bounds, but iterates over the whole range.
-    data_consume_context data_consume_rows(row_consumer& consumer);
-
-    static component_type component_from_sstring(sstring& s);
-    static version_types version_from_sstring(sstring& s);
-    static format_types format_from_sstring(sstring& s);
-    static const sstring filename(sstring dir, sstring ks, sstring cf, version_types version, int64_t generation,
-                                  format_types format, component_type component);
-    static const sstring filename(sstring dir, sstring ks, sstring cf, version_types version, int64_t generation,
-                                  format_types format, sstring component);
-    // WARNING: it should only be called to remove components of an sstable with
-    // a temporary TOC file.
-    static future<> remove_sstable_with_temp_toc(sstring ks, sstring cf, sstring dir, int64_t generation,
-                                                 version_types v, format_types f);
-
-    // load sstable using components shared by a shard
-    future<> load(foreign_sstable_open_info info);
-    // load all components from disk
-    // this variant will be useful for testing purposes and also when loading
-    // a new sstable from scratch for sharing its components.
-    future<> load(const io_priority_class& pc = default_priority_class());
-    future<> open_data();
-    future<> update_info_for_opened_data();
-
-    future<> set_generation(int64_t generation);
-
-    int64_t generation() const {
-        return _generation;
-    }
-
-    // read_row() reads the entire sstable row (partition) at a given
-    // partition key k, or a subset of this row. The subset is defined by
-    // a filter on the clustering keys which we want to read, which
-    // additionally determines whether all the static columns will also be
-    // returned in the result.
- future read_row( - schema_ptr schema, - dht::ring_position_view key, - const query::partition_slice& slice = query::full_slice, - const io_priority_class& pc = default_priority_class(), - streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); - - future read_row( - schema_ptr schema, - const sstables::key& key, - const query::partition_slice& slice = query::full_slice, - const io_priority_class& pc = default_priority_class(), - streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); - - // Returns a mutation_reader for given range of partitions - mutation_reader read_range_rows( - schema_ptr schema, - const dht::partition_range& range, - const query::partition_slice& slice = query::full_slice, - const io_priority_class& pc = default_priority_class(), - streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no, - ::mutation_reader::forwarding fwd_mr = ::mutation_reader::forwarding::yes); - - // read_rows() returns each of the rows in the sstable, in sequence, - // converted to a "mutation" data structure. - // This function is implemented efficiently - doing buffered, sequential - // read of the data file (no need to access the index file). - // A "mutation_reader" object is returned with which the caller can - // fetch mutations in sequence, and allows stop iteration any time - // after getting each row. - // - // The caller must ensure (e.g., using do_with()) that the context object, - // as well as the sstable, remains alive as long as a read() is in - // progress (i.e., returned a future which hasn't completed yet). - mutation_reader read_rows(schema_ptr schema, - const io_priority_class& pc = default_priority_class(), - streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); - - // Returns mutation_source containing all writes contained in this sstable. - // The mutation_source shares ownership of this sstable. - mutation_source as_mutation_source(); - - future<> write_components(::mutation_reader mr, - uint64_t estimated_partitions, - schema_ptr schema, - const sstable_writer_config&, - const io_priority_class& pc = default_priority_class()); - - sstable_writer get_writer(const schema& s, - uint64_t estimated_partitions, - const sstable_writer_config&, - const io_priority_class& pc = default_priority_class(), - shard_id shard = engine().cpu_id()); - - future<> seal_sstable(bool backup); - - uint64_t get_estimated_key_count() const { - return ((uint64_t)_components->summary.header.size_at_full_sampling + 1) * - _components->summary.header.min_index_interval; - } - - uint64_t estimated_keys_for_range(const dht::token_range& range); - - std::vector get_key_samples(const schema& s, const dht::token_range& range); - - // mark_for_deletion() specifies that a sstable isn't relevant to the - // current shard, and thus can be deleted by the deletion manager, if - // all shards sharing it agree. In case the sstable is unshared, it's - // guaranteed that all of its on-disk files will be deleted as soon as - // the in-memory object is destroyed. - void mark_for_deletion() { - _marked_for_deletion = true; - } - - bool marked_for_deletion() const { - return _marked_for_deletion; - } - - void add_ancestor(int64_t generation) { - _collector.add_ancestor(generation); - } - - std::unordered_set ancestors() const; - - // Returns true iff this sstable contains data which belongs to many shards. - bool is_shared() const { - return _shared; - } - - void set_unshared() { - _shared = false; - } - - // Returns uncompressed size of data component. 
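On the two summary-based estimates above: at full sampling every summary entry stands for min_index_interval partitions, which is where both formulas come from. Plugging in commonly used values (128 for both the base sampling level and the index interval is an assumption for the example, not taken from this patch):

#include <cstdint>
#include <cstdio>

int main() {
    uint64_t min_index_interval = 128, base_sampling_level = 128, sampling_level = 128;
    // get_estimated_key_count(): the summary recorded 9 entries at full sampling.
    uint64_t size_at_full_sampling = 9;
    uint64_t whole_table = (size_at_full_sampling + 1) * min_index_interval;  // 1280
    // estimated_keys_for_range(): 20 summary entries fell inside the range.
    uint64_t sample_key_count = 20;
    uint64_t in_range = sample_key_count *
            ((base_sampling_level * min_index_interval) / sampling_level);    // 2560
    std::printf("%llu %llu\n", (unsigned long long)whole_table,
                (unsigned long long)in_range);
}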
- uint64_t data_size() const; - // Returns on-disk size of data component. - uint64_t ondisk_data_size() const; - - uint64_t index_size() const { - return _index_file_size; - } - uint64_t filter_size() const { - return _filter_file_size; - } - - db_clock::time_point data_file_write_time() const { - return _data_file_write_time; - } - - uint64_t filter_memory_size() const { - return _components->filter->memory_size(); - } - - // Returns the total bytes of all components. - uint64_t bytes_on_disk(); - - const partition_key& get_first_partition_key() const; - const partition_key& get_last_partition_key() const; - - const dht::decorated_key& get_first_decorated_key() const; - const dht::decorated_key& get_last_decorated_key() const; - - // SSTable comparator using the first key (decorated key). - // Return values are those of a trichotomic comparison. - int compare_by_first_key(const sstable& other) const; - - // SSTable comparator using the max timestamp. - // Return values are those of a trichotomic comparison. - int compare_by_max_timestamp(const sstable& other) const; - - const sstring get_filename() const { - return filename(component_type::Data); - } - const sstring& get_dir() const { - return _dir; - } - sstring toc_filename() const; - - metadata_collector& get_metadata_collector() { - return _collector; - } - - std::vector> all_components() const; - - future<> create_links(sstring dir, int64_t generation) const; - - future<> create_links(sstring dir) const { - return create_links(dir, _generation); - } - - /** - * Note. This is using the Origin definition of - * max_data_age, which is load time. This could maybe - * be improved upon. - */ - gc_clock::time_point max_data_age() const { - return _now; - } - std::vector component_filenames() const; - - template - auto sstable_write_io_check(Func&& func, Args&&... args) const { - return do_io_check(_write_error_handler, func, std::forward(args)...); - } - - // Immutable components that can be shared among shards. - struct shareable_components { - sstables::compression compression; - utils::filter_ptr filter; - sstables::summary summary; - sstables::statistics statistics; - stdx::optional scylla_metadata; - }; -private: - size_t sstable_buffer_size = default_buffer_size; - - static std::unordered_map> _version_string; - static std::unordered_map> _format_string; - static std::unordered_map> _component_map; - - std::unordered_set> _recognized_components; - std::vector _unrecognized_components; - - foreign_ptr> _components = make_foreign(make_lw_shared()); - shared_index_lists _index_lists; - bool _shared = true; // across shards; safe default - // NOTE: _collector and _c_stats are used to generation of statistics file - // when writing a new sstable. - metadata_collector _collector; - column_stats _c_stats; - file _index_file; - file _data_file; - uint64_t _data_file_size; - uint64_t _index_file_size; - uint64_t _filter_file_size = 0; - uint64_t _bytes_on_disk = 0; - db_clock::time_point _data_file_write_time; - std::vector> _clustering_components_ranges; - stdx::optional _first; - stdx::optional _last; - - lw_shared_ptr _single_partition_history = make_lw_shared(); - lw_shared_ptr _partition_range_history = make_lw_shared(); - - // _pi_write is used temporarily for building the promoted - // index (column sample) of one partition when writing a new sstable. - struct { - // Unfortunately we cannot output the promoted index directly to the - // index file because it needs to be prepended by its size. 
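That is, the on-disk layout is length-prefixed, so the block has to be fully buffered before its first byte can be written. A minimal standalone illustration of the constraint (host byte order for brevity; the real format is big-endian):

#include <cstdint>
#include <cstring>
#include <vector>

// A length-prefixed blob cannot be streamed out: the size field comes
// first, and it is only known once the whole body has been accumulated.
std::vector<char> length_prefixed(const std::vector<char>& body) {
    std::vector<char> out(sizeof(uint32_t) + body.size());
    uint32_t len = static_cast<uint32_t>(body.size());
    std::memcpy(out.data(), &len, sizeof(len));
    std::memcpy(out.data() + sizeof(len), body.data(), body.size());
    return out;
}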
- bytes_ostream data; - uint32_t numblocks; - deletion_time deltime; - uint64_t block_start_offset; - uint64_t block_next_start_offset; - bytes block_first_colname; - bytes block_last_colname; - std::experimental::optional tombstone_accumulator; - const schema* schemap; - size_t desired_block_size; - } _pi_write; - - void maybe_flush_pi_block(file_writer& out, - const composite& clustering_key, - const std::vector& column_names, - composite::eoc marker = composite::eoc::none); - - schema_ptr _schema; - sstring _dir; - unsigned long _generation = 0; - version_types _version; - format_types _format; - - filter_tracker _filter_tracker; - - bool _marked_for_deletion = false; - - gc_clock::time_point _now; - - io_error_handler _read_error_handler; - io_error_handler _write_error_handler; - - const bool has_component(component_type f) const; - - const sstring filename(component_type f) const; - - template - future<> read_simple(T& comp, const io_priority_class& pc); - - template - void write_simple(const T& comp, const io_priority_class& pc); - - void generate_toc(compressor c, double filter_fp_chance); - void write_toc(const io_priority_class& pc); - future<> seal_sstable(); - - future<> read_compression(const io_priority_class& pc); - void write_compression(const io_priority_class& pc); - - future<> read_scylla_metadata(const io_priority_class& pc); - void write_scylla_metadata(const io_priority_class& pc, shard_id shard = engine().cpu_id()); - - future<> read_filter(const io_priority_class& pc); - - void write_filter(const io_priority_class& pc); - - future<> read_summary(const io_priority_class& pc); - - void write_summary(const io_priority_class& pc) { - write_simple(_components->summary, pc); - } - - // To be called when we try to load an SSTable that lacks a Summary. Could - // happen if old tools are being used. - future<> generate_summary(const io_priority_class& pc); - - future<> read_statistics(const io_priority_class& pc); - void write_statistics(const io_priority_class& pc); - // Rewrite statistics component by creating a temporary Statistics and - // renaming it into place of existing one. - void rewrite_statistics(const io_priority_class& pc); - // Validate metadata that's used to optimize reads when user specifies - // a clustering key range. If this specific metadata is incorrect, then - // it should be cleared. Otherwise, it could lead to bad decisions. - // Metadata is probably incorrect if generated by previous Scylla versions. - void validate_min_max_metadata(); - - void set_first_and_last_keys(); - - // Create one range for each clustering component of this sstable. - // Each range stores min and max value for that specific component. - // It does nothing if schema defines no clustering key, and it's supposed - // to be called when loading an existing sstable or after writing a new one. - void set_clustering_components_ranges(); - - future<> create_data(); - - future read_indexes(uint64_t summary_idx, const io_priority_class& pc); - - // Return an input_stream which reads exactly the specified byte range - // from the data file (after uncompression, if the file is compressed). - // Unlike data_read() below, this method does not read the entire byte - // range into memory all at once. Rather, this method allows reading the - // data incrementally as a stream. 
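(For the random-access path declared just below, a hypothetical single-row read inside a seastar::thread; pos and len are assumed to have been resolved through the index file:

temporary_buffer<char> buf =
    sst->data_read(pos, len, default_priority_class()).get0();
)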
Knowing in advance the exact amount - // of bytes to be read using this stream, we can make better choices - // about the buffer size to read, and where exactly to stop reading - // (even when a large buffer size is used). - input_stream data_stream(uint64_t pos, size_t len, const io_priority_class& pc, - lw_shared_ptr history); - - // Read exactly the specific byte range from the data file (after - // uncompression, if the file is compressed). This can be used to read - // a specific row from the data file (its position and length can be - // determined using the index file). - // This function is intended (and optimized for) random access, not - // for iteration through all the rows. - future> data_read(uint64_t pos, size_t len, const io_priority_class& pc); - - future read_summary_entry(size_t i); - - // FIXME: pending on Bloom filter implementation - bool filter_has_key(const schema& s, const dht::decorated_key& dk) { return filter_has_key(key::from_partition_key(s, dk._key)); } - - // NOTE: functions used to generate sstable components. - void write_row_marker(file_writer& out, const row_marker& marker, const composite& clustering_key); - void write_clustered_row(file_writer& out, const schema& schema, const clustering_row& clustered_row); - void write_static_row(file_writer& out, const schema& schema, const row& static_row); - void write_cell(file_writer& out, atomic_cell_view cell, const column_definition& cdef); - void write_column_name(file_writer& out, const composite& clustering_key, const std::vector& column_names, composite::eoc marker = composite::eoc::none); - void write_column_name(file_writer& out, bytes_view column_names); - void write_range_tombstone(file_writer& out, const composite& start, composite::eoc start_marker, const composite& end, composite::eoc end_marker, std::vector suffix, const tombstone t); - void write_range_tombstone(file_writer& out, const composite& start, const composite& end, std::vector suffix, const tombstone t) { - write_range_tombstone(out, start, composite::eoc::start, end, composite::eoc::end, std::move(suffix), std::move(t)); - } - void write_collection(file_writer& out, const composite& clustering_key, const column_definition& cdef, collection_mutation_view collection); - void write_row_tombstone(file_writer& out, const composite& key, const row_tombstone t); - void write_deletion_time(file_writer& out, const tombstone t); - - stdx::optional> get_sample_indexes_for_range(const dht::token_range& range); -public: - std::unique_ptr get_index_reader(const io_priority_class& pc); - - future<> read_toc(); - - bool filter_has_key(const key& key) { - return _components->filter->is_present(bytes_view(key)); - } - - bool filter_has_key(utils::hashed_key key) { - return _components->filter->is_present(key); - } - - bool filter_has_key(const schema& s, partition_key_view key) { - return filter_has_key(key::from_partition_key(s, key)); - } - - static utils::hashed_key make_hashed_key(const schema& s, const partition_key& key); - - uint64_t filter_get_false_positive() { - return _filter_tracker.false_positive; - } - uint64_t filter_get_true_positive() { - return _filter_tracker.true_positive; - } - uint64_t filter_get_recent_false_positive() { - auto t = _filter_tracker.false_positive - _filter_tracker.last_false_positive; - _filter_tracker.last_false_positive = _filter_tracker.false_positive; - return t; - } - uint64_t filter_get_recent_true_positive() { - auto t = _filter_tracker.true_positive - _filter_tracker.last_true_positive; - 
_filter_tracker.last_true_positive = _filter_tracker.true_positive; - return t; - } - - const stats_metadata& get_stats_metadata() const { - auto entry = _components->statistics.contents.find(metadata_type::Stats); - if (entry == _components->statistics.contents.end()) { - throw std::runtime_error("Stats metadata not available"); - } - auto& p = entry->second; - if (!p) { - throw std::runtime_error("Statistics is malformed"); - } - const stats_metadata& s = *static_cast(p.get()); - return s; - } - const compaction_metadata& get_compaction_metadata() const { - auto entry = _components->statistics.contents.find(metadata_type::Compaction); - if (entry == _components->statistics.contents.end()) { - throw std::runtime_error("Compaction metadata not available"); - } - auto& p = entry->second; - if (!p) { - throw std::runtime_error("Statistics is malformed"); - } - const compaction_metadata& s = *static_cast(p.get()); - return s; - } - std::vector get_shards_for_this_sstable() const; - - uint32_t get_sstable_level() const { - return get_stats_metadata().sstable_level; - } - - // This will change sstable level only in memory. - void set_sstable_level(uint32_t); - - double get_compression_ratio() const; - - future<> mutate_sstable_level(uint32_t); - - const summary& get_summary() const { - return _components->summary; - } - - // Return sstable key range as range reading only the summary component. - future> - get_sstable_key_range(const schema& s); - - future> get_owning_shards_from_unloaded(); - - const std::vector>& clustering_components_ranges() const; - - // get sstable open info from a loaded sstable, which can be used to quickly open a sstable - // at another shard. - future get_open_info() &; - - // returns all info needed for a sstable to be shared with other shards. - static future load_shared_components(const schema_ptr& s, sstring dir, int generation, version_types v, format_types f, - const io_priority_class& pc = default_priority_class()); - - // Allow the test cases from sstable_test.cc to test private methods. We use - // a placeholder to avoid cluttering this class too much. The sstable_test class - // will then re-export as public every method it needs. - friend class test; - - friend class components_writer; - friend class sstable_writer; - friend class index_reader; - friend class mutation_reader::impl; -}; - -using shared_sstable = lw_shared_ptr; -using sstable_list = std::unordered_set; - -struct entry_descriptor { - sstring ks; - sstring cf; - sstable::version_types version; - int64_t generation; - sstable::format_types format; - sstable::component_type component; - - static entry_descriptor make_descriptor(sstring fname); - - entry_descriptor(sstring ks, sstring cf, sstable::version_types version, - int64_t generation, sstable::format_types format, - sstable::component_type component) - : ks(ks), cf(cf), version(version), generation(generation), format(format), component(component) {} -}; - -// Waits for all prior tasks started on current shard related to sstable management to finish. -// -// There may be asynchronous cleanup started from sstable destructor. Since we can't have blocking -// destructors in seastar, that cleanup is not waited for. It can be waited for using this function. -// It is also waited for when seastar exits. -future<> await_background_jobs(); - -// Invokes await_background_jobs() on all shards -future<> await_background_jobs_on_all_shards(); - -// When we compact sstables, we have to atomically instantiate the new -// sstable and delete the old ones. 
Otherwise, if we compact A+B into C, -// and if A contained some data that was tombstoned by B, and if B was -// deleted but A survived, then data from A will be resurrected. -// -// There are two violators of the requirement to atomically delete -// sstables: first sstable instantiation and deletion on disk is atomic -// only wrt. itself, not other sstables, and second when an sstable is -// shared among shard, so actual on-disk deletion of an sstable is deferred -// until all shards agree it can be deleted. -// -// When shutting down, we will not be able to complete some deletions. -// In that case, an atomic_deletion_cancelled exception is returned instead. -// -// This function only solves the second problem for now. -future<> delete_atomically(std::vector ssts); -future<> delete_atomically(std::vector ssts); - -// Cancel any deletions scheduled by delete_atomically() and make their -// futures complete (with an atomic_deletion_cancelled exception). -void cancel_atomic_deletions(); - -class components_writer { - sstable& _sst; - const schema& _schema; - file_writer& _out; - file_writer _index; - bool _index_needs_close; - uint64_t _max_sstable_size; - bool _tombstone_written; - // Remember first and last keys, which we need for the summary file. - stdx::optional _first_key, _last_key; - stdx::optional _partition_key; -private: - size_t get_offset(); - file_writer index_file_writer(sstable& sst, const io_priority_class& pc); - void ensure_tombstone_is_written() { - if (!_tombstone_written) { - consume(tombstone()); - } - } -public: - components_writer(sstable& sst, const schema& s, file_writer& out, uint64_t estimated_partitions, const sstable_writer_config&, const io_priority_class& pc); - ~components_writer(); - components_writer(components_writer&& o) : _sst(o._sst), _schema(o._schema), _out(o._out), _index(std::move(o._index)), - _index_needs_close(o._index_needs_close), _max_sstable_size(o._max_sstable_size), _tombstone_written(o._tombstone_written), - _first_key(std::move(o._first_key)), _last_key(std::move(o._last_key)), _partition_key(std::move(o._partition_key)) { - o._index_needs_close = false; - } - - void consume_new_partition(const dht::decorated_key& dk); - void consume(tombstone t); - stop_iteration consume(static_row&& sr); - stop_iteration consume(clustering_row&& cr); - stop_iteration consume(range_tombstone&& rt); - stop_iteration consume_end_of_partition(); - void consume_end_of_stream(); -}; - -class sstable_writer { - sstable& _sst; - const schema& _schema; - const io_priority_class& _pc; - bool _backup; - bool _leave_unsealed; - bool _compression_enabled; - std::unique_ptr _writer; - stdx::optional _components_writer; - shard_id _shard; // Specifies which shard new sstable will belong to. 
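For orientation, the consume_* interface just listed (which sstable_writer below forwards to components_writer) is driven in a fixed order per partition. A hypothetical driver, assuming the sstable, schema, config and rows were built elsewhere:

auto wr = sst.get_writer(s, estimated_partitions, cfg);
wr.consume_new_partition(dk);        // partitions arrive in token order
wr.consume(partition_tombstone);     // partition-level tombstone first
wr.consume(std::move(srow));         // then the static row...
wr.consume(std::move(crow));         // ...clustering rows / range tombstones
wr.consume_end_of_partition();
wr.consume_end_of_stream();          // writes summary/filter/stats and seals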
-private: - void prepare_file_writer(); - void finish_file_writer(); -public: - sstable_writer(sstable& sst, const schema& s, uint64_t estimated_partitions, - const sstable_writer_config&, const io_priority_class& pc, shard_id shard = engine().cpu_id()); - ~sstable_writer(); - sstable_writer(sstable_writer&& o) : _sst(o._sst), _schema(o._schema), _pc(o._pc), _backup(o._backup), - _leave_unsealed(o._leave_unsealed), _compression_enabled(o._compression_enabled), _writer(std::move(o._writer)), - _components_writer(std::move(o._components_writer)), _shard(o._shard) {} - void consume_new_partition(const dht::decorated_key& dk) { return _components_writer->consume_new_partition(dk); } - void consume(tombstone t) { _components_writer->consume(t); } - stop_iteration consume(static_row&& sr) { return _components_writer->consume(std::move(sr)); } - stop_iteration consume(clustering_row&& cr) { return _components_writer->consume(std::move(cr)); } - stop_iteration consume(range_tombstone&& rt) { return _components_writer->consume(std::move(rt)); } - stop_iteration consume_end_of_partition() { return _components_writer->consume_end_of_partition(); } - void consume_end_of_stream(); -}; - -// contains data for loading a sstable using components shared by a single shard; -// can be moved across shards -struct foreign_sstable_open_info { - foreign_ptr> components; - std::vector owners; - seastar::file_handle data; - seastar::file_handle index; - uint64_t generation; - sstable::version_types version; - sstable::format_types format; -}; - -// can only be used locally -struct sstable_open_info { - lw_shared_ptr components; - std::vector owners; - file data; - file index; -}; - -future<> init_metrics(); - -} diff --git a/scylla/sstables/streaming_histogram.hh b/scylla/sstables/streaming_histogram.hh deleted file mode 100644 index f0dd2bf..0000000 --- a/scylla/sstables/streaming_histogram.hh +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
diff --git a/scylla/sstables/streaming_histogram.hh b/scylla/sstables/streaming_histogram.hh deleted file mode 100644 index f0dd2bf..0000000 --- a/scylla/sstables/streaming_histogram.hh +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (C) 2015 ScyllaDB - * - * Modified by ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include "disk_types.hh" - -namespace sstables { - -/** - * Histogram that can be constructed from streaming of data. - * - * The algorithm is taken from the following paper: - * Yael Ben-Haim and Elad Tom-Tov, "A Streaming Parallel Decision Tree Algorithm" (2010) - * http://jmlr.csail.mit.edu/papers/volume11/ben-haim10a/ben-haim10a.pdf - */ -struct streaming_histogram { - // TreeMap to hold bins of histogram. - disk_hash bin; - - // maximum bin size for this histogram - uint32_t max_bin_size; - - /** - * Creates a new histogram with max bin size of maxBinSize - * @param maxBinSize maximum number of bins this histogram can have - */ - streaming_histogram(int max_bin_size_p = 0) { - max_bin_size = max_bin_size_p; - } - - streaming_histogram(int max_bin_size_p, disk_hash&& bin_p) - { - max_bin_size = max_bin_size_p; - bin = std::move(bin_p); - } - - /** - * Adds new point p to this histogram. - * @param p - */ - void update(double p) - { - update(p, 1); - } - - /** - * Adds new point p with value m to this histogram. - * @param p - * @param m - */ - void update(double p, uint64_t m) { - auto it = bin.map.find(p); - if (it != bin.map.end()) { - bin.map[p] = it->second + m; - } else { - bin.map[p] = m; - // if bin size exceeds maximum bin size then trim down to max size - while (bin.map.size() > max_bin_size) { - // find points p1, p2 which have smallest difference - auto it = bin.map.begin(); - double p1 = it->first; - it++; - double p2 = it->first; - it++; - double smallestDiff = p2 - p1; - double q1 = p1, q2 = p2; - while(it != bin.map.end()) { - p1 = p2; - p2 = it->first; - it++; - double diff = p2 - p1; - if (diff < smallestDiff) - { - smallestDiff = diff; - q1 = p1; - q2 = p2; - } - } - // merge those two; read the counts before erasing, since - // map::erase(key) returns the number of erased elements, - // not the stored count - uint64_t k1 = bin.map[q1]; - uint64_t k2 = bin.map[q2]; - bin.map.erase(q1); - bin.map.erase(q2); - bin.map.insert({(q1 * k1 + q2 * k2) / (k1 + k2), k1 + k2}); - } - } - } - - - /** - * Merges given histogram with this histogram. - * - * @param other histogram to merge - */ - void merge(streaming_histogram& other) - { - if (!other.bin.map.size()) - return; - - for (auto& it : other.bin.map) { - update(it.first, it.second); - } - } - - /** - * Function used to describe the type. - */ - template - auto describe_type(Describer f) { return f(max_bin_size, bin); } - - // FIXME: convert Java code below. -#if 0 - /** - * Calculates estimated number of points in the interval [-inf,b]. - * - * @param b upper bound of the interval to calculate sum - * @return estimated number of points in the interval [-inf,b].
- */ - public double sum(double b) - { - double sum = 0; - // find the points pi, pnext which satisfy pi <= b < pnext - Map.Entry pnext = bin.higherEntry(b); - if (pnext == null) - { - // if b is greater than any key in this histogram, - // just count all appearance and return - for (Long value : bin.values()) - sum += value; - } - else - { - Map.Entry pi = bin.floorEntry(b); - if (pi == null) - return 0; - // calculate estimated count mb for point b - double weight = (b - pi.getKey()) / (pnext.getKey() - pi.getKey()); - double mb = pi.getValue() + (pnext.getValue() - pi.getValue()) * weight; - sum += (pi.getValue() + mb) * weight / 2; - - sum += pi.getValue() / 2.0; - for (Long value : bin.headMap(pi.getKey(), false).values()) - sum += value; - } - return sum; - } - - public Map getAsMap() - { - return Collections.unmodifiableMap(bin); - } - - public static class StreamingHistogramSerializer implements ISerializer - { - public void serialize(StreamingHistogram histogram, DataOutputPlus out) throws IOException - { - out.writeInt(histogram.maxBinSize); - Map entries = histogram.getAsMap(); - out.writeInt(entries.size()); - for (Map.Entry entry : entries.entrySet()) - { - out.writeDouble(entry.getKey()); - out.writeLong(entry.getValue()); - } - } - - public StreamingHistogram deserialize(DataInput in) throws IOException - { - int maxBinSize = in.readInt(); - int size = in.readInt(); - Map tmp = new HashMap<>(size); - for (int i = 0; i < size; i++) - { - tmp.put(in.readDouble(), in.readLong()); - } - - return new StreamingHistogram(maxBinSize, tmp); - } - - public long serializedSize(StreamingHistogram histogram, TypeSizes typeSizes) - { - long size = typeSizes.sizeof(histogram.maxBinSize); - Map entries = histogram.getAsMap(); - size += typeSizes.sizeof(entries.size()); - // size of entries = size * (8(double) + 8(long)) - size += entries.size() * (8 + 8); - return size; - } - } - - @Override - public boolean equals(Object o) - { - if (this == o) - return true; - - if (!(o instanceof StreamingHistogram)) - return false; - - StreamingHistogram that = (StreamingHistogram) o; - return maxBinSize == that.maxBinSize && bin.equals(that.bin); - } - - @Override - public int hashCode() - { - return Objects.hashCode(bin.hashCode(), maxBinSize); - } -#endif -}; - -}
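The update() algorithm above is easier to see on a plain std::map: every new point becomes its own bin, and when the bin count exceeds the maximum, the two closest bins are merged into one bin at their weighted mean. A self-contained sketch of that merge rule (std::map stands in for disk_hash; max_bins is assumed to be at least 2):

#include <cstdint>
#include <iterator>
#include <map>

// Same trimming rule as streaming_histogram::update() above, restated over
// std::map. Streaming 1, 2, 10, 11 with max_bins = 3 merges the closest
// pair (1, 2) into one weighted bin: {1.5: 2, 10: 1, 11: 1}.
void update_bins(std::map<double, uint64_t>& bins, size_t max_bins, double p) {
    bins[p] += 1;
    while (bins.size() > max_bins) {
        // find the adjacent pair (q1, q2) with the smallest difference
        auto q1 = bins.begin();
        auto q2 = std::next(q1);
        for (auto a = q1, b = q2; b != bins.end(); ++a, ++b) {
            if (b->first - a->first < q2->first - q1->first) {
                q1 = a;
                q2 = b;
            }
        }
        // merge the two bins into one at their weighted mean
        double k1 = q1->second, k2 = q2->second;
        uint64_t count = q1->second + q2->second;
        double merged = (q1->first * k1 + q2->first * k2) / (k1 + k2);
        bins.erase(q1);
        bins.erase(q2);
        bins[merged] += count;
    }
}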
diff --git a/scylla/sstables/types.hh b/scylla/sstables/types.hh deleted file mode 100644 index e7c6d91..0000000 --- a/scylla/sstables/types.hh +++ /dev/null @@ -1,431 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include "disk_types.hh" -#include "core/enum.hh" -#include "bytes.hh" -#include "gc_clock.hh" -#include "tombstone.hh" -#include "streaming_histogram.hh" -#include "utils/estimated_histogram.hh" -#include "column_name_helper.hh" -#include "sstables/key.hh" -#include "db/commitlog/replay_position.hh" -#include -#include -#include - -// While the sstable code works with char, bytes_view works with int8_t -// (signed char). Rather than change all the code, let's do a cast. -static inline bytes_view to_bytes_view(const temporary_buffer& b) { - using byte = bytes_view::value_type; - return bytes_view(reinterpret_cast(b.get()), b.size()); -} - -namespace sstables { - -struct deletion_time { - int32_t local_deletion_time; - int64_t marked_for_delete_at; - - template - auto describe_type(Describer f) { return f(local_deletion_time, marked_for_delete_at); } - - bool live() const { - return (local_deletion_time == std::numeric_limits::max()) && - (marked_for_delete_at == std::numeric_limits::min()); - } - - bool operator==(const deletion_time& d) { - return local_deletion_time == d.local_deletion_time && - marked_for_delete_at == d.marked_for_delete_at; - } - bool operator!=(const deletion_time& d) { - return !(*this == d); - } - explicit operator tombstone() { - return !live() ? tombstone(marked_for_delete_at, gc_clock::time_point(gc_clock::duration(local_deletion_time))) : tombstone(); - } -}; - -struct option { - disk_string key; - disk_string value; - - template - auto describe_type(Describer f) { return f(key, value); } -}; - -struct filter { - uint32_t hashes; - disk_array buckets; - - template - auto describe_type(Describer f) { return f(hashes, buckets); } - - // Create an always positive filter if nothing else is specified. - filter() : hashes(0), buckets({}) {} - explicit filter(int hashes, std::deque buckets) : hashes(hashes), buckets({std::move(buckets)}) {} -}; - -enum class indexable_element { - partition, - cell -};
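// Illustration (not from the original file): the sentinel convention behind
// deletion_time::live() above. A "live" entry stores INT32_MAX/INT64_MIN,
// matching Cassandra's on-disk convention; anything else represents a real
// deletion and converts to a tombstone via the explicit operator above.
// Assumes <limits> and <cstdint>; the timestamps are arbitrary examples.
inline bool deletion_time_sentinel_example() {
    deletion_time live_dt{std::numeric_limits<int32_t>::max(),
                          std::numeric_limits<int64_t>::min()};
    deletion_time dead_dt{1504224068 /* seconds */, 1504224068000000 /* us */};
    return live_dt.live() && !dead_dt.live();
}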
- -// Exploded view of promoted index. -// Contains pointers into external buffer, so that buffer must be kept alive -// as long as this is used. -struct promoted_index { - struct entry { - composite_view start; - composite_view end; - uint64_t offset; - uint64_t width; - }; - deletion_time del_time; - std::deque entries; -}; - -class promoted_index_view { - bytes_view _bytes; -public: - explicit promoted_index_view(bytes_view v) : _bytes(v) {} - sstables::deletion_time get_deletion_time() const; - promoted_index parse(const schema&) const; - explicit operator bool() const { return !_bytes.empty(); } -}; - -class index_entry { - temporary_buffer _key; - mutable stdx::optional _token; - uint64_t _position; - temporary_buffer _promoted_index_bytes; - stdx::optional _promoted_index; -public: - - bytes_view get_key_bytes() const { - return to_bytes_view(_key); - } - - key_view get_key() const { - return key_view{get_key_bytes()}; - } - - decorated_key_view get_decorated_key() const { - if (!_token) { - _token.emplace(dht::global_partitioner().get_token(get_key())); - } - return decorated_key_view(*_token, get_key()); - } - - uint64_t position() const { - return _position; - } - - bytes_view get_promoted_index_bytes() const { - return to_bytes_view(_promoted_index_bytes); - } - - promoted_index_view get_promoted_index_view() const { - return promoted_index_view(get_promoted_index_bytes()); - } - - index_entry(temporary_buffer&& key, uint64_t position, temporary_buffer&& promoted_index) - : _key(std::move(key)), _position(position), _promoted_index_bytes(std::move(promoted_index)) {} - - index_entry(const index_entry& o) - : _key(o._key.get(), o._key.size()) - , _position(o._position) - , _promoted_index_bytes(o._promoted_index_bytes.get(), o._promoted_index_bytes.size()) - { } - - promoted_index* get_promoted_index(const schema& s) { - if (!_promoted_index) { - auto v = get_promoted_index_view(); - if (v) { - _promoted_index = v.parse(s); - } - } - return _promoted_index ? &*_promoted_index : nullptr; - } -}; - -struct summary_entry { - dht::token token; - bytes key; - uint64_t position; - - key_view get_key() const { - return key_view{key}; - } - - decorated_key_view get_decorated_key() const { - return decorated_key_view(token, get_key()); - } - - bool operator==(const summary_entry& x) const { - return position == x.position && key == x.key; - } -}; - -// Note: Sampling level is present in versions ka and higher. We ATM only support ka, -// so it's always there. But we need to make this conditional if we ever want to support -// other formats. -struct summary_ka { - struct header { - // The minimum possible amount of indexes per group (sampling level) - uint32_t min_index_interval; - // The number of entries in the Summary File - uint32_t size; - // The memory to be consumed to map the whole Summary into memory. - uint64_t memory_size; - // The actual sampling level. - uint32_t sampling_level; - // The number of entries the Summary *would* have if the sampling - // level were equal to min_index_interval. - uint32_t size_at_full_sampling; - } header; - // The position in the Summary file for each of the indexes. - // NOTE1: its actual size is determined by the "size" parameter, not - // by its preceding size_at_full_sampling. - // NOTE2: They are laid out in *MEMORY* order, not BE. - // NOTE3: The sizes in this array represent positions in the memory stream, - // not the file. The memory stream effectively begins after the header, - // so sizeof(header) has to be added to every position here.
- std::deque positions; // can be large, so use a deque instead of a vector - std::deque entries; - - disk_string first_key; - disk_string last_key; - - // Used to determine when a summary entry should be added based on min_index_interval. - // NOTE: keys_written isn't part of on-disk format of summary. - size_t keys_written; - - // NOTE4: There is a structure written by Cassandra at the end of the Summary - // file, after the last_key field, that we haven't understood yet, but we know - // that its content isn't related to the summary itself. - // The structure is basically as follows: - // struct { disk_string; uint32_t; uint64_t; disk_string; } - // Another interesting fact about this structure is that it is apparently always - // filled with the same data. It's too early to judge that the data is useless. - // However, it has been verified that Cassandra successfully loads a Summary file - // with this structure removed from it. Anyway, let's pay attention to it. - - /* - * Returns total amount of memory used by the summary - * Similar to Origin's off-heap size - */ - uint64_t memory_footprint() const { - auto sz = sizeof(summary_entry) * entries.size() + sizeof(uint32_t) * positions.size() + sizeof(*this); - sz += first_key.value.size() + last_key.value.size(); - for (auto& e : entries) { - sz += e.key.size(); - } - return sz; - } - - explicit operator bool() const { - return entries.size(); - } -}; -using summary = summary_ka; - -class file_writer; - -struct metadata { - virtual ~metadata() {} - virtual uint64_t serialized_size() const = 0; - virtual void write(file_writer& write) const = 0; -}; - -template -uint64_t serialized_size(const T& object); - -template -typename std::enable_if_t::value && !std::is_enum::value, void> -write(file_writer& out, const T& t); - -// serialized_size() implementation for metadata class -template -class metadata_base : public metadata { -public: - virtual uint64_t serialized_size() const override { - return sstables::serialized_size(static_cast(*this)); - } - virtual void write(file_writer& writer) const override { - return sstables::write(writer, static_cast(*this)); - } -}; - -struct validation_metadata : public metadata_base { - disk_string partitioner; - double filter_chance; - - template - auto describe_type(Describer f) { return f(partitioner, filter_chance); } -}; - -struct compaction_metadata : public metadata_base { - disk_array ancestors; - disk_array cardinality; - - template - auto describe_type(Describer f) { return f(ancestors, cardinality); } -}; - -struct ka_stats_metadata : public metadata_base { - utils::estimated_histogram estimated_row_size; - utils::estimated_histogram estimated_column_count; - db::replay_position position; - int64_t min_timestamp; - int64_t max_timestamp; - int32_t max_local_deletion_time; - double compression_ratio; - streaming_histogram estimated_tombstone_drop_time; - uint32_t sstable_level; - uint64_t repaired_at; - disk_array> min_column_names; - disk_array> max_column_names; - bool has_legacy_counter_shards; - - template - auto describe_type(Describer f) { - return f( - estimated_row_size, - estimated_column_count, - position, - min_timestamp, - max_timestamp, - max_local_deletion_time, - compression_ratio, - estimated_tombstone_drop_time, - sstable_level, - repaired_at, - min_column_names, - max_column_names, - has_legacy_counter_shards - ); - } -}; -using stats_metadata = ka_stats_metadata;
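// Illustration (not from the original file): the CRTP pattern used by
// metadata_base above. Each concrete *_metadata type inherits from
// metadata_base<Self>, whose virtual overrides downcast *this and call the
// freestanding serialized_size()/write() templates for the concrete type,
// so the boilerplate lives in one place. Minimal hypothetical stand-ins:
template <typename T>
uint64_t measured_size(const T&) { return sizeof(T); }   // stand-in template

struct measurable {
    virtual ~measurable() = default;
    virtual uint64_t size() const = 0;
};

template <typename Child>
struct measurable_base : public measurable {
    uint64_t size() const override {
        // the downcast selects measured_size<Child>, just as metadata_base
        // selects sstables::serialized_size()/write() for the derived type
        return measured_size(static_cast<const Child&>(*this));
    }
};

struct example_metadata : public measurable_base<example_metadata> {
    uint32_t a; uint64_t b;
};
// measurable* m = new example_metadata(); m->size() == sizeof(example_metadata)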
-struct disk_token_bound { - uint8_t exclusive; // really a boolean - disk_string token; - - template - auto describe_type(Describer f) { return f(exclusive, token); } -}; - -struct disk_token_range { - disk_token_bound left; - disk_token_bound right; - - template - auto describe_type(Describer f) { return f(left, right); } -}; - -// Scylla-specific sharding information. This is a set of token -// ranges that are spanned by this sstable. When loading the -// sstable, we can see which shards own data in the sstable by -// checking each such range. -struct sharding_metadata { - disk_array token_ranges; - - template - auto describe_type(Describer f) { return f(token_ranges); } -}; - - -// Numbers are found on disk, so they do matter. Also, setting their size to -// that of a uint32_t is a bit wasteful, but it simplifies the code a lot -// since we can now still use a strongly typed enum without introducing a -// notion of "disk-size" vs "memory-size". -enum class metadata_type : uint32_t { - Validation = 0, - Compaction = 1, - Stats = 2, -}; - - -enum class scylla_metadata_type : uint32_t { - Sharding = 1, -}; - -struct scylla_metadata { - disk_set_of_tagged_union - > data; - - template - auto describe_type(Describer f) { return f(data); } -}; - -static constexpr int DEFAULT_CHUNK_SIZE = 65536; - -// checksums are generated using the adler32 algorithm. -struct checksum { - uint32_t chunk_size; - std::deque checksums; - - template - auto describe_type(Describer f) { return f(chunk_size, checksums); } -}; - -} - -namespace std { - -template <> -struct hash : enum_hash {}; - -} - -namespace sstables { - -struct statistics { - disk_hash hash; - std::unordered_map> contents; -}; - -enum class column_mask : uint8_t { - none = 0x0, - deletion = 0x01, - expiration = 0x02, - counter = 0x04, - counter_update = 0x08, - range_tombstone = 0x10, - shadowable = 0x40 -}; - -inline column_mask operator&(column_mask m1, column_mask m2) { - return column_mask(static_cast(m1) & static_cast(m2)); -} - -inline column_mask operator|(column_mask m1, column_mask m2) { - return column_mask(static_cast(m1) | static_cast(m2)); -} -} - diff --git a/scylla/sstables/writer.hh b/scylla/sstables/writer.hh deleted file mode 100644 index fe89f4a..0000000 --- a/scylla/sstables/writer.hh +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
- */ - -#pragma once - -#include "core/iostream.hh" -#include "core/fstream.hh" -#include "types.hh" -#include "compress.hh" -#include - -namespace sstables { - -class file_writer { - output_stream _out; - size_t _offset = 0; -public: - file_writer(file f, file_output_stream_options options) - : _out(make_file_output_stream(std::move(f), std::move(options))) {} - - file_writer(output_stream&& out) - : _out(std::move(out)) {} - - virtual ~file_writer() = default; - file_writer(file_writer&&) = default; - - future<> write(const char* buf, size_t n) { - _offset += n; - return _out.write(buf, n); - } - future<> write(const bytes& s) { - _offset += s.size(); - return _out.write(s); - } - future<> flush() { - return _out.flush(); - } - future<> close() { - return _out.close(); - } - size_t offset() { - return _offset; - } -}; - - -class sizing_data_sink : public data_sink_impl { - uint64_t& _size; -public: - explicit sizing_data_sink(uint64_t& dest) : _size(dest) { - _size = 0; - } - virtual temporary_buffer allocate_buffer(size_t size) { - return temporary_buffer(size); - } - virtual future<> put(net::packet data) override { - _size += data.len(); - return make_ready_future<>(); - } - virtual future<> put(std::vector> data) override { - _size += boost::accumulate(data | boost::adaptors::transformed(std::mem_fn(&temporary_buffer::size)), 0); - return make_ready_future<>(); - } - virtual future<> put(temporary_buffer buf) override { - _size += buf.size(); - return make_ready_future<>(); - } - virtual future<> flush() override { - return make_ready_future<>(); - } - virtual future<> close() override { - return make_ready_future<>(); - } -}; - -inline -output_stream -make_sizing_output_stream(uint64_t& dest) { - return output_stream(data_sink(std::make_unique(std::ref(dest))), 4096); -} - -// Must be called from a thread -template -uint64_t -serialized_size(const T& object) { - uint64_t size = 0; - auto writer = file_writer(make_sizing_output_stream(size)); - write(writer, object); - writer.flush().get(); - writer.close().get(); - return size; -} - -output_stream make_checksummed_file_output_stream(file f, struct checksum& cinfo, uint32_t& full_file_checksum, bool checksum_file, file_output_stream_options options); - -class checksummed_file_writer : public file_writer { - checksum _c; - uint32_t _full_checksum; -public: - checksummed_file_writer(file f, file_output_stream_options options, bool checksum_file = false) - : file_writer(make_checksummed_file_output_stream(std::move(f), _c, _full_checksum, checksum_file, options)) - , _c({uint32_t(std::min(size_t(DEFAULT_CHUNK_SIZE), size_t(options.buffer_size)))}) - , _full_checksum(init_checksum_adler32()) {} - - // Since we are exposing a reference to _full_checksum, we delete the move - // constructor. If it is moved, the reference will refer to the old - // location. 
- checksummed_file_writer(checksummed_file_writer&&) = delete; - checksummed_file_writer(const checksummed_file_writer&) = default; - - checksum& finalize_checksum() { - return _c; - } - uint32_t full_checksum() { - return _full_checksum; - } -}; - -class checksummed_file_data_sink_impl : public data_sink_impl { - output_stream _out; - struct checksum& _c; - uint32_t& _full_checksum; - bool _checksum_file; -public: - checksummed_file_data_sink_impl(file f, struct checksum& c, uint32_t& full_file_checksum, bool checksum_file, file_output_stream_options options) - : _out(make_file_output_stream(std::move(f), std::move(options))) - , _c(c) - , _full_checksum(full_file_checksum) - , _checksum_file(checksum_file) - {} - - future<> put(net::packet data) { abort(); } - virtual future<> put(temporary_buffer buf) override { - // bufs will usually be a multiple of chunk size, but this won't be the case for - // the last buffer being flushed. - - if (!_checksum_file) { - _full_checksum = checksum_adler32(_full_checksum, buf.begin(), buf.size()); - } else { - for (size_t offset = 0; offset < buf.size(); offset += _c.chunk_size) { - size_t size = std::min(size_t(_c.chunk_size), buf.size() - offset); - uint32_t per_chunk_checksum = init_checksum_adler32(); - - per_chunk_checksum = checksum_adler32(per_chunk_checksum, buf.begin() + offset, size); - _full_checksum = checksum_adler32_combine(_full_checksum, per_chunk_checksum, size); - _c.checksums.push_back(per_chunk_checksum); - } - } - auto f = _out.write(buf.begin(), buf.size()); - return f.then([buf = std::move(buf)] {}); - } - - virtual future<> close() { - // Nothing to do, because close at the file_stream level will call flush on us. - return _out.close(); - } -}; - -class checksummed_file_data_sink : public data_sink { -public: - checksummed_file_data_sink(file f, struct checksum& cinfo, uint32_t& full_file_checksum, bool checksum_file, file_output_stream_options options) - : data_sink(std::make_unique(std::move(f), cinfo, full_file_checksum, checksum_file, std::move(options))) {} -}; - -inline -output_stream make_checksummed_file_output_stream(file f, struct checksum& cinfo, uint32_t& full_file_checksum, bool checksum_file, file_output_stream_options options) { - auto buffer_size = options.buffer_size; - return output_stream(checksummed_file_data_sink(std::move(f), cinfo, full_file_checksum, checksum_file, std::move(options)), buffer_size, true); -} - -// compressed_file_data_sink_impl works as a filter for a file output stream, -// where the buffer flushed will be compressed and its checksum computed, then -// the result passed to a regular output stream. -class compressed_file_data_sink_impl : public data_sink_impl { - output_stream _out; - sstables::compression* _compression_metadata; - size_t _pos = 0; -public: - compressed_file_data_sink_impl(file f, sstables::compression* cm, file_output_stream_options options) - : _out(make_file_output_stream(std::move(f), options)) - , _compression_metadata(cm) {} - - future<> put(net::packet data) { abort(); } - virtual future<> put(temporary_buffer buf) override { - auto output_len = _compression_metadata->compress_max_size(buf.size()); - // account space for checksum that goes after compressed data. - temporary_buffer compressed(output_len + 4); - - // compress flushed data. 
- auto len = _compression_metadata->compress(buf.get(), buf.size(), compressed.get_write(), output_len); - if (len > output_len) { - throw std::runtime_error("possible overflow during compression"); - } - - _compression_metadata->offsets.elements.push_back(_pos); - // account compressed data + 32-bit checksum. - _pos += len + 4; - _compression_metadata->set_compressed_file_length(_pos); - // total length of the uncompressed data. - _compression_metadata->data_len += buf.size(); - - // compute 32-bit checksum for compressed data. - uint32_t per_chunk_checksum = checksum_adler32(compressed.get(), len); - _compression_metadata->update_full_checksum(per_chunk_checksum, len); - - // write checksum into buffer after compressed data. - write_be(compressed.get_write() + len, per_chunk_checksum); - - compressed.trim(len + 4); - - auto f = _out.write(compressed.get(), compressed.size()); - return f.then([compressed = std::move(compressed)] {}); - } - virtual future<> close() { - return _out.close(); - } -}; - -class compressed_file_data_sink : public data_sink { -public: - compressed_file_data_sink(file f, sstables::compression* cm, file_output_stream_options options) - : data_sink(std::make_unique( - std::move(f), cm, options)) {} -}; - -static inline output_stream make_compressed_file_output_stream(file f, file_output_stream_options options, sstables::compression* cm) { - // buffer of output stream is set to chunk length, because flush must - // happen every time a chunk was filled up. - auto outer_buffer_size = cm->uncompressed_chunk_length(); - return output_stream(compressed_file_data_sink(std::move(f), cm, options), outer_buffer_size, true); -} - -}
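The checksummed sink above keeps one adler32 per chunk plus a running whole-file checksum built with checksum_adler32_combine(). Assuming those helpers are thin wrappers over zlib (an assumption; their definitions are outside this patch), the invariant they rely on can be shown directly with zlib:

#include <algorithm>
#include <cassert>
#include <cstring>
#include <zlib.h>

void adler32_chunk_combine_example() {
    const char data[] = "abcdefgh";       // pretend this is one flushed buffer
    const size_t len = strlen(data);
    const size_t chunk = 4;               // stand-in for DEFAULT_CHUNK_SIZE
    uLong full = adler32(0L, Z_NULL, 0);  // initial value, cf. init_checksum_adler32()
    for (size_t off = 0; off < len; off += chunk) {
        size_t n = std::min(chunk, len - off);
        uLong per_chunk = adler32(adler32(0L, Z_NULL, 0),
                                  reinterpret_cast<const Bytef*>(data + off),
                                  static_cast<uInt>(n));
        // combining per-chunk checksums reproduces the whole-stream checksum
        full = adler32_combine(full, per_chunk, static_cast<z_off_t>(n));
    }
    assert(full == adler32(adler32(0L, Z_NULL, 0),
                           reinterpret_cast<const Bytef*>(data),
                           static_cast<uInt>(len)));
}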
diff --git a/scylla/stdx.hh b/scylla/stdx.hh deleted file mode 100644 index d2d78e4..0000000 --- a/scylla/stdx.hh +++ /dev/null @@ -1,27 +0,0 @@ - -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -namespace std { namespace experimental {} } -namespace seastar { namespace stdx = std::experimental; } -using namespace seastar; diff --git a/scylla/streamed_mutation.cc b/scylla/streamed_mutation.cc deleted file mode 100644 index 55dcb75..0000000 --- a/scylla/streamed_mutation.cc +++ /dev/null @@ -1,688 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#include -#include -#include - -#include "mutation.hh" -#include "streamed_mutation.hh" -#include "utils/move.hh" - -std::ostream& -operator<<(std::ostream& os, const clustering_row& row) { - return os << "{clustering_row: ck " << row._ck << " t " << row._t << " row_marker " << row._marker << " cells " << row._cells << "}"; -} - -std::ostream& -operator<<(std::ostream& os, const static_row& row) { - return os << "{static_row: "<< row._cells << "}"; -} - -std::ostream& operator<<(std::ostream& out, position_in_partition_view pos) { - out << "{position: " << pos._bound_weight << ":"; - if (pos._ck) { - out << *pos._ck; - } else { - out << "null"; - } - return out << "}"; -} - -std::ostream& operator<<(std::ostream& out, const position_in_partition& pos) { - return out << static_cast(pos); -} - -std::ostream& operator<<(std::ostream& out, const position_range& range) { - return out << "{" << range.start() << ", " << range.end() << "}"; -} - -mutation_fragment::mutation_fragment(static_row&& r) - : _kind(kind::static_row), _data(std::make_unique()) -{ - new (&_data->_static_row) static_row(std::move(r)); -} - -mutation_fragment::mutation_fragment(clustering_row&& r) - : _kind(kind::clustering_row), _data(std::make_unique()) -{ - new (&_data->_clustering_row) clustering_row(std::move(r)); -} - -mutation_fragment::mutation_fragment(range_tombstone&& r) - : _kind(kind::range_tombstone), _data(std::make_unique()) -{ - new (&_data->_range_tombstone) range_tombstone(std::move(r)); -} - -void mutation_fragment::destroy_data() noexcept -{ - switch (_kind) { - case kind::static_row: - _data->_static_row.~static_row(); - break; - case kind::clustering_row: - _data->_clustering_row.~clustering_row(); - break; - case kind::range_tombstone: - _data->_range_tombstone.~range_tombstone(); - break; - } -} - -namespace { - -struct get_key_visitor { - const clustering_key_prefix& operator()(const clustering_row& cr) { return cr.key(); } - const clustering_key_prefix& operator()(const range_tombstone& rt) { return rt.start; } - template - const clustering_key_prefix& operator()(const T&) { abort(); } -}; - -} - -const clustering_key_prefix& mutation_fragment::key() const -{ - assert(has_key()); - return visit(get_key_visitor()); -} - -void mutation_fragment::apply(const schema& s, mutation_fragment&& mf) -{ - assert(_kind == mf._kind); - assert(!is_range_tombstone()); - _data->_size_in_bytes = stdx::nullopt; - switch (_kind) { - case kind::static_row: - _data->_static_row.apply(s, std::move(mf._data->_static_row)); - mf._data->_static_row.~static_row(); - break; - case kind::clustering_row: - _data->_clustering_row.apply(s, std::move(mf._data->_clustering_row)); - mf._data->_clustering_row.~clustering_row(); - break; - default: abort(); - } - mf._data.reset(); -} - -position_in_partition_view mutation_fragment::position() const -{ - return visit([] (auto& mf) { return mf.position(); }); -} - -std::ostream& operator<<(std::ostream& os, const streamed_mutation& sm) { - auto& s = *sm.schema(); - fprint(os, "{%s.%s key %s streamed mutation}", s.ks_name(), s.cf_name(), sm.decorated_key()); - return os; -} - -std::ostream& operator<<(std::ostream& os, mutation_fragment::kind k) -{ - switch (k) { - case mutation_fragment::kind::static_row: return os << "static row"; - case mutation_fragment::kind::clustering_row: return os << "clustering row"; - case mutation_fragment::kind::range_tombstone: return os << "range tombstone"; - } - abort(); -} - -std::ostream& operator<<(std::ostream& os, const
mutation_fragment& mf) { - os << "{mutation_fragment: " << mf._kind << " " << mf.position() << " "; - mf.visit([&os] (const auto& what) { - os << what; - }); - os << "}"; - return os; -} - -streamed_mutation make_empty_streamed_mutation(schema_ptr s, dht::decorated_key key, streamed_mutation::forwarding fwd) { - return streamed_mutation_from_mutation(mutation(std::move(key), std::move(s)), fwd); -} - -streamed_mutation streamed_mutation_from_mutation(mutation m, streamed_mutation::forwarding fwd) -{ - class reader final : public streamed_mutation::impl { - mutation _mutation; - position_in_partition::less_compare _cmp; - bool _static_row_done = false; - mutation_fragment_opt _rt; - mutation_fragment_opt _cr; - private: - void prepare_next_clustering_row() { - auto& crs = _mutation.partition().clustered_rows(); - while (true) { - auto re = crs.unlink_leftmost_without_rebalance(); - if (!re) { - break; - } - auto re_deleter = defer([re] { current_deleter()(re); }); - if (!re->dummy()) { - _cr = mutation_fragment(std::move(*re)); - break; - } - } - } - void prepare_next_range_tombstone() { - auto& rts = _mutation.partition().row_tombstones().tombstones(); - auto rt = rts.unlink_leftmost_without_rebalance(); - if (rt) { - auto rt_deleter = defer([rt] { current_deleter()(rt); }); - _rt = mutation_fragment(std::move(*rt)); - } - } - mutation_fragment_opt read_next() { - if (_cr && (!_rt || _cmp(_cr->position(), _rt->position()))) { - auto cr = move_and_disengage(_cr); - prepare_next_clustering_row(); - return cr; - } else if (_rt) { - auto rt = move_and_disengage(_rt); - prepare_next_range_tombstone(); - return rt; - } - return { }; - } - private: - void do_fill_buffer() { - if (!_static_row_done) { - _static_row_done = true; - if (!_mutation.partition().static_row().empty()) { - push_mutation_fragment(static_row(std::move(_mutation.partition().static_row()))); - } - } - while (!is_end_of_stream() && !is_buffer_full()) { - auto mfopt = read_next(); - if (mfopt) { - push_mutation_fragment(std::move(*mfopt)); - } else { - _end_of_stream = true; - } - } - } - public: - explicit reader(mutation m) - : streamed_mutation::impl(m.schema(), m.decorated_key(), m.partition().partition_tombstone()) - , _mutation(std::move(m)) - , _cmp(*_mutation.schema()) - { - auto mutation_destroyer = defer([this] { destroy_mutation(); }); - - prepare_next_clustering_row(); - prepare_next_range_tombstone(); - - do_fill_buffer(); - - mutation_destroyer.cancel(); - } - - void destroy_mutation() noexcept { - // After unlink_leftmost_without_rebalance() was called on a bi::set - // we need to complete destroying the tree using that function. - // clear_and_dispose() used by mutation_partition destructor won't - // work properly. 
- - auto& crs = _mutation.partition().clustered_rows(); - auto re = crs.unlink_leftmost_without_rebalance(); - while (re) { - current_deleter()(re); - re = crs.unlink_leftmost_without_rebalance(); - } - - auto& rts = _mutation.partition().row_tombstones().tombstones(); - auto rt = rts.unlink_leftmost_without_rebalance(); - while (rt) { - current_deleter()(rt); - rt = rts.unlink_leftmost_without_rebalance(); - } - } - - ~reader() { - destroy_mutation(); - } - - virtual future<> fill_buffer() override { - do_fill_buffer(); - return make_ready_future<>(); - } - }; - - auto sm = make_streamed_mutation(std::move(m)); - if (fwd) { - return make_forwardable(std::move(sm)); // FIXME: optimize - } - return std::move(sm); -} - -streamed_mutation streamed_mutation_from_forwarding_streamed_mutation(streamed_mutation&& sm) -{ - class reader final : public streamed_mutation::impl { - streamed_mutation _sm; - bool _static_row_done = false; - public: - explicit reader(streamed_mutation&& sm) - : streamed_mutation::impl(sm.schema(), sm.decorated_key(), sm.partition_tombstone()) - , _sm(std::move(sm)) - { } - - virtual future<> fill_buffer() override { - if (!_static_row_done) { - _static_row_done = true; - return _sm().then([this] (auto&& mf) { - if (mf) { - this->push_mutation_fragment(std::move(*mf)); - } - return _sm.fast_forward_to(query::clustering_range{}).then([this] { - return this->fill_buffer(); - }); - }); - } - return do_until([this] { return is_end_of_stream() || is_buffer_full(); }, [this] { - return _sm().then([this] (auto&& mf) { - if (mf) { - this->push_mutation_fragment(std::move(*mf)); - } else { - _end_of_stream = true; - } - }); - }); - } - }; - - return make_streamed_mutation(std::move(sm)); -} - -streamed_mutation make_forwardable(streamed_mutation m) { - class reader : public streamed_mutation::impl { - streamed_mutation _sm; - position_range _current = position_range::for_static_row(); - mutation_fragment_opt _next; - private: - // When the returned future resolves, _next is engaged or _end_of_stream is set.
- future<> ensure_next() { - if (_next) { - return make_ready_future<>(); - } - return _sm().then([this] (auto&& mfo) { - _next = std::move(mfo); - if (!_next) { - _end_of_stream = true; - } - }); - } - public: - explicit reader(streamed_mutation sm) - : impl(sm.schema(), std::move(sm.decorated_key()), sm.partition_tombstone()) - , _sm(std::move(sm)) - { } - - virtual future<> fill_buffer() override { - return repeat([this] { - if (is_buffer_full()) { - return make_ready_future(stop_iteration::yes); - } - return ensure_next().then([this] { - if (is_end_of_stream()) { - return stop_iteration::yes; - } - position_in_partition::less_compare cmp(*_sm.schema()); - if (!cmp(_next->position(), _current.end())) { - _end_of_stream = true; - // keep _next, it may be relevant for next range - return stop_iteration::yes; - } - if (_next->relevant_for_range(*_schema, _current.start())) { - push_mutation_fragment(std::move(*_next)); - } - _next = {}; - return stop_iteration::no; - }); - }); - } - - virtual future<> fast_forward_to(position_range pr) override { - _current = std::move(pr); - _end_of_stream = false; - forward_buffer_to(_current.start()); - return make_ready_future<>(); - } - }; - - return make_streamed_mutation(std::move(m)); -} - -class mutation_merger final : public streamed_mutation::impl { - std::vector _original_readers; - std::vector _next_readers; - // FIXME: do not store all in-flight clustering rows in memory - struct row_and_reader { - mutation_fragment row; - streamed_mutation* reader; - }; - std::vector _readers; - range_tombstone_stream _deferred_tombstones; -private: - void read_next() { - if (_readers.empty()) { - auto rt = _deferred_tombstones.get_next(); - if (rt) { - push_mutation_fragment(std::move(*rt)); - } else { - _end_of_stream = true; - } - return; - } - - position_in_partition::less_compare cmp(*_schema); - auto heap_compare = [&] (auto& a, auto& b) { return cmp(b.row.position(), a.row.position()); }; - - auto result = [&] { - auto rt = _deferred_tombstones.get_next(_readers.front().row); - if (rt) { - return std::move(*rt); - } - boost::range::pop_heap(_readers, heap_compare); - auto mf = std::move(_readers.back().row); - _next_readers.emplace_back(std::move(_readers.back().reader)); - _readers.pop_back(); - return std::move(mf); - }(); - - while (!_readers.empty()) { - if (cmp(result.position(), _readers.front().row.position())) { - break; - } - boost::range::pop_heap(_readers, heap_compare); - if (result.is_range_tombstone()) { - auto remainder = result.as_mutable_range_tombstone().apply(*_schema, std::move(_readers.back().row).as_range_tombstone()); - if (remainder) { - _deferred_tombstones.apply(std::move(*remainder)); - } - } else { - result.apply(*_schema, std::move(_readers.back().row)); - } - _next_readers.emplace_back(std::move(_readers.back().reader)); - _readers.pop_back(); - } - - push_mutation_fragment(std::move(result)); - } - - void do_fill_buffer() { - position_in_partition::less_compare cmp(*_schema); - auto heap_compare = [&] (auto& a, auto& b) { return cmp(b.row.position(), a.row.position()); }; - - for (auto& rd : _next_readers) { - if (rd->is_buffer_empty()) { - assert(rd->is_end_of_stream()); - continue; - } - _readers.emplace_back(row_and_reader { rd->pop_mutation_fragment(), std::move(rd) }); - boost::range::push_heap(_readers, heap_compare); - } - _next_readers.clear(); - - read_next(); - } - void prefill_buffer() { - while (!is_end_of_stream() && !is_buffer_full()) { - for (auto& rd : _next_readers) { - if (rd->is_buffer_empty() && 
!rd->is_end_of_stream()) { - return; - } - } - do_fill_buffer(); - } - } - - static tombstone merge_partition_tombstones(const std::vector& readers) { - tombstone t; - for (auto& r : readers) { - t.apply(r.partition_tombstone()); - } - return t; - } -protected: - virtual future<> fill_buffer() override { - while (!is_end_of_stream() && !is_buffer_full()) { - std::vector> more_data; - for (auto& rd : _next_readers) { - if (rd->is_buffer_empty() && !rd->is_end_of_stream()) { - auto f = rd->fill_buffer(); - if (!f.available() || f.failed()) { - more_data.emplace_back(std::move(f)); - } - } - } - if (!more_data.empty()) { - return parallel_for_each(std::move(more_data), [] (auto& f) { return std::move(f); }).then([this] { return fill_buffer(); }); - } - do_fill_buffer(); - } - return make_ready_future<>(); - } - virtual future<> fast_forward_to(position_range pr) override { - _deferred_tombstones.forward_to(pr.start()); - forward_buffer_to(pr.start()); - _end_of_stream = false; - - _next_readers.clear(); - _readers.clear(); - return parallel_for_each(_original_readers, [this, &pr] (streamed_mutation& rd) { - _next_readers.emplace_back(&rd); - return rd.fast_forward_to(pr); - }); - } -public: - mutation_merger(schema_ptr s, dht::decorated_key dk, std::vector readers) - : streamed_mutation::impl(s, std::move(dk), merge_partition_tombstones(readers)) - , _original_readers(std::move(readers)), _deferred_tombstones(*s) - { - _next_readers.reserve(_original_readers.size()); - _readers.reserve(_original_readers.size()); - for (auto& rd : _original_readers) { - _next_readers.emplace_back(&rd); - } - prefill_buffer(); - } -}; - -streamed_mutation merge_mutations(std::vector ms) -{ - assert(!ms.empty()); - return make_streamed_mutation(ms.back().schema(), ms.back().decorated_key(), std::move(ms)); -} - -mutation_fragment_opt range_tombstone_stream::do_get_next() -{ - auto& rt = *_list.tombstones().begin(); - auto mf = mutation_fragment(std::move(rt)); - _list.tombstones().erase(_list.begin()); - current_deleter()(&rt); - return mf; -} - -mutation_fragment_opt range_tombstone_stream::get_next(const rows_entry& re) -{ - if (!_list.empty()) { - return !_cmp(re.position(), _list.begin()->position()) ? do_get_next() : mutation_fragment_opt(); - } - return { }; -} - -mutation_fragment_opt range_tombstone_stream::get_next(const mutation_fragment& mf) -{ - if (!_list.empty()) { - return !_cmp(mf.position(), _list.begin()->position()) ? do_get_next() : mutation_fragment_opt(); - } - return { }; -} - -mutation_fragment_opt range_tombstone_stream::get_next(position_in_partition_view upper_bound) -{ - if (!_list.empty()) { - return _cmp(_list.begin()->position(), upper_bound) ? 
do_get_next() : mutation_fragment_opt(); - } - return { }; -} - -mutation_fragment_opt range_tombstone_stream::get_next() -{ - if (!_list.empty()) { - return do_get_next(); - } - return { }; -} - -void range_tombstone_stream::forward_to(position_in_partition_view pos) { - _list.erase_where([this, &pos] (const range_tombstone& rt) { - return !_cmp(pos, rt.end_position()); - }); -} - -void range_tombstone_stream::apply(const range_tombstone_list& list, const query::clustering_range& range) { - for (const range_tombstone& rt : list.slice(_schema, range)) { - _list.apply(_schema, rt); - } -} - -void range_tombstone_stream::reset() { - _inside_range_tombstone = false; - _list.clear(); -} - -streamed_mutation reverse_streamed_mutation(streamed_mutation sm) { - class reversing_streamed_mutation final : public streamed_mutation::impl { - streamed_mutation_opt _source; - mutation_fragment_opt _static_row; - std::stack _mutation_fragments; - private: - future<> consume_source() { - return repeat([&] { - return (*_source)().then([&] (mutation_fragment_opt mf) { - if (!mf) { - return stop_iteration::yes; - } else if (mf->is_static_row()) { - _static_row = std::move(mf); - } else { - if (mf->is_range_tombstone()) { - mf->as_mutable_range_tombstone().flip(); - } - _mutation_fragments.emplace(std::move(*mf)); - } - return stop_iteration::no; - }); - }).then([&] { - _source = { }; - }); - } - public: - explicit reversing_streamed_mutation(streamed_mutation sm) - : streamed_mutation::impl(sm.schema(), sm.decorated_key(), sm.partition_tombstone()) - , _source(std::move(sm)) - { } - - virtual future<> fill_buffer() override { - if (_source) { - return consume_source().then([this] { return fill_buffer(); }); - } - if (_static_row) { - push_mutation_fragment(std::move(*_static_row)); - _static_row = { }; - } - while (!is_end_of_stream() && !is_buffer_full()) { - if (_mutation_fragments.empty()) { - _end_of_stream = true; - } else { - push_mutation_fragment(std::move(_mutation_fragments.top())); - _mutation_fragments.pop(); - } - } - return make_ready_future<>(); - } - }; - - return make_streamed_mutation(std::move(sm)); -} - -streamed_mutation streamed_mutation_returning(schema_ptr s, dht::decorated_key key, std::vector frags, tombstone t) { - class reader : public streamed_mutation::impl { - public: - explicit reader(schema_ptr s, dht::decorated_key key, std::vector frags, tombstone t) - : streamed_mutation::impl(std::move(s), std::move(key), t) - { - for (auto&& f : frags) { - push_mutation_fragment(std::move(f)); - } - _end_of_stream = true; - } - - virtual future<> fill_buffer() override { - return make_ready_future<>(); - } - }; - return make_streamed_mutation(std::move(s), std::move(key), std::move(frags), t); -} - -position_range position_range::from_range(const query::clustering_range& range) { - auto bv_range = bound_view::from_range(range); - return { - position_in_partition(position_in_partition::range_tag_t(), bv_range.first), - position_in_partition(position_in_partition::range_tag_t(), bv_range.second) - }; -} - -position_range::position_range(const query::clustering_range& range) - : position_range(from_range(range)) -{ } - -position_range::position_range(query::clustering_range&& range) - : position_range(range) // FIXME: optimize -{ } - -void streamed_mutation::impl::forward_buffer_to(const position_in_partition& pos) { - _buffer.erase(std::remove_if(_buffer.begin(), _buffer.end(), [this, &pos] (mutation_fragment& f) { - return !f.relevant_for_range_assuming_after(*_schema, pos); - }),
_buffer.end()); - - _buffer_size = 0; - for (auto&& f : _buffer) { - _buffer_size += f.memory_usage(); - } -} - -bool mutation_fragment::relevant_for_range(const schema& s, position_in_partition_view pos) const { - position_in_partition::less_compare cmp(s); - if (!cmp(position(), pos)) { - return true; - } - return relevant_for_range_assuming_after(s, pos); -} - -bool mutation_fragment::relevant_for_range_assuming_after(const schema& s, position_in_partition_view pos) const { - position_in_partition::less_compare cmp(s); - // Range tombstones overlapping with the new range are let in - return is_range_tombstone() && cmp(pos, as_range_tombstone().end_position()); -} - -std::ostream& operator<<(std::ostream& out, const range_tombstone_stream& rtl) { - return out << rtl._list; -} diff --git a/scylla/streamed_mutation.hh b/scylla/streamed_mutation.hh deleted file mode 100644 index 04bda34..0000000 --- a/scylla/streamed_mutation.hh +++ /dev/null @@ -1,790 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. - */ - -#pragma once - -#include "mutation_partition.hh" -#include "utils/optimized_optional.hh" -#include "position_in_partition.hh" - -#include - -#include "stdx.hh" - -// mutation_fragments are the objects that a streamed_mutation is going to -// stream. They can represent: -// - a static row -// - a clustering row -// - a range tombstone -// -// There exists an ordering (implemented in the position_in_partition class) between -// mutation_fragment objects. It reflects the order in which the content of -// a partition appears in the sstables.
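// Illustration (not from the original file): a minimal consumer shaped like
// the MutationFragmentConsumer concept sketched further down. Thanks to the
// ordering guarantee above, the static row (if any) is seen before any
// clustering fragment, and all tombstones relevant to a row arrive before
// the row itself. The fragment types used here are defined below in this header.
struct counting_consumer {
    size_t statics = 0, rows = 0, tombstones = 0;
    stop_iteration consume(static_row&&) { ++statics; return stop_iteration::no; }
    stop_iteration consume(clustering_row&&) { ++rows; return stop_iteration::no; }
    stop_iteration consume(range_tombstone&&) { ++tombstones; return stop_iteration::no; }
};
// Driven, e.g., by: while (auto mf = sm().get0()) { std::move(*mf).consume(c); }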
- -class clustering_row { - clustering_key_prefix _ck; - row_tombstone _t; - row_marker _marker; - row _cells; -public: - explicit clustering_row(clustering_key_prefix ck) : _ck(std::move(ck)) { } - clustering_row(clustering_key_prefix ck, row_tombstone t, row_marker marker, row cells) - : _ck(std::move(ck)), _t(t), _marker(std::move(marker)), _cells(std::move(cells)) { - _t.maybe_shadow(marker); - } - clustering_row(const rows_entry& re) - : clustering_row(re.key(), re.row().deleted_at(), re.row().marker(), re.row().cells()) { } - clustering_row(rows_entry&& re) - : clustering_row(std::move(re.key()), re.row().deleted_at(), re.row().marker(), std::move(re.row().cells())) { } - - clustering_key_prefix& key() { return _ck; } - const clustering_key_prefix& key() const { return _ck; } - - void remove_tombstone() { _t = {}; } - row_tombstone tomb() const { return _t; } - - const row_marker& marker() const { return _marker; } - row_marker& marker() { return _marker; } - - const row& cells() const { return _cells; } - row& cells() { return _cells; } - - bool empty() const { - return !_t && _marker.is_missing() && _cells.empty(); - } - - void apply(const schema& s, clustering_row&& cr) { - _marker.apply(std::move(cr._marker)); - _t.apply(cr._t, _marker); - _cells.apply(s, column_kind::regular_column, std::move(cr._cells)); - } - void apply(const schema& s, const clustering_row& cr) { - _marker.apply(cr._marker); - _t.apply(cr._t, _marker); - _cells.apply(s, column_kind::regular_column, cr._cells); - } - void set_cell(const column_definition& def, atomic_cell_or_collection&& value) { - _cells.apply(def, std::move(value)); - } - void apply(row_marker rm) { - _marker.apply(std::move(rm)); - _t.maybe_shadow(_marker); - } - void apply(tombstone t) { - _t.apply(t); - } - void apply(shadowable_tombstone t) { - _t.apply(t, _marker); - } - void apply(const schema& s, const rows_entry& r) { - _marker.apply(r.row().marker()); - _t.apply(r.row().deleted_at(), _marker); - _cells.apply(s, column_kind::regular_column, r.row().cells()); - } - - position_in_partition_view position() const; - - size_t external_memory_usage() const { - return _ck.external_memory_usage() + _cells.external_memory_usage(); - } - - size_t memory_usage() const { - return sizeof(clustering_row) + external_memory_usage(); - } - - bool equal(const schema& s, const clustering_row& other) const { - return _ck.equal(s, other._ck) - && _t == other._t - && _marker == other._marker - && _cells.equal(column_kind::regular_column, s, other._cells, s); - } - - friend std::ostream& operator<<(std::ostream& os, const clustering_row& row); -}; - -class static_row { - row _cells; -public: - static_row() = default; - explicit static_row(const row& r) : _cells(r) { } - explicit static_row(row&& r) : _cells(std::move(r)) { } - - row& cells() { return _cells; } - const row& cells() const { return _cells; } - - bool empty() const { - return _cells.empty(); - } - - void apply(const schema& s, const row& r) { - _cells.apply(s, column_kind::static_column, r); - } - void apply(const schema& s, static_row&& sr) { - _cells.apply(s, column_kind::static_column, std::move(sr._cells)); - } - void set_cell(const column_definition& def, atomic_cell_or_collection&& value) { - _cells.apply(def, std::move(value)); - } - - position_in_partition_view position() const; - - size_t external_memory_usage() const { - return _cells.external_memory_usage(); - } - - size_t memory_usage() const { - return sizeof(static_row) + external_memory_usage(); - } - - bool equal(const schema&
s, const static_row& other) const { - return _cells.equal(column_kind::static_column, s, other._cells, s); - } - - friend std::ostream& operator<<(std::ostream& is, const static_row& row); -}; - -class mutation_fragment { -public: - enum class kind { - static_row, - clustering_row, - range_tombstone, - }; -private: - struct data { - data() { } - ~data() { } - - stdx::optional _size_in_bytes; - union { - static_row _static_row; - clustering_row _clustering_row; - range_tombstone _range_tombstone; - }; - }; -private: - kind _kind; - std::unique_ptr _data; - - mutation_fragment() = default; - explicit operator bool() const noexcept { return bool(_data); } - void destroy_data() noexcept; - friend class optimized_optional; - - friend class position_in_partition; -public: - mutation_fragment(static_row&& r); - mutation_fragment(clustering_row&& r); - mutation_fragment(range_tombstone&& r); - - mutation_fragment(const mutation_fragment& o) - : _kind(o._kind), _data(std::make_unique()) { - switch(_kind) { - case kind::static_row: - new (&_data->_static_row) static_row(o._data->_static_row); - break; - case kind::clustering_row: - new (&_data->_clustering_row) clustering_row(o._data->_clustering_row); - break; - case kind::range_tombstone: - new (&_data->_range_tombstone) range_tombstone(o._data->_range_tombstone); - break; - } - } - mutation_fragment(mutation_fragment&& other) = default; - mutation_fragment& operator=(const mutation_fragment& other) { - if (this != &other) { - mutation_fragment copy(other); - this->~mutation_fragment(); - new (this) mutation_fragment(std::move(copy)); - } - return *this; - } - mutation_fragment& operator=(mutation_fragment&& other) noexcept { - if (this != &other) { - this->~mutation_fragment(); - new (this) mutation_fragment(std::move(other)); - } - return *this; - } - [[gnu::always_inline]] - ~mutation_fragment() { - if (_data) { - destroy_data(); - } - } - - position_in_partition_view position() const; - - // Checks if this fragment may be relevant for any range starting at given position. - bool relevant_for_range(const schema& s, position_in_partition_view pos) const; - - // Like relevant_for_range() but makes use of assumption that pos is greater - // than the starting position of this fragment. 
- bool relevant_for_range_assuming_after(const schema& s, position_in_partition_view pos) const; - - bool has_key() const { return !is_static_row(); } - // Requirements: has_key() == true - const clustering_key_prefix& key() const; - - kind mutation_fragment_kind() const { return _kind; } - - bool is_static_row() const { return _kind == kind::static_row; } - bool is_clustering_row() const { return _kind == kind::clustering_row; } - bool is_range_tombstone() const { return _kind == kind::range_tombstone; } - - static_row& as_mutable_static_row() { - _data->_size_in_bytes = stdx::nullopt; - return _data->_static_row; - } - clustering_row& as_mutable_clustering_row() { - _data->_size_in_bytes = stdx::nullopt; - return _data->_clustering_row; - } - range_tombstone& as_mutable_range_tombstone() { - _data->_size_in_bytes = stdx::nullopt; - return _data->_range_tombstone; - } - - static_row&& as_static_row() && { return std::move(_data->_static_row); } - clustering_row&& as_clustering_row() && { return std::move(_data->_clustering_row); } - range_tombstone&& as_range_tombstone() && { return std::move(_data->_range_tombstone); } - - const static_row& as_static_row() const & { return _data->_static_row; } - const clustering_row& as_clustering_row() const & { return _data->_clustering_row; } - const range_tombstone& as_range_tombstone() const & { return _data->_range_tombstone; } - - // Requirements: mutation_fragment_kind() == mf.mutation_fragment_kind() && !is_range_tombstone() - void apply(const schema& s, mutation_fragment&& mf); - - /* - template - concept bool MutationFragmentConsumer() { - return requires(T t, static_row sr, clustering_row cr, range_tombstone rt) { - { t.consume(std::move(sr)) } -> ReturnType; - { t.consume(std::move(cr)) } -> ReturnType; - { t.consume(std::move(rt)) } -> ReturnType; - }; - } - */ - template - decltype(auto) consume(Consumer& consumer) && { - switch (_kind) { - case kind::static_row: - return consumer.consume(std::move(_data->_static_row)); - case kind::clustering_row: - return consumer.consume(std::move(_data->_clustering_row)); - case kind::range_tombstone: - return consumer.consume(std::move(_data->_range_tombstone)); - } - abort(); - } - - /* - template - concept bool MutationFragmentVisitor() { - return requires(T t, const static_row& sr, const clustering_row& cr, const range_tombstone& rt) { - { t(sr) } -> ReturnType; - { t(cr) } -> ReturnType; - { t(rt) } -> ReturnType; - }; - } - */ - template - decltype(auto) visit(Visitor&& visitor) const { - switch (_kind) { - case kind::static_row: - return visitor(as_static_row()); - case kind::clustering_row: - return visitor(as_clustering_row()); - case kind::range_tombstone: - return visitor(as_range_tombstone()); - } - abort(); - } - - size_t memory_usage() const { - if (!_data->_size_in_bytes) { - _data->_size_in_bytes = sizeof(data) + visit([] (auto& mf) { return mf.external_memory_usage(); }); - } - return *_data->_size_in_bytes; - } - - bool equal(const schema& s, const mutation_fragment& other) const { - if (other._kind != _kind) { - return false; - } - switch(_kind) { - case kind::static_row: - return as_static_row().equal(s, other.as_static_row()); - case kind::clustering_row: - return as_clustering_row().equal(s, other.as_clustering_row()); - case kind::range_tombstone: - return as_range_tombstone().equal(s, other.as_range_tombstone()); - } - abort(); - } - - friend std::ostream& operator<<(std::ostream&, const mutation_fragment& mf); -}; - -inline position_in_partition_view static_row::position() 
-inline position_in_partition_view static_row::position() const -{ - return position_in_partition_view(position_in_partition_view::static_row_tag_t()); -} - -inline position_in_partition_view clustering_row::position() const -{ - return position_in_partition_view(position_in_partition_view::clustering_row_tag_t(), _ck); -} - -std::ostream& operator<<(std::ostream&, mutation_fragment::kind); - -std::ostream& operator<<(std::ostream&, const mutation_fragment& mf); - -template<> -struct move_constructor_disengages<mutation_fragment> { - enum { value = true }; -}; -using mutation_fragment_opt = optimized_optional<mutation_fragment>; - -// streamed_mutation represents a mutation in a form of a stream of -// mutation_fragments. streamed_mutation emits mutation fragments in the order -// they should appear in the sstables, i.e. static row is always the first one, -// then clustering rows and range tombstones are emitted according to the -// lexicographical ordering of their clustering keys and bounds of the range -// tombstones. -// -// The ordering of mutation_fragments also guarantees that by the time the -// consumer sees a clustering row it has already received all relevant tombstones. -// -// Partition key and partition tombstone are not streamed and are part of the -// streamed_mutation itself. -class streamed_mutation { -public: - // Determines whether streamed_mutation is in forwarding mode or not. - // - // In forwarding mode the stream does not return all fragments right away, - // but only those belonging to the current clustering range. Initially - // current range only covers the static row. The stream can be forwarded - // (even before end-of-stream) to a later range with fast_forward_to(). - // Forwarding doesn't change initial restrictions of the stream, it can - // only be used to skip over data. - // - // Monotonicity of positions is preserved by forwarding. That is fragments - // emitted after forwarding will have greater positions than any fragments - // emitted before forwarding. - // - // For any range, all range tombstones relevant for that range which are - // present in the original stream will be emitted. Range tombstones - // emitted before forwarding which overlap with the new range are not - // necessarily re-emitted. - // - // When streamed_mutation is not in forwarding mode, fast_forward_to() - // cannot be used. - class forwarding_tag; - using forwarding = bool_class<forwarding_tag>; - - // streamed_mutation uses batching. The mutation implementations are - // supposed to fill a buffer with mutation fragments until is_buffer_full() - // or end of stream is encountered. - class impl { - circular_buffer<mutation_fragment> _buffer; - size_t _buffer_size = 0; - protected: - static constexpr size_t max_buffer_size_in_bytes = 8 * 1024; - - schema_ptr _schema; - dht::decorated_key _key; - tombstone _partition_tombstone; - - bool _end_of_stream = false; - - friend class streamed_mutation; - protected: - template<typename... Args> - void push_mutation_fragment(Args&&... args) { - auto mf = mutation_fragment(std::forward<Args>(args)...); - _buffer_size += mf.memory_usage(); - _buffer.emplace_back(std::move(mf)); - } - public: - explicit impl(schema_ptr s, dht::decorated_key dk, tombstone pt) - : _schema(std::move(s)), _key(std::move(dk)), _partition_tombstone(pt) - { } - - virtual ~impl() { } - virtual future<> fill_buffer() = 0; - - // See streamed_mutation::fast_forward_to(). - virtual future<> fast_forward_to(position_range) { - throw std::bad_function_call(); // FIXME: make pure virtual after implementing everywhere.
- } - - bool is_end_of_stream() const { return _end_of_stream; } - bool is_buffer_empty() const { return _buffer.empty(); } - bool is_buffer_full() const { return _buffer_size >= max_buffer_size_in_bytes; } - - mutation_fragment pop_mutation_fragment() { - auto mf = std::move(_buffer.front()); - _buffer.pop_front(); - _buffer_size -= mf.memory_usage(); - return mf; - } - - future<mutation_fragment_opt> operator()() { - if (is_buffer_empty()) { - if (is_end_of_stream()) { - return make_ready_future<mutation_fragment_opt>(); - } - return fill_buffer().then([this] { return operator()(); }); - } - return make_ready_future<mutation_fragment_opt>(pop_mutation_fragment()); - } - - // Removes all fragments from the buffer which are not relevant for any range starting at given position. - // It is assumed that pos is greater than positions of fragments already in the buffer. - void forward_buffer_to(const position_in_partition& pos); - }; -private: - std::unique_ptr<impl> _impl; - - streamed_mutation() = default; - explicit operator bool() const { return bool(_impl); } - friend class optimized_optional<streamed_mutation>; -public: - explicit streamed_mutation(std::unique_ptr<impl> i) - : _impl(std::move(i)) { } - - const partition_key& key() const { return _impl->_key.key(); } - const dht::decorated_key& decorated_key() const { return _impl->_key; } - - const schema_ptr& schema() const { return _impl->_schema; } - - tombstone partition_tombstone() const { return _impl->_partition_tombstone; } - - bool is_end_of_stream() const { return _impl->is_end_of_stream(); } - bool is_buffer_empty() const { return _impl->is_buffer_empty(); } - bool is_buffer_full() const { return _impl->is_buffer_full(); } - - mutation_fragment pop_mutation_fragment() { return _impl->pop_mutation_fragment(); } - - future<> fill_buffer() { return _impl->fill_buffer(); } - - // Skips to a later range of rows. - // The new range must not overlap with the current range. - // - // See docs of streamed_mutation::forwarding for semantics. - future<> fast_forward_to(position_range pr) { - return _impl->fast_forward_to(std::move(pr)); - } - - future<mutation_fragment_opt> operator()() { - return _impl->operator()(); - } -}; - -// Adapts streamed_mutation to a streamed_mutation which is in forwarding mode. -streamed_mutation make_forwardable(streamed_mutation); - -std::ostream& operator<<(std::ostream& os, const streamed_mutation& sm);
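Two usage sketches for the interface above. On the producing side an implementation derives from streamed_mutation::impl and fills the buffer until the size cap or end of stream is hit; the hypothetical reader below serves fragments out of a pre-built vector, which is roughly what streamed_mutation_returning() (declared further down) provides:

    // Illustrative impl: emits a fixed vector of fragments.
    class vector_reader final : public streamed_mutation::impl {
        std::vector<mutation_fragment> _fragments;
        size_t _next = 0;
    public:
        vector_reader(schema_ptr s, dht::decorated_key dk, tombstone t,
                      std::vector<mutation_fragment> fragments)
            : impl(std::move(s), std::move(dk), t), _fragments(std::move(fragments)) { }

        virtual future<> fill_buffer() override {
            // Respect the 8 KiB batching limit; resume from _next on the next call.
            while (!is_buffer_full() && _next != _fragments.size()) {
                push_mutation_fragment(std::move(_fragments[_next++]));
            }
            _end_of_stream = (_next == _fragments.size());
            return make_ready_future<>();
        }
    };
    // usage: make_streamed_mutation<vector_reader>(s, dk, t, std::move(fragments))

On the consuming side operator() hides the batching entirely: it refills the buffer as needed and yields a disengaged optional at end of stream, so a drain loop reduces to a Seastar repeat():

    // Count the fragments in a stream.
    future<size_t> count_fragments(streamed_mutation& sm) {
        return do_with(size_t(0), [&sm] (size_t& n) {
            return repeat([&sm, &n] {
                return sm().then([&n] (mutation_fragment_opt mfo) {
                    if (!mfo) {
                        return stop_iteration::yes;  // stream exhausted
                    }
                    ++n;
                    return stop_iteration::no;
                });
            }).then([&n] { return n; });
        });
    }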
-template<typename Impl, typename... Args> -streamed_mutation make_streamed_mutation(Args&&... args) { - return streamed_mutation(std::make_unique<Impl>(std::forward<Args>(args)...)); -} - -template<> -struct move_constructor_disengages<streamed_mutation> { - enum { value = true }; -}; -using streamed_mutation_opt = optimized_optional<streamed_mutation>; - -/* -template<typename T> -concept bool StreamedMutationConsumer() { - return MutationFragmentConsumer<T, stop_iteration>() - && requires(T t, tombstone tomb) - { - { t.consume(tomb) } -> stop_iteration; - t.consume_end_of_stream(); - }; -} -*/ -template<typename Consumer> -auto consume(streamed_mutation& m, Consumer consumer) { - return do_with(std::move(consumer), [&m] (Consumer& c) { - if (c.consume(m.partition_tombstone()) == stop_iteration::yes) { - return make_ready_future<>().then([&] { return c.consume_end_of_stream(); }); - } - return repeat([&m, &c] { - if (m.is_buffer_empty()) { - if (m.is_end_of_stream()) { - return make_ready_future<stop_iteration>(stop_iteration::yes); - } - return m.fill_buffer().then([] { return stop_iteration::no; }); - } - return make_ready_future<stop_iteration>(m.pop_mutation_fragment().consume(c)); - }).then([&c] { - return c.consume_end_of_stream(); - }); - }); -} - -class mutation; - -streamed_mutation streamed_mutation_from_mutation(mutation, streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); -streamed_mutation streamed_mutation_returning(schema_ptr, dht::decorated_key, std::vector<mutation_fragment>, tombstone t = {}); -streamed_mutation streamed_mutation_from_forwarding_streamed_mutation(streamed_mutation&&); - -// Requires all streamed_mutations to have the same schema. -streamed_mutation merge_mutations(std::vector<streamed_mutation>); -streamed_mutation reverse_streamed_mutation(streamed_mutation); - -streamed_mutation make_empty_streamed_mutation(schema_ptr, dht::decorated_key, streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); - -// range_tombstone_stream is a helper object that simplifies producing a stream -// of range tombstones and merging it with a stream of clustering rows. -// Tombstones are added using apply() and retrieved using get_next(). -// -// get_next(const rows_entry&) and get_next(const mutation_fragment&) allow -// merging the stream of tombstones with a stream of clustering rows. If these -// overloads return disengaged optional it means that there is no tombstone -// in the stream that should be emitted before the object given as an argument. -// (And, consequently, if the optional is engaged that tombstone should be -// emitted first). After calling any of these overloads with a mutation_fragment -// which is at some position in partition P no range tombstone can be added to -// the stream whose start bound is before that position. -// -// The get_next() overload which doesn't take any arguments is used to return the -// remaining tombstones. After it was called no new tombstones can be added -// to the stream. -class range_tombstone_stream { - const schema& _schema; - position_in_partition::less_compare _cmp; - range_tombstone_list _list; - bool _inside_range_tombstone = false; -private: - mutation_fragment_opt do_get_next(); -public: - range_tombstone_stream(const schema& s) : _schema(s), _cmp(s), _list(s) { } - mutation_fragment_opt get_next(const rows_entry&); - mutation_fragment_opt get_next(const mutation_fragment&); - // Returns next fragment with position before upper_bound or disengaged optional if no such fragments are left. - mutation_fragment_opt get_next(position_in_partition_view upper_bound); - mutation_fragment_opt get_next(); - // Forgets all tombstones which are not relevant for any range starting at given position.
- void forward_to(position_in_partition_view); - - void apply(range_tombstone&& rt) { - _list.apply(_schema, std::move(rt)); - } - void apply(const range_tombstone_list& list) { - _list.apply(_schema, list); - } - void apply(const range_tombstone_list&, const query::clustering_range&); - void reset(); - friend std::ostream& operator<<(std::ostream& out, const range_tombstone_stream&); -}; - -// mutation_hasher is an equivalent of hashing_partition_visitor for -// streamed mutations. -// -// mutation_hasher *IS NOT* compatible with hashing_partition_visitor. -// -// streamed_mutations do not guarantee that the emitted range tombstones -// are disjoint. However, we need to hash them after they are made disjoint -// because only in such form the hash won't depend on the unpredictable -// factors (e.g. which sstables contain which parts of the mutation). -template -class mutation_hasher { - const schema& _schema; - Hasher& _hasher; - - bound_view::compare _cmp; - range_tombstone_list _rt_list; - bool _inside_range_tombstone = false; -private: - void consume_cell(const column_definition& col, const atomic_cell_or_collection& cell) { - feed_hash(_hasher, col.name()); - feed_hash(_hasher, col.type->name()); - cell.feed_hash(_hasher, col); - } - - void consume_range_tombstone_start(const range_tombstone& rt) { - rt.start.feed_hash(_hasher, _schema); - feed_hash(_hasher, rt.start_kind); - feed_hash(_hasher, rt.tomb); - } - - void consume_range_tombstone_end(const range_tombstone& rt) { - rt.end.feed_hash(_hasher, _schema); - feed_hash(_hasher, rt.end_kind); - } - - void pop_rt_front() { - auto& rt = *_rt_list.tombstones().begin(); - _rt_list.tombstones().erase(_rt_list.begin()); - current_deleter()(&rt); - } - - void consume_range_tombstones_until(const clustering_row& cr) { - while (!_rt_list.empty()) { - auto it = _rt_list.begin(); - if (_inside_range_tombstone) { - if (_cmp(it->end_bound(), cr.key())) { - consume_range_tombstone_end(*it); - _inside_range_tombstone = false; - pop_rt_front(); - } else { - break; - } - } else { - if (_cmp(it->start_bound(), cr.key())) { - consume_range_tombstone_start(*it); - _inside_range_tombstone = true; - } else { - break; - } - } - } - } - - void consume_range_tombstones_until_end() { - if (_inside_range_tombstone) { - consume_range_tombstone_end(*_rt_list.begin()); - pop_rt_front(); - } - for (auto&& rt : _rt_list) { - consume_range_tombstone_start(rt); - consume_range_tombstone_end(rt); - } - } -public: - mutation_hasher(const schema& s, Hasher& h) - : _schema(s), _hasher(h), _cmp(s), _rt_list(s) { } - - stop_iteration consume(tombstone t) { - feed_hash(_hasher, t); - return stop_iteration::no; - } - - stop_iteration consume(const static_row& sr) { - sr.cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) { - auto&& col = _schema.static_column_at(id); - consume_cell(col, cell); - }); - return stop_iteration::no; - } - - stop_iteration consume(const clustering_row& cr) { - consume_range_tombstones_until(cr); - - cr.key().feed_hash(_hasher, _schema); - feed_hash(_hasher, cr.tomb()); - feed_hash(_hasher, cr.marker()); - cr.cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) { - auto&& col = _schema.regular_column_at(id); - consume_cell(col, cell); - }); - return stop_iteration::no; - } - - stop_iteration consume(range_tombstone&& rt) { - _rt_list.apply(_schema, std::move(rt)); - return stop_iteration::no; - } - - void consume_end_of_stream() { - consume_range_tombstones_until_end(); - } -}; - -// Consumes 
mutation fragments until StopCondition is true. -// The consumer will stop iff StopCondition returns true, in particular -// reaching the end of stream alone won't stop the reader. -template -GCC6_CONCEPT(requires requires(StopCondition stop, ConsumeMutationFragment consume_mf, ConsumeEndOfStream consume_eos, mutation_fragment mf) { - { stop() } -> bool; - { consume_mf(std::move(mf)) } -> void; - { consume_eos() } -> future<>; -}) -future<> consume_mutation_fragments_until(streamed_mutation& sm, StopCondition&& stop, - ConsumeMutationFragment&& consume_mf, ConsumeEndOfStream&& consume_eos) { - return do_until([stop] { return stop(); }, [&sm, stop, consume_mf, consume_eos] { - while (!sm.is_buffer_empty()) { - consume_mf(sm.pop_mutation_fragment()); - if (stop()) { - return make_ready_future<>(); - } - } - if (sm.is_end_of_stream()) { - return consume_eos(); - } - return sm.fill_buffer(); - }); -} - -GCC6_CONCEPT( - // F gets a stream element as an argument and returns the new value which replaces that element - // in the transformed stream. - template - concept bool StreamedMutationTranformer() { - return requires(F f, mutation_fragment mf, schema_ptr s) { - { f(std::move(mf)) } -> mutation_fragment - { f(s) } -> schema_ptr - }; - } -) - -// Creates a stream which is like sm but with transformation applied to the elements. -template -GCC6_CONCEPT( - requires StreamedMutationTranformer() -) -streamed_mutation transform(streamed_mutation sm, T t) { - class reader : public streamed_mutation::impl { - streamed_mutation _sm; - T _t; - public: - explicit reader(streamed_mutation sm, T&& t) - : impl(t(sm.schema()), sm.decorated_key(), sm.partition_tombstone()) - , _sm(std::move(sm)) - , _t(std::move(t)) - { } - - virtual future<> fill_buffer() override { - return _sm.fill_buffer().then([this] { - while (!_sm.is_buffer_empty()) { - push_mutation_fragment(_t(_sm.pop_mutation_fragment())); - } - _end_of_stream = _sm.is_end_of_stream(); - }); - } - - virtual future<> fast_forward_to(position_range pr) override { - _end_of_stream = false; - forward_buffer_to(pr.start()); - return _sm.fast_forward_to(std::move(pr)); - } - }; - return make_streamed_mutation(std::move(sm), std::move(t)); -} diff --git a/scylla/streaming/prepare_message.hh b/scylla/streaming/prepare_message.hh deleted file mode 100644 index dc5e477..0000000 --- a/scylla/streaming/prepare_message.hh +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. 
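Three usage sketches for the pieces above. First, the commented-out StreamedMutationConsumer concept spells out the overload set that consume() drives; any plain struct with those members works, for example one that tallies clustering rows:

    struct row_counter {
        size_t rows = 0;
        stop_iteration consume(tombstone) { return stop_iteration::no; }
        stop_iteration consume(static_row&&) { return stop_iteration::no; }
        stop_iteration consume(clustering_row&&) { ++rows; return stop_iteration::no; }
        stop_iteration consume(range_tombstone&&) { return stop_iteration::no; }
        size_t consume_end_of_stream() { return rows; }
    };
    // usage: consume(sm, row_counter{}) resolves to a future carrying the row count

Second, mutation_hasher composes with the same consume() helper, hashing the stream only after its range tombstones have been made disjoint (md5_hasher stands in for any Hasher usable with feed_hash()):

    future<> hash_mutation(streamed_mutation& sm, const schema& s, md5_hasher& h) {
        return consume(sm, mutation_hasher<md5_hasher>(s, h));  // digest accumulates in h
    }

Third, transform() only needs a callable that maps both the schema and each fragment; a pass-through transformer that logs fragment kinds might look like:

    struct logging_transformer {
        schema_ptr operator()(schema_ptr s) const { return s; }
        mutation_fragment operator()(mutation_fragment mf) const {
            std::clog << mf.mutation_fragment_kind() << "\n";  // needs <iostream>
            return mf;
        }
    };
    // usage: auto logged = transform(std::move(sm), logging_transformer{});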
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "streaming/stream_request.hh" -#include "streaming/stream_summary.hh" - -namespace streaming { - -class prepare_message { -public: - /** - * Streaming requests - */ - std::vector requests; - - /** - * Summaries of streaming out - */ - std::vector summaries; - - uint32_t dst_cpu_id; - - prepare_message() = default; - prepare_message(std::vector reqs, std::vector sums, uint32_t dst_cpu_id_ = -1) - : requests(std::move(reqs)) - , summaries(std::move(sums)) - , dst_cpu_id(dst_cpu_id_) { - } -}; - -} // namespace streaming diff --git a/scylla/streaming/progress_info.cc b/scylla/streaming/progress_info.cc deleted file mode 100644 index 4518944..0000000 --- a/scylla/streaming/progress_info.cc +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "streaming/progress_info.hh" - -namespace streaming { - -std::ostream& operator<<(std::ostream& os, const progress_info& x) { - sstring dir = x.dir == progress_info::direction::OUT ? 
"sent to " : "received from "; - return os << sprint("%s %ld/(%f\%) %s %s", x.file_name, x.current_bytes, - x.current_bytes * 100 / x.total_bytes, dir, x.peer); -} - -} diff --git a/scylla/streaming/progress_info.hh b/scylla/streaming/progress_info.hh deleted file mode 100644 index 1062ca6..0000000 --- a/scylla/streaming/progress_info.hh +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "gms/inet_address.hh" -#include "core/sstring.hh" - -namespace streaming { - -/** - * ProgressInfo contains file transfer progress. - */ -class progress_info { -public: - using inet_address = gms::inet_address; - /** - * Direction of the stream. - */ - enum class direction { OUT, IN }; - - inet_address peer; - sstring file_name; - direction dir; - long current_bytes; - long total_bytes; - - progress_info() = default; - progress_info(inet_address _peer, sstring _file_name, direction _dir, long _current_bytes, long _total_bytes) - : peer(_peer) - , file_name(_file_name) - , dir(_dir) - , current_bytes(_current_bytes) - , total_bytes(_total_bytes) { - } - - /** - * @return true if file transfer is completed - */ - bool is_completed() { - return current_bytes >= total_bytes; - } - - friend std::ostream& operator<<(std::ostream& os, const progress_info& x); -}; - -} // namespace streaming diff --git a/scylla/streaming/session_info.cc b/scylla/streaming/session_info.cc deleted file mode 100644 index 7b15b4d..0000000 --- a/scylla/streaming/session_info.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "streaming/session_info.hh" - -namespace streaming { - -void session_info::update_progress(progress_info new_progress) { - assert(peer == new_progress.peer); - auto& current_files = new_progress.dir == progress_info::direction::IN - ? receiving_files : sending_files; - current_files[new_progress.file_name] = new_progress; -} - -std::vector session_info::get_receiving_files() { - std::vector ret; - for (auto& x : receiving_files) { - ret.push_back(x.second); - } - return ret; -} - -std::vector session_info::get_sending_files() { - std::vector ret; - for (auto& x : sending_files) { - ret.push_back(x.second); - } - return ret; -} - -long session_info::get_total_size_in_progress(std::vector files) { - long total = 0; - for (auto& file : files) { - total += file.current_bytes; - } - return total; -} - -long session_info::get_total_files(std::vector& summaries) { - long total = 0; - for (auto& summary : summaries) { - total += summary.files; - } - return total; -} - -long session_info::get_total_sizes(std::vector& summaries) { - long total = 0; - for (auto& summary : summaries) - total += summary.total_size; - return total; -} - -long session_info::get_total_files_completed(std::vector files) { - long size = 0; - for (auto& x : files) { - if (x.is_completed()) { - size++; - } - } - return size; -} - -} // namespace streaming diff --git a/scylla/streaming/session_info.hh b/scylla/streaming/session_info.hh deleted file mode 100644 index 288742a..0000000 --- a/scylla/streaming/session_info.hh +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "gms/inet_address.hh" -#include "streaming/stream_summary.hh" -#include "streaming/stream_session_state.hh" -#include "streaming/progress_info.hh" -#include -#include - -namespace streaming { - -/** - * Stream session info. - */ -class session_info { -public: - using inet_address = gms::inet_address; - inet_address peer; - /** Immutable collection of receiving summaries */ - std::vector receiving_summaries; - /** Immutable collection of sending summaries*/ - std::vector sending_summaries; - /** Current session state */ - stream_session_state state; - - std::map receiving_files; - std::map sending_files; - - session_info() = default; - session_info(inet_address peer_, - std::vector receiving_summaries_, - std::vector sending_summaries_, - stream_session_state state_) - : peer(peer_) - , receiving_summaries(std::move(receiving_summaries_)) - , sending_summaries(std::move(sending_summaries_)) - , state(state_) { - } - - bool is_failed() const { - return state == stream_session_state::FAILED; - } - - /** - * Update progress of receiving/sending file. - * - * @param newProgress new progress info - */ - void update_progress(progress_info new_progress); - - std::vector get_receiving_files(); - - std::vector get_sending_files(); - - /** - * @return total number of files already received. - */ - long get_total_files_received() { - return get_total_files_completed(get_receiving_files()); - } - - /** - * @return total number of files already sent. - */ - long get_total_files_sent() { - return get_total_files_completed(get_sending_files()); - } - - /** - * @return total size(in bytes) already received. - */ - long get_total_size_received() { - return get_total_size_in_progress(get_receiving_files()); - } - - /** - * @return total size(in bytes) already sent. 
- */ - long get_total_size_sent() { - return get_total_size_in_progress(get_sending_files()); - } - - /** - * @return total number of files to receive in the session - */ - long get_total_files_to_receive() { - return get_total_files(receiving_summaries); - } - - /** - * @return total number of files to send in the session - */ - long get_total_files_to_send() { - return get_total_files(sending_summaries); - } - - /** - * @return total size(in bytes) to receive in the session - */ - long get_total_size_to_receive() { - return get_total_sizes(receiving_summaries); - } - - /** - * @return total size(in bytes) to send in the session - */ - long get_total_size_to_send() { - return get_total_sizes(sending_summaries); - } - -private: - long get_total_size_in_progress(std::vector files); - - long get_total_files(std::vector& summaries); - - long get_total_sizes(std::vector& summaries); - - long get_total_files_completed(std::vector files); -}; - -} // namespace streaming diff --git a/scylla/streaming/stream_coordinator.cc b/scylla/streaming/stream_coordinator.cc deleted file mode 100644 index 71b5202..0000000 --- a/scylla/streaming/stream_coordinator.cc +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
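session_info keeps per-file progress in the two maps and derives every aggregate on demand, either from those maps or from the immutable summaries. A sketch of the update/query cycle, assuming peer, the two summary vectors, and the pi value from the previous sketch are in scope (the state value is illustrative):

    streaming::session_info si(peer, receiving_summaries, sending_summaries,
                               streaming::stream_session_state::STREAMING);
    si.update_progress(pi);                       // direction::OUT lands in sending_files
    long sent_bytes = si.get_total_size_sent();   // sums current_bytes over sending_files
    long sent_files = si.get_total_files_sent();  // counts files with is_completed()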
- */ - -#include "streaming/stream_detail.hh" -#include "streaming/stream_session_state.hh" -#include "streaming/stream_coordinator.hh" -#include "log.hh" - -namespace streaming { - -extern logging::logger sslog; - -using gms::inet_address; - -bool stream_coordinator::has_active_sessions() { - for (auto& x : _peer_sessions) { - auto state = x.second->get_state(); - if (state != stream_session_state::COMPLETE && state != stream_session_state::FAILED) { - return true; - } - } - return false; -} - -std::vector> stream_coordinator::get_all_stream_sessions() { - std::vector> results; - for (auto& x : _peer_sessions) { - results.push_back(x.second); - } - return results; -} - -std::vector stream_coordinator::get_all_session_info() { - std::vector results; - for (auto& x : _peer_sessions) { - auto& session = x.second; - results.push_back(session->get_session_info()); - } - return results; -} - -std::vector stream_coordinator::get_peer_session_info(inet_address peer) { - std::vector results; - auto it = _peer_sessions.find(peer); - if (it != _peer_sessions.end()) { - auto& session = it->second; - results.push_back(session->get_session_info()); - } - return results; -} - -bool stream_coordinator::is_receiving() { - return _is_receiving; -} - -std::set stream_coordinator::get_peers() { - std::set results; - for (auto& x : _peer_sessions) { - results.insert(x.first); - } - return results; -} - -void stream_coordinator::connect_all_stream_sessions() { - for (auto& x : _peer_sessions) { - auto& session = x.second; - session->start(); - sslog.info("[Stream #{}] Beginning stream session with {}", session->plan_id(), session->peer); - } -} - -} // namespace streaming diff --git a/scylla/streaming/stream_coordinator.hh b/scylla/streaming/stream_coordinator.hh deleted file mode 100644 index 486c20c..0000000 --- a/scylla/streaming/stream_coordinator.hh +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "gms/inet_address.hh" -#include "streaming/stream_session.hh" -#include "streaming/session_info.hh" -#include -#include - -namespace streaming { - -/** - * {@link StreamCoordinator} is a helper class that abstracts away maintaining multiple - * StreamSession and ProgressInfo instances per peer. - * - * This class coordinates multiple SessionStreams per peer in both the outgoing StreamPlan context and on the - * inbound StreamResultFuture context. - */ -class stream_coordinator { -public: - using inet_address = gms::inet_address; - -private: - class host_streaming_data; - std::map> _peer_sessions; - bool _is_receiving; - -public: - stream_coordinator(bool is_receiving = false) - : _is_receiving(is_receiving) { - } -public: - /** - * @return true if any stream session is active - */ - bool has_active_sessions(); - - std::vector> get_all_stream_sessions(); - - bool is_receiving(); - - void connect_all_stream_sessions(); - std::set get_peers(); - -public: - shared_ptr get_or_create_session(inet_address peer) { - auto& session = _peer_sessions[peer]; - if (!session) { - session = make_shared(peer); - } - return session; - } - - std::vector get_all_session_info(); - std::vector get_peer_session_info(inet_address peer); -}; - -} // namespace streaming diff --git a/scylla/streaming/stream_detail.hh b/scylla/streaming/stream_detail.hh deleted file mode 100644 index 4caf68b..0000000 --- a/scylla/streaming/stream_detail.hh +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
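A sketch of the coordinator's intended call sequence on the outgoing side; peers is an illustrative list of gms::inet_address values:

    // One session per peer; sessions are created lazily on first use.
    streaming::stream_coordinator coord(false /* not receiving */);
    for (auto& peer : peers) {
        auto session = coord.get_or_create_session(peer);
        // ... queue work on the session, e.g. add_stream_request()/add_transfer_ranges() ...
    }
    coord.connect_all_stream_sessions();      // starts every session
    bool busy = coord.has_active_sessions();  // true until all COMPLETE or FAILED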
- */ - -#pragma once - -#include "query-request.hh" -#include "mutation_reader.hh" -#include "utils/UUID.hh" -#include -#include "range.hh" -#include "dht/i_partitioner.hh" - -namespace streaming { - -struct stream_detail { - using UUID = utils::UUID; - UUID cf_id; - stream_detail() = default; - stream_detail(UUID cf_id_) - : cf_id(std::move(cf_id_)) { - } -}; - -} // namespace streaming diff --git a/scylla/streaming/stream_event.hh b/scylla/streaming/stream_event.hh deleted file mode 100644 index 2e105a0..0000000 --- a/scylla/streaming/stream_event.hh +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "utils/UUID.hh" -#include "gms/inet_address.hh" -#include "streaming/stream_session.hh" -#include "streaming/session_info.hh" -#include "streaming/progress_info.hh" - -namespace streaming { - -class stream_event { -public: - using UUID = utils::UUID; - enum class type { - STREAM_PREPARED, - STREAM_COMPLETE, - FILE_PROGRESS, - }; - - type event_type; - UUID plan_id; - - stream_event(type event_type_, UUID plan_id_) - : event_type(event_type_) - , plan_id(plan_id_) { - } -}; - -struct session_complete_event : public stream_event { - using inet_address = gms::inet_address; - inet_address peer; - bool success; - - session_complete_event(shared_ptr session) - : stream_event(stream_event::type::STREAM_COMPLETE, session->plan_id()) - , peer(session->peer) - , success(session->is_success()) { - } -}; - -struct progress_event : public stream_event { - using UUID = utils::UUID; - progress_info progress; - progress_event(UUID plan_id_, progress_info progress_) - : stream_event(stream_event::type::FILE_PROGRESS, plan_id_) - , progress(std::move(progress_)) { - } -}; - -struct session_prepared_event : public stream_event { - using UUID = utils::UUID; - session_info session; - session_prepared_event(UUID plan_id_, session_info session_) - : stream_event(stream_event::type::STREAM_PREPARED, plan_id_) - , session(std::move(session_)) { - } -}; - -} // namespace streaming diff --git a/scylla/streaming/stream_event_handler.hh b/scylla/streaming/stream_event_handler.hh deleted file mode 100644 index cc09e60..0000000 --- a/scylla/streaming/stream_event_handler.hh +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "streaming/stream_event.hh" - -namespace streaming { - -class stream_event_handler /* extends FutureCallback */ { -public: - /** - * Callback for various streaming events. - * - * @see StreamEvent.Type - * @param event Stream event. 
- */ - virtual void handle_stream_event(session_complete_event event) {} - virtual void handle_stream_event(progress_event event) {} - virtual void handle_stream_event(session_prepared_event event) {} - virtual ~stream_event_handler() {}; -}; - -} // namespace streaming diff --git a/scylla/streaming/stream_exception.hh b/scylla/streaming/stream_exception.hh deleted file mode 100644 index 1aff017..0000000 --- a/scylla/streaming/stream_exception.hh +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "streaming/stream_state.hh" -#include -#include - -namespace streaming { - -class stream_exception : public std::exception { -public: - stream_state state; - sstring msg; - stream_exception(stream_state s, sstring m) - : state(std::move(s)) - , msg(std::move(m)) { - } - virtual const char* what() const noexcept override { - return msg.c_str(); - } -}; - -} // namespace streaming diff --git a/scylla/streaming/stream_manager.cc b/scylla/streaming/stream_manager.cc deleted file mode 100644 index 34fee95..0000000 --- a/scylla/streaming/stream_manager.cc +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. 
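Because stream_event_handler gives every callback an empty default body, listeners override only the events they care about. A sketch of a listener that reports completed sessions (std::clog needs <iostream>):

    class completion_logger : public streaming::stream_event_handler {
    public:
        virtual void handle_stream_event(streaming::session_complete_event event) override {
            std::clog << "plan " << event.plan_id
                      << (event.success ? " succeeded" : " failed")
                      << " with peer " << event.peer << "\n";
        }
    };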
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "core/distributed.hh" -#include "streaming/stream_manager.hh" -#include "streaming/stream_result_future.hh" -#include "log.hh" -#include "streaming/stream_session_state.hh" -#include - -namespace streaming { - -extern logging::logger sslog; - -distributed _the_stream_manager; - - -stream_manager::stream_manager() { - namespace sm = seastar::metrics; - - _metrics.add_group("streaming", { - sm::make_derive("total_incoming_bytes", [this] { return get_progress_on_local_shard().bytes_received; }, - sm::description("This is a received bytes rate.")), - - sm::make_derive("total_outgoing_bytes", [this] { return get_progress_on_local_shard().bytes_sent; }, - sm::description("This is a sent bytes rate.")), - }); -} - -void stream_manager::register_sending(shared_ptr result) { -#if 0 - result.addEventListener(notifier); - // Make sure we remove the stream on completion (whether successful or not) - result.addListener(new Runnable() - { - public void run() - { - initiatedStreams.remove(result.planId); - } - }, MoreExecutors.sameThreadExecutor()); -#endif - _initiated_streams[result->plan_id] = std::move(result); -} - -void stream_manager::register_receiving(shared_ptr result) { -#if 0 - result->add_event_listener(notifier); - // Make sure we remove the stream on completion (whether successful or not) - result.addListener(new Runnable() - { - public void run() - { - receivingStreams.remove(result.planId); - } - }, MoreExecutors.sameThreadExecutor()); -#endif - _receiving_streams[result->plan_id] = std::move(result); -} - -shared_ptr stream_manager::get_sending_stream(UUID plan_id) { - auto it = _initiated_streams.find(plan_id); - if (it != _initiated_streams.end()) { - return it->second; - } - return {}; -} - -shared_ptr stream_manager::get_receiving_stream(UUID plan_id) { - auto it = _receiving_streams.find(plan_id); - if (it != _receiving_streams.end()) { - return it->second; - } - return {}; -} - -void stream_manager::remove_stream(UUID plan_id) { - sslog.debug("stream_manager: removing plan_id={}", plan_id); - _initiated_streams.erase(plan_id); - _receiving_streams.erase(plan_id); - // FIXME: Do not ignore the future - remove_progress_on_all_shards(plan_id).handle_exception([plan_id] (auto ep) { - sslog.info("stream_manager: Fail to remove progress for plan_id={}: {}", plan_id, ep); - }); -} - -void stream_manager::show_streams() { - for (auto& x : _initiated_streams) { - sslog.debug("stream_manager:initiated_stream: plan_id={}", x.first); - } - for (auto& x : _receiving_streams) { - sslog.debug("stream_manager:receiving_stream: plan_id={}", x.first); - } -} - -std::vector> stream_manager::get_all_streams() const { - std::vector> result; - for (auto& x : _initiated_streams) { - result.push_back(x.second); - } - for (auto& x : _receiving_streams) { - result.push_back(x.second); - } - return result; -} - -void stream_manager::update_progress(UUID cf_id, gms::inet_address peer, 
progress_info::direction dir, size_t fm_size) { - auto& sbytes = _stream_bytes[cf_id]; - if (dir == progress_info::direction::OUT) { - sbytes[peer].bytes_sent += fm_size; - } else { - sbytes[peer].bytes_received += fm_size; - } -} - -future<> stream_manager::update_all_progress_info() { - return seastar::async([this] { - for (auto sr: get_all_streams()) { - for (auto session : sr->get_coordinator()->get_all_stream_sessions()) { - session->update_progress().get(); - } - } - }); -} - -void stream_manager::remove_progress(UUID plan_id) { - _stream_bytes.erase(plan_id); -} - -stream_bytes stream_manager::get_progress(UUID plan_id, gms::inet_address peer) { - auto& sbytes = _stream_bytes[plan_id]; - return sbytes[peer]; -} - -stream_bytes stream_manager::get_progress(UUID plan_id) { - stream_bytes ret; - for (auto& x : _stream_bytes[plan_id]) { - ret += x.second; - } - return ret; -} - -future<> stream_manager::remove_progress_on_all_shards(UUID plan_id) { - return get_stream_manager().invoke_on_all([plan_id] (auto& sm) { - sm.remove_progress(plan_id); - }); -} - -future stream_manager::get_progress_on_all_shards(UUID plan_id, gms::inet_address peer) { - return get_stream_manager().map_reduce0( - [plan_id, peer] (auto& sm) { - return sm.get_progress(plan_id, peer); - }, - stream_bytes(), - std::plus() - ); -} - -future stream_manager::get_progress_on_all_shards(UUID plan_id) { - return get_stream_manager().map_reduce0( - [plan_id] (auto& sm) { - return sm.get_progress(plan_id); - }, - stream_bytes(), - std::plus() - ); -} - -future stream_manager::get_progress_on_all_shards(gms::inet_address peer) { - return get_stream_manager().map_reduce0( - [peer] (auto& sm) { - stream_bytes ret; - for (auto& sbytes : sm._stream_bytes) { - ret += sbytes.second[peer]; - } - return ret; - }, - stream_bytes(), - std::plus() - ); -} - -future stream_manager::get_progress_on_all_shards() { - return get_stream_manager().map_reduce0( - [] (auto& sm) { - stream_bytes ret; - for (auto& sbytes : sm._stream_bytes) { - for (auto& sb : sbytes.second) { - ret += sb.second; - } - } - return ret; - }, - stream_bytes(), - std::plus() - ); -} - -stream_bytes stream_manager::get_progress_on_local_shard() { - stream_bytes ret; - for (auto& sbytes : _stream_bytes) { - for (auto& sb : sbytes.second) { - ret += sb.second; - } - } - return ret; -} - -bool stream_manager::has_peer(inet_address endpoint) { - for (auto sr : get_all_streams()) { - for (auto session : sr->get_coordinator()->get_all_stream_sessions()) { - if (session->peer == endpoint) { - return true; - } - } - } - return false; -} - -void stream_manager::fail_sessions(inet_address endpoint) { - for (auto sr : get_all_streams()) { - for (auto session : sr->get_coordinator()->get_all_stream_sessions()) { - if (session->peer == endpoint) { - session->close_session(stream_session_state::FAILED); - } - } - } -} - -void stream_manager::fail_all_sessions() { - for (auto sr : get_all_streams()) { - for (auto session : sr->get_coordinator()->get_all_stream_sessions()) { - session->close_session(stream_session_state::FAILED); - } - } -} - -void stream_manager::on_remove(inet_address endpoint) { - if (has_peer(endpoint)) { - sslog.info("stream_manager: Close all stream_session with peer = {} in on_remove", endpoint); - get_stream_manager().invoke_on_all([endpoint] (auto& sm) { - sm.fail_sessions(endpoint); - }).handle_exception([endpoint] (auto ep) { - sslog.warn("stream_manager: Fail to close sessions peer = {} in on_remove", endpoint); - }); - } -} - -void 
stream_manager::on_restart(inet_address endpoint, endpoint_state ep_state) { - if (has_peer(endpoint)) { - sslog.info("stream_manager: Close all stream_session with peer = {} in on_restart", endpoint); - get_stream_manager().invoke_on_all([endpoint] (auto& sm) { - sm.fail_sessions(endpoint); - }).handle_exception([endpoint] (auto ep) { - sslog.warn("stream_manager: Fail to close sessions peer = {} in on_restart", endpoint); - }); - } -} - -} // namespace streaming diff --git a/scylla/streaming/stream_manager.hh b/scylla/streaming/stream_manager.hh deleted file mode 100644 index e08584e..0000000 --- a/scylla/streaming/stream_manager.hh +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once -#include "streaming/progress_info.hh" -#include "core/shared_ptr.hh" -#include "core/distributed.hh" -#include "utils/UUID.hh" -#include "gms/i_endpoint_state_change_subscriber.hh" -#include "gms/inet_address.hh" -#include "gms/endpoint_state.hh" -#include "gms/application_state.hh" -#include -#include -#include - -namespace streaming { - -class stream_result_future; - -struct stream_bytes { - int64_t bytes_sent = 0; - int64_t bytes_received = 0; - friend stream_bytes operator+(const stream_bytes& x, const stream_bytes& y) { - stream_bytes ret(x); - ret += y; - return ret; - } - friend bool operator!=(const stream_bytes& x, const stream_bytes& y) { - return x.bytes_sent != y.bytes_sent && x.bytes_received != y.bytes_received; - } - friend bool operator==(const stream_bytes& x, const stream_bytes& y) { - return x.bytes_sent == y.bytes_sent && x.bytes_received == y.bytes_received; - } - stream_bytes& operator+=(const stream_bytes& x) { - bytes_sent += x.bytes_sent; - bytes_received += x.bytes_received; - return *this; - } -}; - -/** - * StreamManager manages currently running {@link StreamResultFuture}s and provides status of all operation invoked. - * - * All stream operation should be created through this class to track streaming status and progress. 
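stream_bytes is a simple additive pair of counters. Note that operator!= as written returns true only when both fields differ (it uses &&), so it is not the exact negation of operator==. A usage sketch:

    streaming::stream_bytes a{1024, 0};     // bytes_sent, bytes_received
    streaming::stream_bytes b{0, 2048};
    streaming::stream_bytes total = a + b;  // {1024, 2048}
    a += b;                                 // accumulate in place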
- */ -class stream_manager : public gms::i_endpoint_state_change_subscriber, public enable_shared_from_this { - using UUID = utils::UUID; - using inet_address = gms::inet_address; - using endpoint_state = gms::endpoint_state; - using application_state = gms::application_state; - using versioned_value = gms::versioned_value; - /* - * Currently running streams. Removed after completion/failure. - * We manage them in two different maps to distinguish plan from initiated ones to - * receiving ones withing the same JVM. - */ -private: - std::unordered_map> _initiated_streams; - std::unordered_map> _receiving_streams; - std::unordered_map> _stream_bytes; - semaphore _mutation_send_limiter{256}; - seastar::metrics::metric_groups _metrics; - -public: - stream_manager(); - - semaphore& mutation_send_limiter() { return _mutation_send_limiter; } - - void register_sending(shared_ptr result); - - void register_receiving(shared_ptr result); - - shared_ptr get_sending_stream(UUID plan_id); - - shared_ptr get_receiving_stream(UUID plan_id); - - std::vector> get_all_streams() const ; - - - const std::unordered_map>& get_initiated_streams() const { - return _initiated_streams; - } - - const std::unordered_map>& get_receiving_streams() const { - return _receiving_streams; - } - - void remove_stream(UUID plan_id); - - void show_streams(); - - future<> stop() { - fail_all_sessions(); - return make_ready_future<>(); - } - - void update_progress(UUID cf_id, gms::inet_address peer, progress_info::direction dir, size_t fm_size); - future<> update_all_progress_info(); - - void remove_progress(UUID plan_id); - - stream_bytes get_progress(UUID plan_id, gms::inet_address peer); - - stream_bytes get_progress(UUID plan_id); - - future<> remove_progress_on_all_shards(UUID plan_id); - - future get_progress_on_all_shards(UUID plan_id, gms::inet_address peer); - - future get_progress_on_all_shards(UUID plan_id); - - future get_progress_on_all_shards(gms::inet_address peer); - - future get_progress_on_all_shards(); - - stream_bytes get_progress_on_local_shard(); - -public: - virtual void on_join(inet_address endpoint, endpoint_state ep_state) override {} - virtual void before_change(inet_address endpoint, endpoint_state current_state, application_state new_state_key, const versioned_value& new_value) override {} - virtual void on_change(inet_address endpoint, application_state state, const versioned_value& value) override {} - virtual void on_alive(inet_address endpoint, endpoint_state state) override {} - virtual void on_dead(inet_address endpoint, endpoint_state state) override {} - virtual void on_remove(inet_address endpoint) override; - virtual void on_restart(inet_address endpoint, endpoint_state ep_state) override; - -private: - void fail_all_sessions(); - void fail_sessions(inet_address endpoint); - bool has_peer(inet_address endpoint); -}; - -extern distributed _the_stream_manager; - -inline distributed& get_stream_manager() { - return _the_stream_manager; -} - -inline stream_manager& get_local_stream_manager() { - return _the_stream_manager.local(); -} - -} // namespace streaming diff --git a/scylla/streaming/stream_plan.cc b/scylla/streaming/stream_plan.cc deleted file mode 100644 index fc5fc7b..0000000 --- a/scylla/streaming/stream_plan.cc +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "streaming/stream_plan.hh" -#include "streaming/stream_result_future.hh" -#include "streaming/stream_state.hh" - -namespace streaming { - -extern logging::logger sslog; - -stream_plan& stream_plan::request_ranges(inet_address from, sstring keyspace, dht::token_range_vector ranges) { - return request_ranges(from, keyspace, std::move(ranges), {}); -} - -stream_plan& stream_plan::request_ranges(inet_address from, sstring keyspace, dht::token_range_vector ranges, std::vector column_families) { - _range_added = true; - auto session = _coordinator->get_or_create_session(from); - session->add_stream_request(keyspace, std::move(ranges), std::move(column_families)); - return *this; -} - -stream_plan& stream_plan::transfer_ranges(inet_address to, sstring keyspace, dht::token_range_vector ranges) { - return transfer_ranges(to, keyspace, std::move(ranges), {}); -} - -stream_plan& stream_plan::transfer_ranges(inet_address to, sstring keyspace, dht::token_range_vector ranges, std::vector column_families) { - _range_added = true; - auto session = _coordinator->get_or_create_session(to); - session->add_transfer_ranges(keyspace, std::move(ranges), std::move(column_families)); - return *this; -} - -future stream_plan::execute() { - sslog.debug("[Stream #{}] Executing stream_plan description={} range_added={}", _plan_id, _description, _range_added); - if (!_range_added) { - stream_state state(_plan_id, _description, std::vector()); - return make_ready_future(std::move(state)); - } - return stream_result_future::init_sending_side(_plan_id, _description, _handlers, _coordinator); -} - -stream_plan& stream_plan::listeners(std::vector handlers) { - std::copy(handlers.begin(), handlers.end(), std::back_inserter(_handlers)); - return *this; -} - -} diff --git a/scylla/streaming/stream_plan.hh b/scylla/streaming/stream_plan.hh deleted file mode 100644 index 4a968bf..0000000 --- a/scylla/streaming/stream_plan.hh +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "utils/UUID.hh" -#include "utils/UUID_gen.hh" -#include "core/sstring.hh" -#include "gms/inet_address.hh" -#include "query-request.hh" -#include "dht/i_partitioner.hh" -#include "streaming/stream_coordinator.hh" -#include "streaming/stream_event_handler.hh" -#include "streaming/stream_detail.hh" -#include - -namespace streaming { - -class stream_state; - -/** - * {@link StreamPlan} is a helper class that builds StreamOperation of given configuration. - * - * This is the class you want to use for building streaming plan and starting streaming. - */ -class stream_plan { -private: - using inet_address = gms::inet_address; - using UUID = utils::UUID; - using token = dht::token; - UUID _plan_id; - sstring _description; - std::vector _handlers; - shared_ptr _coordinator; - bool _range_added = false; -public: - - /** - * Start building stream plan. - * - * @param description Stream type that describes this StreamPlan - */ - stream_plan(sstring description) - : _plan_id(utils::UUID_gen::get_time_UUID()) - , _description(description) - , _coordinator(make_shared()) { - } - - /** - * Request data in {@code keyspace} and {@code ranges} from specific node. - * - * @param from endpoint address to fetch data from. - * @param connecting Actual connecting address for the endpoint - * @param keyspace name of keyspace - * @param ranges ranges to fetch - * @return this object for chaining - */ - stream_plan& request_ranges(inet_address from, sstring keyspace, dht::token_range_vector ranges); - - /** - * Request data in {@code columnFamilies} under {@code keyspace} and {@code ranges} from specific node. - * - * @param from endpoint address to fetch data from. - * @param connecting Actual connecting address for the endpoint - * @param keyspace name of keyspace - * @param ranges ranges to fetch - * @param columnFamilies specific column families - * @return this object for chaining - */ - stream_plan& request_ranges(inet_address from, sstring keyspace, dht::token_range_vector ranges, std::vector column_families); - - /** - * Add transfer task to send data of specific keyspace and ranges. 
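To make the builder contract declared in this header concrete, a caller composes these calls roughly as follows (peer addresses and range vectors are hypothetical placeholders assumed declared elsewhere; a sketch, not code from this patch):

    // Build a plan that pushes local data for "ks1" to peer_a and pulls
    // the same keyspace's ranges from peer_b, then kick it off.
    stream_plan plan("rebuild");                         // description shows up in logs
    plan.transfer_ranges(peer_a, "ks1", local_ranges)    // we are the sender
        .request_ranges(peer_b, "ks1", wanted_ranges);   // peer_b sends to us
    if (!plan.is_empty()) {
        // execute() resolves to a stream_state snapshot once all sessions end
        future<stream_state> done = plan.execute();
    }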
- * - * @param to endpoint address of receiver - * @param connecting Actual connecting address of the endpoint - * @param keyspace name of keyspace - * @param ranges ranges to send - * @return this object for chaining - */ - stream_plan& transfer_ranges(inet_address to, sstring keyspace, dht::token_range_vector ranges); - - /** - * Add transfer task to send data of specific {@code columnFamilies} under {@code keyspace} and {@code ranges}. - * - * @param to endpoint address of receiver - * @param connecting Actual connecting address of the endpoint - * @param keyspace name of keyspace - * @param ranges ranges to send - * @param columnFamilies specific column families - * @return this object for chaining - */ - stream_plan& transfer_ranges(inet_address to, sstring keyspace, dht::token_range_vector ranges, std::vector column_families); - - stream_plan& listeners(std::vector handlers); -public: - /** - * @return true if this plan has no plan to execute - */ - bool is_empty() { - return !_coordinator->has_active_sessions(); - } - - /** - * Execute this {@link StreamPlan} asynchronously. - * - * @return Future {@link StreamState} that you can use to listen on progress of streaming. - */ - future execute(); -}; - -} // namespace streaming diff --git a/scylla/streaming/stream_receive_task.cc b/scylla/streaming/stream_receive_task.cc deleted file mode 100644 index 772191f..0000000 --- a/scylla/streaming/stream_receive_task.cc +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "streaming/stream_session.hh" -#include "streaming/stream_receive_task.hh" - -namespace streaming { - -stream_receive_task::stream_receive_task(shared_ptr _session, UUID _cf_id, int _total_files, long _total_size) - : stream_task(_session, _cf_id) - , total_files(_total_files) - , total_size(_total_size) { -} - -stream_receive_task::~stream_receive_task() { -} - -} // namespace streaming diff --git a/scylla/streaming/stream_receive_task.hh b/scylla/streaming/stream_receive_task.hh deleted file mode 100644 index fe1766e..0000000 --- a/scylla/streaming/stream_receive_task.hh +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "utils/UUID.hh" -#include "streaming/stream_task.hh" -#include - -namespace streaming { - -class stream_session; - -/** - * Task that manages receiving files for the session for certain ColumnFamily. - */ -class stream_receive_task : public stream_task { -private: - // number of files to receive - int total_files; - // total size of files to receive - long total_size; - // true if task is done (either completed or aborted) - bool done = false; -public: - stream_receive_task(shared_ptr _session, UUID _cf_id, int _total_files, long _total_size); - ~stream_receive_task(); - - virtual int get_total_number_of_files() override { - return total_files; - } - - virtual long get_total_size() override { - return total_size; - } - - - /** - * Abort this task. - * If the task already received all files and - * {@link org.apache.cassandra.streaming.StreamReceiveTask.OnCompletionRunnable} task is submitted, - * then task cannot be aborted. - */ - virtual void abort() override { - } -}; - -} // namespace streaming diff --git a/scylla/streaming/stream_request.cc b/scylla/streaming/stream_request.cc deleted file mode 100644 index d76fd13..0000000 --- a/scylla/streaming/stream_request.cc +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "streaming/stream_request.hh" -#include "query-request.hh" - -namespace streaming { - -std::ostream& operator<<(std::ostream& os, const stream_request& sr) { - os << "[ ks = " << sr.keyspace << " cf = "; - for (auto& cf : sr.column_families) { - os << cf << " "; - } - return os << "]"; -} - -} // namespace streaming; diff --git a/scylla/streaming/stream_request.hh b/scylla/streaming/stream_request.hh deleted file mode 100644 index 6a5f50f..0000000 --- a/scylla/streaming/stream_request.hh +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#pragma once - -#include "core/sstring.hh" -#include "range.hh" -#include "dht/i_partitioner.hh" -#include "partition_range_compat.hh" -#include - -namespace streaming { - -class stream_request { -public: - using token = dht::token; - sstring keyspace; - dht::token_range_vector ranges; - // For compatibility with <= 1.5, we send wrapping ranges (though they will never wrap). - std::vector> ranges_compat() const { - return compat::wrap(ranges); - } - std::vector column_families; - stream_request() = default; - stream_request(sstring _keyspace, dht::token_range_vector _ranges, std::vector _column_families) - : keyspace(std::move(_keyspace)) - , ranges(std::move(_ranges)) - , column_families(std::move(_column_families)) { - } - stream_request(sstring _keyspace, std::vector> _ranges, std::vector _column_families) - : stream_request(std::move(_keyspace), compat::unwrap(std::move(_ranges)), std::move(_column_families)) { - } - friend std::ostream& operator<<(std::ostream& os, const stream_request& r); -}; - -} // namespace streaming diff --git a/scylla/streaming/stream_result_future.cc b/scylla/streaming/stream_result_future.cc deleted file mode 100644 index 0ce4d1b..0000000 --- a/scylla/streaming/stream_result_future.cc +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . 
- */ - -#include "streaming/stream_result_future.hh" -#include "streaming/stream_manager.hh" -#include "streaming/stream_exception.hh" -#include "log.hh" -#include - -namespace streaming { - -extern logging::logger sslog; - -future stream_result_future::init_sending_side(UUID plan_id_, sstring description_, - std::vector listeners_, shared_ptr coordinator_) { - auto sr = make_shared(plan_id_, description_, coordinator_); - get_local_stream_manager().register_sending(sr); - - for (auto& listener : listeners_) { - sr->add_event_listener(listener); - } - - sslog.info("[Stream #{}] Executing streaming plan for {}", plan_id_, description_); - - // Initialize and start all sessions - for (auto& session : coordinator_->get_all_stream_sessions()) { - session->init(sr); - } - coordinator_->connect_all_stream_sessions(); - - return sr->_done.get_future(); -} - -shared_ptr stream_result_future::init_receiving_side(UUID plan_id, sstring description, inet_address from) { - auto& sm = get_local_stream_manager(); - auto sr = sm.get_receiving_stream(plan_id); - if (sr) { - auto err = sprint("[Stream #%s] GOT PREPARE_MESSAGE from %s, description=%s," - "stream_plan exists, duplicated message received?", plan_id, description, from); - sslog.warn(err.c_str()); - throw std::runtime_error(err); - } - sslog.info("[Stream #{}] Creating new streaming plan for {}, with {}", plan_id, description, from); - bool is_receiving = true; - sr = make_shared(plan_id, description, is_receiving); - sm.register_receiving(sr); - return sr; -} - -void stream_result_future::handle_session_prepared(shared_ptr session) { - auto si = session->make_session_info(); - sslog.info("[Stream #{}] Prepare completed with {}. Receiving {}, sending {}", - session->plan_id(), - session->peer, - si.get_total_files_to_receive(), - si.get_total_files_to_send()); - auto event = session_prepared_event(plan_id, si); - session->get_session_info() = si; - fire_stream_event(std::move(event)); -} - -void stream_result_future::handle_session_complete(shared_ptr session) { - sslog.info("[Stream #{}] Session with {} is complete, state={}", session->plan_id(), session->peer, session->get_state()); - auto event = session_complete_event(session); - fire_stream_event(std::move(event)); - auto si = session->make_session_info(); - session->get_session_info() = si; - maybe_complete(); -} - -template -void stream_result_future::fire_stream_event(Event event) { - // delegate to listener - for (auto listener : _event_listeners) { - listener->handle_stream_event(std::move(event)); - } -} - -void stream_result_future::maybe_complete() { - auto has_active_sessions = _coordinator->has_active_sessions(); - auto plan_id = this->plan_id; - sslog.debug("[Stream #{}] stream_result_future: has_active_sessions={}", plan_id, has_active_sessions); - if (!has_active_sessions) { - auto& sm = get_local_stream_manager(); - if (sslog.is_enabled(logging::log_level::debug)) { - sm.show_streams(); - } - auto duration = std::chrono::duration_cast>(lowres_clock::now() - _start_time).count(); - sm.get_progress_on_all_shards(plan_id).then([plan_id, duration] (auto sbytes) { - auto tx_bw = sstring("+inf"); - auto rx_bw = sstring("+inf"); - if (std::fabs(duration) > FLT_EPSILON) { - tx_bw = sprint("%.3f", sbytes.bytes_sent / duration / (1024 * 1024)); - rx_bw = sprint("%.3f", sbytes.bytes_received / duration / (1024 * 1024)); - } - sslog.info("[Stream #{}] bytes_sent = {}, bytes_received = {}, tx_bandwidth = {} MiB/s, rx_bandwidth = {} MiB/s", - plan_id, sbytes.bytes_sent, 
sbytes.bytes_received, tx_bw, rx_bw); - }).handle_exception([plan_id] (auto ep) { - sslog.warn("[Stream #{}] Fail to get progess on all shards: {}", plan_id, ep); - }).finally([this, plan_id, &sm] { - sm.remove_stream(plan_id); - auto final_state = get_current_state(); - if (final_state.has_failed_session()) { - sslog.warn("[Stream #{}] Stream failed for streaming plan {}, peers={}", plan_id, description, _coordinator->get_peers()); - _done.set_exception(stream_exception(final_state, "Stream failed")); - } else { - sslog.info("[Stream #{}] All sessions completed for streaming plan {}, peers={}", plan_id, description, _coordinator->get_peers()); - _done.set_value(final_state); - } - }); - } -} - -stream_state stream_result_future::get_current_state() { - return stream_state(plan_id, description, _coordinator->get_all_session_info()); -} - -void stream_result_future::handle_progress(progress_info progress) { - fire_stream_event(progress_event(plan_id, std::move(progress))); -} - -} // namespace streaming diff --git a/scylla/streaming/stream_result_future.hh b/scylla/streaming/stream_result_future.hh deleted file mode 100644 index 1dad8fa..0000000 --- a/scylla/streaming/stream_result_future.hh +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "core/sstring.hh" -#include "core/shared_ptr.hh" -#include "utils/UUID.hh" -#include "gms/inet_address.hh" -#include "streaming/stream_coordinator.hh" -#include "streaming/stream_event_handler.hh" -#include "streaming/stream_state.hh" -#include "streaming/progress_info.hh" -#include - -namespace streaming { - using UUID = utils::UUID; - using inet_address = gms::inet_address; -/** - * A future on the result ({@link StreamState}) of a streaming plan. - * - * In practice, this object also groups all the {@link StreamSession} for the streaming job - * involved. One StreamSession will be created for every peer involved and said session will - * handle every streaming (outgoing and incoming) to that peer for this job. - *

- * The future will return a result once every session is completed (successfully or not). If - * any session ended up with an error, the future will throw a StreamException. - * <p>

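A short consumer-side sketch of that contract (the caller shape is illustrative; `plan` is assumed to be a stream_plan built as in stream_plan.hh above):

    // Wait on the future produced by stream_plan::execute(); it resolves
    // once every session has completed, and fails if any session failed.
    plan.execute().then_wrapped([] (future<stream_state> f) {
        try {
            stream_state state = f.get0();   // all sessions succeeded
            sslog.info("[Stream #{}] streaming plan completed", state.plan_id);
        } catch (...) {
            sslog.warn("streaming plan failed: {}", std::current_exception());
        }
        return make_ready_future<>();
    });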
- * You can attach {@link StreamEventHandler} to this object to listen on {@link StreamEvent}s to - * track progress of the streaming. - */ -class stream_result_future { -public: - using UUID = utils::UUID; - UUID plan_id; - sstring description; -private: - shared_ptr _coordinator; - std::vector _event_listeners; - promise _done; - lowres_clock::time_point _start_time; -public: - stream_result_future(UUID plan_id_, sstring description_, bool is_receiving) - : stream_result_future(plan_id_, description_, make_shared(is_receiving)) { - // Note: Origin sets connections_per_host = 0 on receiving side, We set 1 to - // refelct the fact that we actaully create one conncetion to the initiator. - } - - /** - * Create new StreamResult of given {@code planId} and type. - * - * Constructor is package private. You need to use {@link StreamPlan#execute()} to get the instance. - * - * @param planId Stream plan ID - * @param description Stream description - */ - stream_result_future(UUID plan_id_, sstring description_, shared_ptr coordinator_) - : plan_id(std::move(plan_id_)) - , description(std::move(description_)) - , _coordinator(coordinator_) - , _start_time(lowres_clock::now()) { - // if there is no session to listen to, we immediately set result for returning - if (!_coordinator->is_receiving() && !_coordinator->has_active_sessions()) { - _done.set_value(get_current_state()); - } - } - -public: - shared_ptr get_coordinator() { return _coordinator; }; - -public: - static future init_sending_side(UUID plan_id_, sstring description_, std::vector listeners_, shared_ptr coordinator_); - static shared_ptr init_receiving_side(UUID plan_id, sstring description, inet_address from); - -public: - void add_event_listener(stream_event_handler* listener) { - // FIXME: Futures.addCallback(this, listener); - _event_listeners.push_back(listener); - } - - /** - * @return Current snapshot of streaming progress. - */ - stream_state get_current_state(); - - void handle_session_prepared(shared_ptr session); - - - void handle_session_complete(shared_ptr session); - - void handle_progress(progress_info progress); - - template - void fire_stream_event(Event event); - -private: - void maybe_complete(); -}; - -} // namespace streaming diff --git a/scylla/streaming/stream_session.cc b/scylla/streaming/stream_session.cc deleted file mode 100644 index 20d1913..0000000 --- a/scylla/streaming/stream_session.cc +++ /dev/null @@ -1,554 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. 
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "log.hh" -#include "message/messaging_service.hh" -#include "streaming/stream_session.hh" -#include "streaming/prepare_message.hh" -#include "streaming/stream_result_future.hh" -#include "streaming/stream_manager.hh" -#include "mutation_reader.hh" -#include "dht/i_partitioner.hh" -#include "database.hh" -#include "utils/fb_utilities.hh" -#include "streaming/stream_plan.hh" -#include "core/sleep.hh" -#include "service/storage_service.hh" -#include "core/thread.hh" -#include "cql3/query_processor.hh" -#include "streaming/stream_state.hh" -#include "streaming/stream_session_state.hh" -#include "streaming/stream_exception.hh" -#include "service/storage_proxy.hh" -#include "service/priority_manager.hh" -#include "query-request.hh" -#include "schema_registry.hh" - -namespace streaming { - -logging::logger sslog("stream_session"); - -static auto get_stream_result_future(utils::UUID plan_id) { - auto& sm = get_local_stream_manager(); - auto f = sm.get_sending_stream(plan_id); - if (!f) { - f = sm.get_receiving_stream(plan_id); - } - return f; -} - -static auto get_session(utils::UUID plan_id, gms::inet_address from, const char* verb, std::experimental::optional cf_id = {}) { - if (cf_id) { - sslog.debug("[Stream #{}] GOT {} from {}: cf_id={}", plan_id, verb, from, *cf_id); - } else { - sslog.debug("[Stream #{}] GOT {} from {}", plan_id, verb, from); - } - auto sr = get_stream_result_future(plan_id); - if (!sr) { - auto err = sprint("[Stream #%s] GOT %s from %s: Can not find stream_manager", plan_id, verb, from); - sslog.warn(err.c_str()); - throw std::runtime_error(err); - } - auto coordinator = sr->get_coordinator(); - if (!coordinator) { - auto err = sprint("[Stream #%s] GOT %s from %s: Can not find coordinator", plan_id, verb, from); - sslog.warn(err.c_str()); - throw std::runtime_error(err); - } - return coordinator->get_or_create_session(from); -} - -void stream_session::init_messaging_service_handler() { - ms().register_prepare_message([] (const rpc::client_info& cinfo, prepare_message msg, UUID plan_id, sstring description) { - const auto& src_cpu_id = cinfo.retrieve_auxiliary("src_cpu_id"); - const auto& from = cinfo.retrieve_auxiliary("baddr"); - auto dst_cpu_id = engine().cpu_id(); - return smp::submit_to(dst_cpu_id, [msg = std::move(msg), plan_id, description = std::move(description), from, src_cpu_id, dst_cpu_id] () mutable { - auto sr = stream_result_future::init_receiving_side(plan_id, description, from); - auto session = get_session(plan_id, from, "PREPARE_MESSAGE"); - session->init(sr); - session->dst_cpu_id = src_cpu_id; - return session->prepare(std::move(msg.requests), std::move(msg.summaries)); - }); - }); - ms().register_prepare_done_message([] (const rpc::client_info& cinfo, UUID plan_id, unsigned dst_cpu_id) { - const auto& from = cinfo.retrieve_auxiliary("baddr"); - return smp::submit_to(dst_cpu_id, [plan_id, from] () mutable { - auto session = 
get_session(plan_id, from, "PREPARE_DONE_MESSAGE"); - session->follower_start_sent(); - return make_ready_future<>(); - }); - }); - ms().register_stream_mutation([] (const rpc::client_info& cinfo, UUID plan_id, frozen_mutation fm, unsigned dst_cpu_id, rpc::optional fragmented_opt) { - auto from = netw::messaging_service::get_source(cinfo); - auto fragmented = fragmented_opt && *fragmented_opt; - return do_with(std::move(fm), [plan_id, from, fragmented] (const auto& fm) { - auto fm_size = fm.representation().size(); - get_local_stream_manager().update_progress(plan_id, from.addr, progress_info::direction::IN, fm_size); - return service::get_schema_for_write(fm.schema_version(), from).then([plan_id, from, &fm, fragmented] (schema_ptr s) { - auto cf_id = fm.column_family_id(); - sslog.debug("[Stream #{}] GOT STREAM_MUTATION from {}: cf_id={}", plan_id, from.addr, cf_id); - - auto& db = service::get_local_storage_proxy().get_db().local(); - if (!db.column_family_exists(cf_id)) { - sslog.warn("[Stream #{}] STREAM_MUTATION from {}: cf_id={} is missing, assume the table is dropped", - plan_id, from.addr, cf_id); - return make_ready_future<>(); - } - return service::get_storage_proxy().local().mutate_streaming_mutation(std::move(s), plan_id, fm, fragmented).then_wrapped([plan_id, cf_id, from] (auto&& f) { - try { - f.get(); - return make_ready_future<>(); - } catch (no_such_column_family) { - sslog.warn("[Stream #{}] STREAM_MUTATION from {}: cf_id={} is missing, assume the table is dropped", - plan_id, from.addr, cf_id); - return make_ready_future<>(); - } catch (...) { - throw; - } - return make_ready_future<>(); - }); - }); - }); - }); - ms().register_stream_mutation_done([] (const rpc::client_info& cinfo, UUID plan_id, dht::token_range_vector ranges, UUID cf_id, unsigned dst_cpu_id) { - const auto& from = cinfo.retrieve_auxiliary("baddr"); - return smp::submit_to(dst_cpu_id, [ranges = std::move(ranges), plan_id, cf_id, from] () mutable { - auto session = get_session(plan_id, from, "STREAM_MUTATION_DONE", cf_id); - return session->get_db().invoke_on_all([ranges = std::move(ranges), plan_id, from, cf_id] (database& db) { - if (!db.column_family_exists(cf_id)) { - sslog.warn("[Stream #{}] STREAM_MUTATION_DONE from {}: cf_id={} is missing, assume the table is dropped", - plan_id, from, cf_id); - return make_ready_future<>(); - } - dht::partition_range_vector query_ranges; - try { - auto& cf = db.find_column_family(cf_id); - query_ranges.reserve(ranges.size()); - for (auto& range : ranges) { - query_ranges.push_back(dht::to_partition_range(range)); - } - return cf.flush_streaming_mutations(plan_id, std::move(query_ranges)); - } catch (no_such_column_family) { - sslog.warn("[Stream #{}] STREAM_MUTATION_DONE from {}: cf_id={} is missing, assume the table is dropped", - plan_id, from, cf_id); - return make_ready_future<>(); - } catch (...) { - throw; - } - }).then([session, cf_id] { - session->receive_task_completed(cf_id); - }); - }); - }); - ms().register_complete_message([] (const rpc::client_info& cinfo, UUID plan_id, unsigned dst_cpu_id) { - const auto& from = cinfo.retrieve_auxiliary("baddr"); - // Be compatible with old version. Do nothing but return a ready future. 
- sslog.debug("[Stream #{}] COMPLETE_MESSAGE from {} dst_cpu_id={}", plan_id, from, dst_cpu_id); - return make_ready_future<>(); - }); -} - -distributed* stream_session::_db; - -stream_session::stream_session() = default; - -stream_session::stream_session(inet_address peer_) - : peer(peer_) { - //this.metrics = StreamingMetrics.get(connecting); -} - -stream_session::~stream_session() = default; - -future<> stream_session::init_streaming_service(distributed& db) { - _db = &db; - // #293 - do not stop anything - // engine().at_exit([] { - // return get_stream_manager().stop(); - // }); - return get_stream_manager().start().then([] { - gms::get_local_gossiper().register_(get_local_stream_manager().shared_from_this()); - return _db->invoke_on_all([] (auto& db) { - init_messaging_service_handler(); - }); - }); -} - -future<> stream_session::on_initialization_complete() { - // send prepare message - set_state(stream_session_state::PREPARING); - auto prepare = prepare_message(); - std::copy(_requests.begin(), _requests.end(), std::back_inserter(prepare.requests)); - for (auto& x : _transfers) { - prepare.summaries.emplace_back(x.second.get_summary()); - } - auto id = msg_addr{this->peer, 0}; - sslog.debug("[Stream #{}] SEND PREPARE_MESSAGE to {}", plan_id(), id); - return ms().send_prepare_message(id, std::move(prepare), plan_id(), description()).then_wrapped([this, id] (auto&& f) { - try { - auto msg = f.get0(); - sslog.debug("[Stream #{}] GOT PREPARE_MESSAGE Reply from {}", this->plan_id(), this->peer); - this->dst_cpu_id = msg.dst_cpu_id; - for (auto& summary : msg.summaries) { - this->prepare_receiving(summary); - } - _stream_result->handle_session_prepared(this->shared_from_this()); - } catch (...) { - sslog.warn("[Stream #{}] Fail to send PREPARE_MESSAGE to {}, {}", this->plan_id(), id, std::current_exception()); - throw; - } - return make_ready_future<>(); - }).then([this, id] { - auto plan_id = this->plan_id(); - sslog.debug("[Stream #{}] SEND PREPARE_DONE_MESSAGE to {}", plan_id, id); - return ms().send_prepare_done_message(id, plan_id, this->dst_cpu_id).then([this] { - sslog.debug("[Stream #{}] GOT PREPARE_DONE_MESSAGE Reply from {}", this->plan_id(), this->peer); - }).handle_exception([id, plan_id] (auto ep) { - sslog.warn("[Stream #{}] Fail to send PREPARE_DONE_MESSAGE to {}, {}", plan_id, id, ep); - std::rethrow_exception(ep); - }); - }).then([this] { - sslog.debug("[Stream #{}] Initiator starts to sent", this->plan_id()); - this->start_streaming_files(); - }); -} - -void stream_session::on_error() { - sslog.warn("[Stream #{}] Streaming error occurred", plan_id()); - // fail session - close_session(stream_session_state::FAILED); -} - -// Only follower calls this function upon receiving of prepare_message from initiator -future stream_session::prepare(std::vector requests, std::vector summaries) { - auto plan_id = this->plan_id(); - sslog.debug("[Stream #{}] prepare requests nr={}, summaries nr={}", plan_id, requests.size(), summaries.size()); - // prepare tasks - set_state(stream_session_state::PREPARING); - auto& db = get_local_db(); - for (auto& request : requests) { - // always flush on stream request - sslog.debug("[Stream #{}] prepare stream_request={}", plan_id, request); - auto ks = request.keyspace; - // Make sure cf requested by peer node exists - for (auto& cf : request.column_families) { - try { - db.find_column_family(ks, cf); - } catch (no_such_column_family) { - auto err = sprint("[Stream #{}] prepare requested ks={} cf={} does not exist", ks, cf); - 
sslog.warn(err.c_str()); - throw std::runtime_error(err); - } - } - add_transfer_ranges(request.keyspace, request.ranges, request.column_families); - } - for (auto& summary : summaries) { - sslog.debug("[Stream #{}] prepare stream_summary={}", plan_id, summary); - auto cf_id = summary.cf_id; - // Make sure cf the peer node will send to us exists - try { - db.find_column_family(cf_id); - } catch (no_such_column_family) { - auto err = sprint("[Stream #{}] prepare cf_id=%s does not exist", plan_id, cf_id); - sslog.warn(err.c_str()); - throw std::runtime_error(err); - } - prepare_receiving(summary); - } - - // Always send a prepare_message back to follower - prepare_message prepare; - if (!requests.empty()) { - for (auto& x: _transfers) { - auto& task = x.second; - prepare.summaries.emplace_back(task.get_summary()); - } - } - prepare.dst_cpu_id = engine().cpu_id();; - _stream_result->handle_session_prepared(shared_from_this()); - return make_ready_future(std::move(prepare)); -} - -void stream_session::follower_start_sent() { - sslog.debug("[Stream #{}] Follower start to sent", this->plan_id()); - this->start_streaming_files(); -} - -void stream_session::session_failed() { - close_session(stream_session_state::FAILED); -} - -session_info stream_session::make_session_info() { - std::vector receiving_summaries; - for (auto& receiver : _receivers) { - receiving_summaries.emplace_back(receiver.second.get_summary()); - } - std::vector transfer_summaries; - for (auto& transfer : _transfers) { - transfer_summaries.emplace_back(transfer.second.get_summary()); - } - return session_info(peer, std::move(receiving_summaries), std::move(transfer_summaries), _state); -} - -void stream_session::receive_task_completed(UUID cf_id) { - _receivers.erase(cf_id); - sslog.debug("[Stream #{}] receive task_completed: cf_id={} done, stream_receive_task.size={} stream_transfer_task.size={}", - plan_id(), cf_id, _receivers.size(), _transfers.size()); - maybe_completed(); -} - -void stream_session::transfer_task_completed(UUID cf_id) { - _transfers.erase(cf_id); - sslog.debug("[Stream #{}] transfer task_completed: cf_id={} done, stream_receive_task.size={} stream_transfer_task.size={}", - plan_id(), cf_id, _receivers.size(), _transfers.size()); - maybe_completed(); -} - -void stream_session::send_complete_message() { - if (!_complete_sent) { - _complete_sent = true; - } else { - return; - } - auto id = msg_addr{this->peer, this->dst_cpu_id}; - auto plan_id = this->plan_id(); - sslog.debug("[Stream #{}] SEND COMPLETE_MESSAGE to {}", plan_id, id); - auto session = shared_from_this(); - this->ms().send_complete_message(id, plan_id, this->dst_cpu_id).then([session, id, plan_id] { - sslog.debug("[Stream #{}] GOT COMPLETE_MESSAGE Reply from {}", plan_id, id.addr); - }).handle_exception([session, id, plan_id] (auto ep) { - sslog.warn("[Stream #{}] COMPLETE_MESSAGE for {} has failed: {}", plan_id, id.addr, ep); - session->on_error(); - }); -} - -bool stream_session::maybe_completed() { - bool completed = _receivers.empty() && _transfers.empty(); - if (completed) { - send_complete_message(); - sslog.debug("[Stream #{}] maybe_completed: {} -> COMPLETE: session={}, peer={}", plan_id(), _state, this, peer); - close_session(stream_session_state::COMPLETE); - } - return completed; -} - -void stream_session::prepare_receiving(stream_summary& summary) { - if (summary.files > 0) { - // FIXME: handle when cf_id already exists - _receivers.emplace(summary.cf_id, stream_receive_task(shared_from_this(), summary.cf_id, summary.files, 
summary.total_size)); - } -} - -void stream_session::start_streaming_files() { - sslog.debug("[Stream #{}] {}: {} transfers to send", plan_id(), __func__, _transfers.size()); - if (!_transfers.empty()) { - set_state(stream_session_state::STREAMING); - } - for (auto it = _transfers.begin(); it != _transfers.end();) { - stream_transfer_task& task = it->second; - it++; - task.start(); - } -} - -std::vector stream_session::get_column_family_stores(const sstring& keyspace, const std::vector& column_families) { - // if columnfamilies are not specified, we add all cf under the keyspace - std::vector stores; - auto& db = get_local_db(); - if (column_families.empty()) { - for (auto& x : db.get_column_families()) { - column_family& cf = *(x.second); - auto cf_name = cf.schema()->cf_name(); - auto ks_name = cf.schema()->ks_name(); - if (ks_name == keyspace) { - sslog.debug("Find ks={} cf={}", ks_name, cf_name); - stores.push_back(&cf); - } - } - } else { - // TODO: We can move this to database class and use shared_ptr instead - for (auto& cf_name : column_families) { - try { - auto& x = db.find_column_family(keyspace, cf_name); - stores.push_back(&x); - } catch (no_such_column_family) { - sslog.warn("stream_session: {}.{} does not exist: {}\n", keyspace, cf_name, std::current_exception()); - continue; - } - } - } - return stores; -} - -void stream_session::add_transfer_ranges(sstring keyspace, dht::token_range_vector ranges, std::vector column_families) { - auto cfs = get_column_family_stores(keyspace, column_families); - for (auto& cf : cfs) { - auto cf_id = cf->schema()->id(); - auto it = _transfers.find(cf_id); - if (it == _transfers.end()) { - stream_transfer_task task(shared_from_this(), cf_id, ranges); - auto inserted = _transfers.emplace(cf_id, std::move(task)).second; - assert(inserted); - } else { - it->second.append_ranges(ranges); - } - } -} - -future<> stream_session::receiving_failed(UUID cf_id) -{ - return get_db().invoke_on_all([cf_id, plan_id = plan_id()] (database& db) { - try { - auto& cf = db.find_column_family(cf_id); - return cf.fail_streaming_mutations(plan_id); - } catch (no_such_column_family) { - return make_ready_future<>(); - } - }); -} - -void stream_session::close_session(stream_session_state final_state) { - sslog.debug("[Stream #{}] close_session session={}, state={}, is_aborted={}", plan_id(), this, final_state, _is_aborted); - if (!_is_aborted) { - _is_aborted = true; - set_state(final_state); - - if (final_state == stream_session_state::FAILED) { - for (auto& x : _transfers) { - stream_transfer_task& task = x.second; - sslog.debug("[Stream #{}] close_session session={}, state={}, abort stream_transfer_task cf_id={}", plan_id(), this, final_state, task.cf_id); - task.abort(); - } - for (auto& x : _receivers) { - stream_receive_task& task = x.second; - sslog.debug("[Stream #{}] close_session session={}, state={}, abort stream_receive_task cf_id={}", plan_id(), this, final_state, task.cf_id); - receiving_failed(x.first); - task.abort(); - } - } - - // Note that we shouldn't block on this close because this method is called on the handler - // incoming thread (so we would deadlock). 
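A note on the loop shape in start_streaming_files() above: the iterator is advanced before task.start() is called, presumably because a task that completes can erase itself from _transfers (via transfer_task_completed()), which would invalidate an iterator still pointing at it. The idiom in isolation, assuming only that the callee may erase the visited element:

    #include <map>

    // Advance first, then act: `act` may erase the element at `cur`
    // without invalidating `it`, which already points past it.
    template <typename Map, typename Fn>
    void for_each_allowing_erase(Map& m, Fn act) {
        for (auto it = m.begin(); it != m.end();) {
            auto cur = it++;
            act(cur->second);
        }
    }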
- //handler.close(); - _stream_result->handle_session_complete(shared_from_this()); - - sslog.debug("[Stream #{}] close_session session={}, state={}, cancel keep_alive timer", plan_id(), this, final_state); - _keep_alive.cancel(); - } -} - -void stream_session::start() { - if (_requests.empty() && _transfers.empty()) { - sslog.info("[Stream #{}] Session does not have any tasks.", plan_id()); - close_session(stream_session_state::COMPLETE); - return; - } - auto connecting = netw::get_local_messaging_service().get_preferred_ip(peer); - if (peer == connecting) { - sslog.info("[Stream #{}] Starting streaming to {}", plan_id(), peer); - } else { - sslog.info("[Stream #{}] Starting streaming to {} through {}", plan_id(), peer, connecting); - } - on_initialization_complete().handle_exception([this] (auto ep) { - this->on_error(); - }); -} - -void stream_session::init(shared_ptr stream_result_) { - _stream_result = stream_result_; - _keep_alive.set_callback([this] { - auto plan_id = this->plan_id(); - auto peer = this->peer; - get_local_stream_manager().get_progress_on_all_shards(plan_id, peer).then([this, peer, plan_id] (stream_bytes sbytes) { - if (this->_is_aborted) { - sslog.info("[Stream #{}] The session {} is closed, keep alive timer will do nothing", plan_id, this); - return; - } - auto now = lowres_clock::now(); - sslog.debug("[Stream #{}] keep alive timer callback sbytes old: tx={}, rx={} new: tx={} rx={}", - plan_id, this->_last_stream_bytes.bytes_sent, this->_last_stream_bytes.bytes_received, - sbytes.bytes_sent, sbytes.bytes_received); - if (sbytes.bytes_sent > this->_last_stream_bytes.bytes_sent || - sbytes.bytes_received > this->_last_stream_bytes.bytes_received) { - sslog.debug("[Stream #{}] The session {} made progress with peer {}", plan_id, this, peer); - // Progress has been made - this->_last_stream_bytes = sbytes; - this->_last_stream_progress = now; - this->start_keep_alive_timer(); - } else if (now - this->_last_stream_progress >= this->_keep_alive_timeout) { - // Timeout - sslog.info("[Stream #{}] The session {} is idle for {} seconds, the peer {} is probably gone, close it", - plan_id, this, this->_keep_alive_timeout.count(), peer); - this->on_error(); - } else { - // Start the timer to check again - sslog.info("[Stream #{}] The session {} made no progress with peer {}", plan_id, this, peer); - this->start_keep_alive_timer(); - } - }).handle_exception([plan_id, peer, session = this->shared_from_this()] (auto ep) { - sslog.info("[Stream #{}] keep alive timer callback fails with peer {}: {}", plan_id, peer, ep); - }); - }); - _last_stream_progress = lowres_clock::now(); - start_keep_alive_timer(); -} - -utils::UUID stream_session::plan_id() { - return _stream_result ? _stream_result->plan_id : UUID(); -} - -sstring stream_session::description() { - return _stream_result ? 
_stream_result->description : ""; -} - -future<> stream_session::update_progress() { - return get_local_stream_manager().get_progress_on_all_shards(plan_id(), peer).then([this] (auto sbytes) { - auto bytes_sent = sbytes.bytes_sent; - if (bytes_sent > 0) { - auto tx = progress_info(this->peer, "txnofile", progress_info::direction::OUT, bytes_sent, bytes_sent); - _session_info.update_progress(std::move(tx)); - } - auto bytes_received = sbytes.bytes_received; - if (bytes_received > 0) { - auto rx = progress_info(this->peer, "rxnofile", progress_info::direction::IN, bytes_received, bytes_received); - _session_info.update_progress(std::move(rx)); - } - }); -} - -} // namespace streaming diff --git a/scylla/streaming/stream_session.hh b/scylla/streaming/stream_session.hh deleted file mode 100644 index 45d0901..0000000 --- a/scylla/streaming/stream_session.hh +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "gms/i_endpoint_state_change_subscriber.hh" -#include "core/distributed.hh" -#include "cql3/query_processor.hh" -#include "message/messaging_service_fwd.hh" -#include "utils/UUID.hh" -#include "streaming/stream_session_state.hh" -#include "streaming/stream_transfer_task.hh" -#include "streaming/stream_receive_task.hh" -#include "streaming/stream_request.hh" -#include "streaming/prepare_message.hh" -#include "streaming/stream_detail.hh" -#include "streaming/stream_manager.hh" -#include "streaming/session_info.hh" -#include "sstables/sstables.hh" -#include "query-request.hh" -#include "dht/i_partitioner.hh" -#include -#include -#include - -namespace streaming { - -class stream_result_future; - -/** - * Handles the streaming a one or more section of one of more sstables to and from a specific - * remote node. - * - * Both this node and the remote one will create a similar symmetrical StreamSession. A streaming - * session has the following life-cycle: - * - * 1. 
Connections Initialization - * - * (a) A node (the initiator in the following) creates a new StreamSession, initializes it (init()) - * and then starts it (start()). Start will create a {@link ConnectionHandler} that will create - * two connections to the remote node (the follower in the following) with whom to stream and send - * a StreamInit message. The first connection will be the incoming connection for the - * initiator, and the second connection will be the outgoing. - * (b) Upon reception of that StreamInit message, the follower creates its own StreamSession, - * initializes it if it does not yet exist, and attaches the connecting socket to its ConnectionHandler - * according to the StreamInit message's isForOutgoing flag. - * (c) When both the incoming and outgoing connections are established, StreamSession calls - * the StreamSession#onInitializationComplete method to start the streaming prepare phase - * (StreamResultFuture.startStreaming()). - * - * 2. Streaming preparation phase - * - * (a) This phase is started when the initiator's onInitializationComplete() method is called. This method sends a - * PrepareMessage that includes what files/sections this node will stream to the follower - * (stored in a StreamTransferTask, each column family has its own transfer task) and what - * the follower needs to stream back (StreamReceiveTask, same as above). If the initiator has - * nothing to receive from the follower, it goes directly to its Streaming phase. Otherwise, - * it waits for the follower's PrepareMessage. - * (b) Upon reception of the PrepareMessage, the follower records which files/sections it will receive - * and sends back its own PrepareMessage with a summary of the files/sections that will be sent to - * the initiator (prepare()). After having sent that message, the follower goes to its Streaming - * phase. - * (c) When the initiator receives the follower's PrepareMessage, it records which files/sections it will - * receive and then goes to its own Streaming phase. - * - * 3. Streaming phase - * - * (a) The streaming phase is started by each node involved (note that each side - * of the StreamSession may be the sender for some of the files) by calling startStreamingFiles(). - * This will sequentially send a FileMessage for each file of each StreamTransferTask. Each FileMessage - * consists of a FileMessageHeader that indicates which file is coming and then starts streaming the - * content for that file (StreamWriter in FileMessage.serialize()). When a file is fully sent, the - * fileSent() method is called for that file. If all the files for a StreamTransferTask are sent - * (StreamTransferTask.complete()), the task is marked complete (taskCompleted()). - * (b) On the receiving side, an SSTable will be written for the incoming file (StreamReader in - * FileMessage.deserialize()) and once the FileMessage is fully received, the file will be marked as - * complete (received()). When all files for the StreamReceiveTask have been received, the sstables - * are added to the CFS (and secondary indexes are built, StreamReceiveTask.complete()) and the task - * is marked complete (taskCompleted()). - * (c) If during the streaming of a particular file an I/O error occurs on the receiving end of a stream - * (FileMessage.deserialize), the node will retry the file (up to DatabaseDescriptor.getMaxStreamingRetries()) - * by sending a RetryMessage to the sender. On receiving a RetryMessage, the sender simply issues a new - * FileMessage for that file.
- * (d) When all transfer and receive tasks for a session are complete, the session moves to the Completion phase - * (maybeCompleted()). - * - * 4. Completion phase - * - * (a) When a node has finished all transfer and receive tasks, it enters the completion phase (maybeCompleted()). - * If it had already received a CompleteMessage from the other side (it is in the WAIT_COMPLETE state), the - * session is done and is closed (closeSession()). Otherwise, the node switches to the WAIT_COMPLETE state and - * sends a CompleteMessage to the other side. - */ -class stream_session : public enable_shared_from_this<stream_session> { -private: - using messaging_verb = netw::messaging_verb; - using messaging_service = netw::messaging_service; - using msg_addr = netw::msg_addr; - using inet_address = gms::inet_address; - using UUID = utils::UUID; - using token = dht::token; - using ring_position = dht::ring_position; - static void init_messaging_service_handler(); - static distributed<database>* _db; -public: - static netw::messaging_service& ms() { - return netw::get_local_messaging_service(); - } - static database& get_local_db() { return _db->local(); } - static distributed<database>& get_db() { return *_db; } - static future<> init_streaming_service(distributed<database>& db); -public: - /** - * Streaming endpoint. - * - * Each {@code StreamSession} is identified by this InetAddress, which is the broadcast address of the streaming node. - */ - inet_address peer; - unsigned dst_cpu_id; -private: - // should not be null when session is started - shared_ptr<stream_result_future> _stream_result; - - // stream requests to send to the peer - std::vector<stream_request> _requests; - // streaming tasks are created and managed per ColumnFamily ID - std::map<UUID, stream_transfer_task> _transfers; - // data receivers, filled after receiving prepare message - std::map<UUID, stream_receive_task> _receivers; - //private final StreamingMetrics metrics; - /* can be null when session is created in remote */ - //private final StreamConnectionFactory factory; - - int64_t _bytes_sent = 0; - int64_t _bytes_received = 0; - - int _retries; - bool _is_aborted = false; - - stream_session_state _state = stream_session_state::INITIALIZED; - bool _complete_sent = false; - - // If the session is idle for 300 minutes, close the session - std::chrono::seconds _keep_alive_timeout{60 * 300}; - // Check every 10 minutes - std::chrono::seconds _keep_alive_interval{60 * 10}; - timer<lowres_clock> _keep_alive; - stream_bytes _last_stream_bytes; - lowres_clock::time_point _last_stream_progress; - - session_info _session_info; -public: - void start_keep_alive_timer() { - _keep_alive.rearm(lowres_clock::now() + _keep_alive_interval); - } - - void add_bytes_sent(int64_t bytes) { - _bytes_sent += bytes; - } - - void add_bytes_received(int64_t bytes) { - _bytes_received += bytes; - } - - int64_t get_bytes_sent() const { - return _bytes_sent; - } - - int64_t get_bytes_received() const { - return _bytes_received; - } -public: - stream_session(); - /** - * Create a new streaming session with the peer. - * - * @param peer Address of streaming peer - * @param connecting Actual connecting address - * @param factory is used for establishing connection - */ - stream_session(inet_address peer_); - ~stream_session(); - - UUID plan_id(); - - sstring description(); - -public: - /** - * Bind this session to report to a specific {@link StreamResultFuture} and - * perform pre-streaming initialization. - * - * @param streamResult result to report to - */ - void init(shared_ptr<stream_result_future> stream_result_); - - void start(); - - /** - * Request data fetch task to this session.
- * - * @param keyspace Requesting keyspace - * @param ranges Ranges to retrieve data - * @param columnFamilies ColumnFamily names. Can be empty if requesting all CF under the keyspace. - */ - void add_stream_request(sstring keyspace, dht::token_range_vector ranges, std::vector<sstring> column_families) { - _requests.emplace_back(std::move(keyspace), std::move(ranges), std::move(column_families)); - } - - /** - * Set up transfer for specific keyspace/ranges/CFs - * - * Used in repair - a streamed sstable in repair will be marked with the given repairedAt time - * - * @param keyspace Transfer keyspace - * @param ranges Transfer ranges - * @param columnFamilies Transfer ColumnFamilies - * @param flushTables flush tables? - * @param repairedAt the time the repair started. - */ - void add_transfer_ranges(sstring keyspace, dht::token_range_vector ranges, std::vector<sstring> column_families); - - std::vector<column_family*> get_column_family_stores(const sstring& keyspace, const std::vector<sstring>& column_families); - - void close_session(stream_session_state final_state); - -public: - /** - * Set current state to {@code newState}. - * - * @param newState new state to set - */ - void set_state(stream_session_state new_state) { - _state = new_state; - } - - /** - * @return current state - */ - stream_session_state get_state() { - return _state; - } - - /** - * Return if this session completed successfully. - * - * @return true if session completed successfully. - */ - bool is_success() { - return _state == stream_session_state::COMPLETE; - } - - future<> initiate(); - - /** - * Call back when connection initialization is complete to start the prepare phase. - */ - future<> on_initialization_complete(); - - /** - * Call back for handling exception during streaming. - * - * @param e thrown exception - */ - void on_error(); - - /** - * Prepare this session for sending/receiving files. - */ - future<prepare_message> prepare(std::vector<stream_request> requests, std::vector<stream_summary> summaries); - - void follower_start_sent(); - - /** - * Check if session is completed on receiving {@code StreamMessage.Type.COMPLETE} message. - */ - void complete(); - - /** - * Call back on receiving {@code StreamMessage.Type.SESSION_FAILED} message. - */ - void session_failed(); - - /** - * @return Current snapshot of this session info. - */ - session_info make_session_info(); - - session_info& get_session_info() { - return _session_info; - } - - const session_info& get_session_info() const { - return _session_info; - } - - future<> update_progress(); - - void receive_task_completed(UUID cf_id); - void transfer_task_completed(UUID cf_id); -private: - void send_complete_message(); - bool maybe_completed(); - void prepare_receiving(stream_summary& summary); - void start_streaming_files(); - future<> receiving_failed(UUID cf_id); -}; - -} // namespace streaming diff --git a/scylla/streaming/stream_session_state.cc b/scylla/streaming/stream_session_state.cc deleted file mode 100644 index 2144178..0000000 --- a/scylla/streaming/stream_session_state.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Modified by ScyllaDB. - * Copyright 2015 ScyllaDB. - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "streaming/stream_session_state.hh" -#include -#include -#include -#include "seastarx.hh" - -namespace streaming { - -static const std::map<stream_session_state, sstring> stream_session_state_names = { - {stream_session_state::INITIALIZED, "INITIALIZED"}, - {stream_session_state::PREPARING, "PREPARING"}, - {stream_session_state::STREAMING, "STREAMING"}, - {stream_session_state::WAIT_COMPLETE, "WAIT_COMPLETE"}, - {stream_session_state::COMPLETE, "COMPLETE"}, - {stream_session_state::FAILED, "FAILED"}, -}; - -std::ostream& operator<<(std::ostream& os, const stream_session_state& s) { - os << stream_session_state_names.at(s); - return os; -} - -} diff --git a/scylla/streaming/stream_session_state.hh b/scylla/streaming/stream_session_state.hh deleted file mode 100644 index c67f307..0000000 --- a/scylla/streaming/stream_session_state.hh +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see .
- */ - -#pragma once - -#include - -namespace streaming { - -enum class stream_session_state { - INITIALIZED, - PREPARING, - STREAMING, - WAIT_COMPLETE, - COMPLETE, - FAILED, -}; - -std::ostream& operator<<(std::ostream& os, const stream_session_state& s); - -} // namespace diff --git a/scylla/streaming/stream_state.hh b/scylla/streaming/stream_state.hh deleted file mode 100644 index 23242dc..0000000 --- a/scylla/streaming/stream_state.hh +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "utils/UUID.hh" -#include "streaming/session_info.hh" -#include - -namespace streaming { - -/** - * Current snapshot of streaming progress. - */ -class stream_state { -public: - using UUID = utils::UUID; - UUID plan_id; - sstring description; - std::vector<session_info> sessions; - - stream_state(UUID plan_id_, sstring description_, std::vector<session_info> sessions_) - : plan_id(std::move(plan_id_)) - , description(std::move(description_)) - , sessions(std::move(sessions_)) { - } - - bool has_failed_session() { - for (auto& x : sessions) { - if (x.is_failed()) { - return true; - } - } - return false; - } -}; - -} // namespace streaming diff --git a/scylla/streaming/stream_summary.cc b/scylla/streaming/stream_summary.cc deleted file mode 100644 index 1132ad0..0000000 --- a/scylla/streaming/stream_summary.cc +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "streaming/stream_summary.hh" -#include "types.hh" -#include "utils/serialization.hh" - -namespace streaming { - -std::ostream& operator<<(std::ostream& os, const stream_summary& x) { - os << "[ cf_id=" << x.cf_id << " ]"; - return os; -} - -} // namespace streaming diff --git a/scylla/streaming/stream_summary.hh b/scylla/streaming/stream_summary.hh deleted file mode 100644 index f0a385d..0000000 --- a/scylla/streaming/stream_summary.hh +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "utils/UUID.hh" -#include - -namespace streaming { - -/** - * Summary of streaming. - */ -class stream_summary { -public: - using UUID = utils::UUID; - UUID cf_id; - - /** - * Number of files to transfer. Can be 0 if nothing to transfer for some streaming request. 
- */ - int files; - long total_size; - - stream_summary() = default; - stream_summary(UUID _cf_id, int _files, long _total_size) - : cf_id(_cf_id) - , files(_files) - , total_size(_total_size) { - } - friend std::ostream& operator<<(std::ostream& os, const stream_summary& r); -}; - -} // namespace streaming diff --git a/scylla/streaming/stream_task.cc b/scylla/streaming/stream_task.cc deleted file mode 100644 index 203adf8..0000000 --- a/scylla/streaming/stream_task.cc +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#include "streaming/stream_task.hh" -#include "streaming/stream_session.hh" - -namespace streaming { - -stream_task::stream_task(shared_ptr<stream_session> _session, UUID _cf_id) - : session(_session) - , cf_id(std::move(_cf_id)) { -} - -stream_task::~stream_task() = default; - -} diff --git a/scylla/streaming/stream_task.hh b/scylla/streaming/stream_task.hh deleted file mode 100644 index ad6fd98..0000000 --- a/scylla/streaming/stream_task.hh +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla.
- * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "utils/UUID.hh" -#include "streaming/stream_summary.hh" -#include -#include "core/shared_ptr.hh" - -namespace streaming { - -class stream_session; - -/** - * StreamTask is an abstraction of the streaming task performed over a specific ColumnFamily. - */ -class stream_task { -public: - using UUID = utils::UUID; - /** StreamSession that this task belongs to */ - shared_ptr<stream_session> session; - - UUID cf_id; - - stream_task(shared_ptr<stream_session> _session, UUID _cf_id); - virtual ~stream_task(); - -public: - /** - * @return total number of files this task receives/streams. - */ - virtual int get_total_number_of_files() = 0; - - /** - * @return total bytes expected to receive - */ - virtual long get_total_size() = 0; - - /** - * Abort the task. - * Subclass should implement cleaning up resources. - */ - virtual void abort() = 0; - - /** - * @return StreamSummary that describes this task - */ - virtual stream_summary get_summary() { - return stream_summary(this->cf_id, this->get_total_number_of_files(), this->get_total_size()); - } -}; - -} // namespace streaming diff --git a/scylla/streaming/stream_transfer_task.cc b/scylla/streaming/stream_transfer_task.cc deleted file mode 100644 index 2a041ac..0000000 --- a/scylla/streaming/stream_transfer_task.cc +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see .
- */ - -#include "log.hh" -#include "streaming/stream_detail.hh" -#include "streaming/stream_transfer_task.hh" -#include "streaming/stream_session.hh" -#include "streaming/stream_manager.hh" -#include "mutation_reader.hh" -#include "frozen_mutation.hh" -#include "mutation.hh" -#include "message/messaging_service.hh" -#include "range.hh" -#include "dht/i_partitioner.hh" -#include "service/priority_manager.hh" -#include -#include "service/storage_service.hh" -#include -#include - -namespace streaming { - -extern logging::logger sslog; - -stream_transfer_task::stream_transfer_task(shared_ptr session, UUID cf_id, dht::token_range_vector ranges, long total_size) - : stream_task(session, cf_id) - , _ranges(std::move(ranges)) - , _total_size(total_size) { -} - -stream_transfer_task::~stream_transfer_task() = default; - -struct send_info { - database& db; - utils::UUID plan_id; - utils::UUID cf_id; - dht::partition_range_vector prs; - netw::messaging_service::msg_addr id; - uint32_t dst_cpu_id; - size_t mutations_nr{0}; - semaphore mutations_done{0}; - bool error_logged = false; - mutation_reader reader; - send_info(database& db_, utils::UUID plan_id_, utils::UUID cf_id_, - dht::partition_range_vector prs_, netw::messaging_service::msg_addr id_, - uint32_t dst_cpu_id_) - : db(db_) - , plan_id(plan_id_) - , cf_id(cf_id_) - , prs(std::move(prs_)) - , id(id_) - , dst_cpu_id(dst_cpu_id_) { - auto& cf = db.find_column_family(this->cf_id); - reader = cf.make_streaming_reader(cf.schema(), this->prs); - } -}; - -future<> do_send_mutations(lw_shared_ptr si, frozen_mutation fm, bool fragmented) { - return get_local_stream_manager().mutation_send_limiter().wait().then([si, fragmented, fm = std::move(fm)] () mutable { - sslog.debug("[Stream #{}] SEND STREAM_MUTATION to {}, cf_id={}", si->plan_id, si->id, si->cf_id); - auto fm_size = fm.representation().size(); - netw::get_local_messaging_service().send_stream_mutation(si->id, si->plan_id, std::move(fm), si->dst_cpu_id, fragmented).then([si, fm_size] { - sslog.debug("[Stream #{}] GOT STREAM_MUTATION Reply from {}", si->plan_id, si->id.addr); - get_local_stream_manager().update_progress(si->plan_id, si->id.addr, progress_info::direction::OUT, fm_size); - si->mutations_done.signal(); - }).handle_exception([si] (auto ep) { - // There might be larger number of STREAM_MUTATION inflight. - // Log one error per column_family per range - if (!si->error_logged) { - si->error_logged = true; - sslog.warn("[Stream #{}] stream_transfer_task: Fail to send STREAM_MUTATION to {}: {}", si->plan_id, si->id, ep); - } - si->mutations_done.broken(); - }).finally([] { - get_local_stream_manager().mutation_send_limiter().signal(); - }); - }); -} - -future<> send_mutations(lw_shared_ptr si) { - return repeat([si] () { - return si->reader().then([si] (auto smopt) { - if (smopt && si->db.column_family_exists(si->cf_id)) { - size_t fragment_size = default_frozen_fragment_size; - // Mutations cannot be sent fragmented if the receiving side doesn't support that. 
- if (!service::get_local_storage_service().cluster_supports_large_partitions()) { - fragment_size = std::numeric_limits<size_t>::max(); - } - return fragment_and_freeze(std::move(*smopt), [si] (auto fm, bool fragmented) { - si->mutations_nr++; - return do_send_mutations(si, std::move(fm), fragmented); - }, fragment_size).then([] { return stop_iteration::no; }); - } else { - return make_ready_future<stop_iteration>(stop_iteration::yes); - } - }); - }).then([si] { - return si->mutations_done.wait(si->mutations_nr); - }); -} - -void stream_transfer_task::start() { - auto plan_id = session->plan_id(); - auto cf_id = this->cf_id; - auto dst_cpu_id = session->dst_cpu_id; - auto& schema = session->get_local_db().find_column_family(cf_id).schema(); - auto id = netw::messaging_service::msg_addr{session->peer, session->dst_cpu_id}; - sslog.debug("[Stream #{}] stream_transfer_task: cf_id={}", plan_id, cf_id); - sort_and_merge_ranges(); - _shard_ranges = dht::split_ranges_to_shards(_ranges, *schema); - parallel_for_each(_shard_ranges, [this, dst_cpu_id, plan_id, cf_id, id] (auto& item) { - auto& shard = item.first; - auto& prs = item.second; - return session->get_db().invoke_on(shard, [plan_id, cf_id, id, dst_cpu_id, prs = std::move(prs)] (database& db) mutable { - auto si = make_lw_shared<send_info>(db, plan_id, cf_id, prs, id, dst_cpu_id); - return send_mutations(std::move(si)); - }); - }).then([this, plan_id, cf_id, id] { - sslog.debug("[Stream #{}] SEND STREAM_MUTATION_DONE to {}, cf_id={}", plan_id, id, cf_id); - return session->ms().send_stream_mutation_done(id, plan_id, _ranges, - cf_id, session->dst_cpu_id).handle_exception([plan_id, id, cf_id] (auto ep) { - sslog.warn("[Stream #{}] stream_transfer_task: Fail to send STREAM_MUTATION_DONE to {}: {}", plan_id, id, ep); - std::rethrow_exception(ep); - }); - }).then([this, id, plan_id, cf_id] { - sslog.debug("[Stream #{}] GOT STREAM_MUTATION_DONE Reply from {}", plan_id, id.addr); - session->start_keep_alive_timer(); - session->transfer_task_completed(cf_id); - }).handle_exception([this, plan_id, id] (auto ep) { - sslog.warn("[Stream #{}] stream_transfer_task: Fail to send to {}: {}", plan_id, id, ep); - this->session->on_error(); - }); -} - -void stream_transfer_task::append_ranges(const dht::token_range_vector& ranges) { - _ranges.insert(_ranges.end(), ranges.begin(), ranges.end()); -} - -void stream_transfer_task::sort_and_merge_ranges() { - boost::icl::interval_set<dht::token> myset; - dht::token_range_vector ranges; - sslog.debug("cf_id = {}, before ranges = {}, size={}", cf_id, _ranges, _ranges.size()); - _ranges.swap(ranges); - for (auto& range : ranges) { - // TODO: We should convert range_to_interval and interval_to_range to - // take nonwrapping_range ranges. - myset += locator::token_metadata::range_to_interval(range); - } - ranges.clear(); - ranges.shrink_to_fit(); - for (auto& i : myset) { - auto r = locator::token_metadata::interval_to_range(i); - _ranges.push_back(dht::token_range(r)); - } - sslog.debug("cf_id = {}, after ranges = {}, size={}", cf_id, _ranges, _ranges.size()); -} - -} // namespace streaming diff --git a/scylla/streaming/stream_transfer_task.hh b/scylla/streaming/stream_transfer_task.hh deleted file mode 100644 index bd821b3..0000000 --- a/scylla/streaming/stream_transfer_task.hh +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership.
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modified by ScyllaDB - * Copyright (C) 2015 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include "utils/UUID.hh" -#include "streaming/stream_task.hh" -#include "streaming/stream_detail.hh" -#include "sstables/sstables.hh" -#include -#include - -namespace streaming { - -class stream_session; -class send_info; - -/** - * StreamTransferTask sends sections of SSTable files in a certain ColumnFamily. - */ -class stream_transfer_task : public stream_task { -private: - int32_t sequence_number = 0; - bool aborted = false; - // A stream_transfer_task always contains the same range to stream - dht::token_range_vector _ranges; - std::map<unsigned, dht::partition_range_vector> _shard_ranges; - long _total_size; -public: - using UUID = utils::UUID; - stream_transfer_task(stream_transfer_task&&) = default; - stream_transfer_task(shared_ptr<stream_session> session, UUID cf_id, dht::token_range_vector ranges, long total_size = 0); - ~stream_transfer_task(); -public: - virtual void abort() override { - } - - virtual int get_total_number_of_files() override { - return 1; - } - - virtual long get_total_size() override { - return _total_size; - } - - void start(); - - void append_ranges(const dht::token_range_vector& ranges); - void sort_and_merge_ranges(); -}; - -} // namespace streaming diff --git a/scylla/supervisor.cc b/scylla/supervisor.cc deleted file mode 100644 index 1a74c78..0000000 --- a/scylla/supervisor.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see .
- */ - -#include "supervisor.hh" -#include "init.hh" -#include -#include -#include - -#ifdef HAVE_LIBSYSTEMD -#include -#endif - -const sstring supervisor::scylla_upstart_job_str("scylla-server"); -const sstring supervisor::upstart_job_env("UPSTART_JOB"); -const sstring supervisor::systemd_ready_msg("READY=1"); -const sstring supervisor::systemd_status_msg_prefix("STATUS"); - -sstring supervisor::get_upstart_job_env() { - const char* upstart_job = std::getenv(upstart_job_env.c_str()); - return !upstart_job ? "" : upstart_job; -} - -bool supervisor::try_notify_upstart(sstring msg, bool ready) { - static const sstring upstart_job_str(get_upstart_job_env()); - - if (upstart_job_str != scylla_upstart_job_str) { - return false; - } - - if (ready) { - std::raise(SIGSTOP); - } - - return true; -} - -void supervisor::try_notify_systemd(sstring msg, bool ready) { -#ifdef HAVE_LIBSYSTEMD - if (ready) { - sd_notify(0, sprint("%s\n%s=%s\n", systemd_ready_msg, systemd_status_msg_prefix, msg).c_str()); - } else { - sd_notify(0, sprint("%s=%s\n", systemd_status_msg_prefix, msg).c_str()); - } -#endif -} - -void supervisor::notify(sstring msg, bool ready) { - startlog.trace("{}", msg); - - if (try_notify_upstart(msg, ready) == true) { - return; - } else { - try_notify_systemd(msg, ready); - } -} diff --git a/scylla/supervisor.hh b/scylla/supervisor.hh deleted file mode 100644 index a8d120f..0000000 --- a/scylla/supervisor.hh +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2017 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once - -#include -#include "seastarx.hh" - -class supervisor { -public: - static const sstring scylla_upstart_job_str; - static const sstring upstart_job_env; - static const sstring systemd_ready_msg; - /** A systemd status message has a format = */ - static const sstring systemd_status_msg_prefix; -public: - /** - * @brief Notify the Supervisor with the given message. 
- * @param msg message to notify the Supervisor with - * @param ready set to TRUE when scylla service becomes ready - */ - static void notify(sstring msg, bool ready = false); - -private: - static void try_notify_systemd(sstring msg, bool ready); - static bool try_notify_upstart(sstring msg, bool ready); - static sstring get_upstart_job_env(); -}; diff --git a/scylla/swagger-ui/.dockerignore b/scylla/swagger-ui/.dockerignore deleted file mode 100644 index 8cfdf4b..0000000 --- a/scylla/swagger-ui/.dockerignore +++ /dev/null @@ -1,4 +0,0 @@ -.git -node_modules -bower_components -*.swp diff --git a/scylla/swagger-ui/.gitattributes b/scylla/swagger-ui/.gitattributes deleted file mode 100644 index cbcd3fe..0000000 --- a/scylla/swagger-ui/.gitattributes +++ /dev/null @@ -1,13 +0,0 @@ -* text eol=lf - -dist/**/*.js binary -dist/**/*.map binary -dist/**/*.eot binary -dist/**/*.svg binary -dist/**/*.ttf binary -dist/**/*.woff binary -dist/**/*.woff2 binary -dist/**/*.png binary -dist/*.html text - -src/main/html/images/*.png binary diff --git a/scylla/swagger-ui/.gitignore b/scylla/swagger-ui/.gitignore deleted file mode 100644 index 1f2db9b..0000000 --- a/scylla/swagger-ui/.gitignore +++ /dev/null @@ -1,13 +0,0 @@ -.DS_STORE -*.ipr -*.iml -*.iws -web/ -lib/*.zip -version.properties -.sass-cache -swagger-ui.sublime-workspace -.idea -.project -node_modules/* -/nbproject/private/ \ No newline at end of file diff --git a/scylla/swagger-ui/.jshintignore b/scylla/swagger-ui/.jshintignore deleted file mode 100644 index 66f4248..0000000 --- a/scylla/swagger-ui/.jshintignore +++ /dev/null @@ -1,5 +0,0 @@ -node_modules -src/main/javascript/doc.js -dist -lib -.log \ No newline at end of file diff --git a/scylla/swagger-ui/.jshintrc b/scylla/swagger-ui/.jshintrc deleted file mode 100644 index 9a1b0c8..0000000 --- a/scylla/swagger-ui/.jshintrc +++ /dev/null @@ -1,39 +0,0 @@ -{ - "node": true, - "browser": true, - "esnext": true, - "bitwise": true, - "curly": true, - "eqeqeq": true, - "immed": true, - "indent": 2, - "latedef": false, - "newcap": true, - "noarg": true, - "quotmark": "single", - "regexp": true, - "undef": true, - "unused": true, - "strict": true, - "trailing": true, - "smarttabs": true, - "validthis": true, - "globals": { - - // Libraries - "_": false, - "$": false, - "Backbone": false, - "Handlebars": false, - "jQuery": false, - "marked": false, - "SwaggerClient": false, - "hljs": false, - "SwaggerUi": false, - "define": false, - - // Global object - // TODO: remove these - "Docs": false - } -} \ No newline at end of file diff --git a/scylla/swagger-ui/.npmignore b/scylla/swagger-ui/.npmignore deleted file mode 100644 index 402d538..0000000 --- a/scylla/swagger-ui/.npmignore +++ /dev/null @@ -1,9 +0,0 @@ -*.sublime-* -example.html -*.tgz -.classpath -.project -.npmignore -dist/sample.html -dist/spec.js -node_modules diff --git a/scylla/swagger-ui/.travis.yml b/scylla/swagger-ui/.travis.yml deleted file mode 100644 index 37245c7..0000000 --- a/scylla/swagger-ui/.travis.yml +++ /dev/null @@ -1,10 +0,0 @@ -sudo: false -language: node_js -node_js: - - '0.10' - - '0.12' -install: - - export DISPLAY=:99.0 - - sh -e /etc/init.d/xvfb start - - npm i -g jshint - - npm install diff --git a/scylla/swagger-ui/CONTRIBUTING.md b/scylla/swagger-ui/CONTRIBUTING.md deleted file mode 100644 index edf33e5..0000000 --- a/scylla/swagger-ui/CONTRIBUTING.md +++ /dev/null @@ -1,8 +0,0 @@ -## Pull Requests -Please make sure your pull requests are made to the
[**`develop_2.0`**](https://github.com/swagger-api/swagger-ui/tree/develop_2.0) branch at this time. - -## Issues -SwaggerUI uses [SwaggerJS](https://github.com/swagger-api/swagger-js) library for many internal operations. If you see errors in -[`swagger-client.js`](lib/swagger-client.js) file, you should probably open the issue in [SwaggerJS](https://github.com/swagger-api/swagger-js) repository. - -Please open issues related to Swagger specifications in [Swagger Specs](https://github.com/swagger-api/swagger-spec) repository. diff --git a/scylla/swagger-ui/Dockerfile b/scylla/swagger-ui/Dockerfile deleted file mode 100644 index 6869d5a..0000000 --- a/scylla/swagger-ui/Dockerfile +++ /dev/null @@ -1,22 +0,0 @@ -### -# swagger-ui-builder - https://github.com/swagger-api/swagger-ui/ -# Container for building the swagger-ui static site -# -# Build: docker build -t swagger-ui-builder . -# Run: docker run -v $PWD/dist:/build/dist swagger-ui-builder -# -### - -FROM ubuntu:14.04 -MAINTAINER dnephin@gmail.com - -ENV DEBIAN_FRONTEND noninteractive - -RUN apt-get update && apt-get install -y git npm nodejs openjdk-7-jre -RUN ln -s /usr/bin/nodejs /usr/local/bin/node - -WORKDIR /build -ADD package.json /build/package.json -RUN npm install -ADD . /build -CMD ./node_modules/gulp/bin/gulp.js serve diff --git a/scylla/swagger-ui/LICENSE b/scylla/swagger-ui/LICENSE deleted file mode 100644 index 542991f..0000000 --- a/scylla/swagger-ui/LICENSE +++ /dev/null @@ -1,11 +0,0 @@ -Copyright 2015 SmartBear Software - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at [apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0) - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/scylla/swagger-ui/README.md b/scylla/swagger-ui/README.md deleted file mode 100644 index 72b2464..0000000 --- a/scylla/swagger-ui/README.md +++ /dev/null @@ -1,242 +0,0 @@ -# Swagger UI - -[![Build Status](https://travis-ci.org/swagger-api/swagger-ui.svg?branch=master)](https://travis-ci.org/swagger-api/swagger-ui) - -Swagger UI is part of the Swagger project. The Swagger project allows you to produce, visualize and consume your OWN RESTful services. No proxy or 3rd party services required. Do it your own way. - -Swagger UI is a dependency-free collection of HTML, Javascript, and CSS assets that dynamically -generate beautiful documentation and sandbox from a Swagger-compliant API. Because Swagger UI has no dependencies, you can host it in any server environment, or on your local machine. - -## What's Swagger? - -The goal of Swagger™ is to define a standard, language-agnostic interface to REST APIs which allows both humans and computers to discover and understand the capabilities of the service without access to source code, documentation, or through network traffic inspection. When properly defined via Swagger, a consumer can understand and interact with the remote service with a minimal amount of implementation logic. Similar to what interfaces have done for lower-level programming, Swagger removes the guesswork in calling the service. 
- - -Check out [Swagger-Spec](https://github.com/swagger-api/swagger-spec) for additional information about the Swagger project, including additional libraries with support for other languages and more. - - -## Compatibility -The Swagger Specification has undergone 4 revisions since initial creation in 2010. Compatibility between swagger-ui and the Swagger specification is as follows: - -Swagger UI Version | Release Date | Swagger Spec compatibility | Notes | Status ------------------- | ------------ | -------------------------- | ----- | ------ -2.1.1 | 2015-06-06 | 1.1, 1.2, 2.0 | [master](https://github.com/swagger-api/swagger-ui) | -2.0.24 | 2014-09-12 | 1.1, 1.2 | [tag v2.0.24](https://github.com/swagger-api/swagger-ui/tree/v2.0.24) | -1.0.13 | 2013-03-08 | 1.1, 1.2 | [tag v1.0.13](https://github.com/swagger-api/swagger-ui/tree/v1.0.13) | -1.0.1 | 2011-10-11 | 1.0, 1.1 | [tag v1.0.1](https://github.com/swagger-api/swagger-ui/tree/v1.0.1) | - -## How to Use It - -### Download -You can use the swagger-ui code AS-IS! No need to build or recompile--just clone this repo and use the pre-built files in the `dist` folder. If you like swagger-ui as-is, stop here. - -##### Browser support -Swagger UI works in all evergreen desktop browsers (Chrome, Safari, Firefox). Internet Explorer support is version 8 (IE8) and above. - -### Build -You can rebuild swagger-ui on your own to tweak it or just so you can say you did. To do so, follow these steps: - -1. `npm install` -2. `gulp` -3. You should see the distribution under the dist folder. Open [`./dist/index.html`](./dist/index.html) to launch Swagger UI in a browser - -### Development -Use `gulp watch` to make a new build and watch for changes in files. - -### Build using Docker - -To build swagger-ui using a docker container: - -``` -docker build -t swagger-ui-builder . -docker run -p 127.0.0.1:8080:8080 swagger-ui-builder -``` - -This will start Swagger UI at `http://localhost:8080`. - -### Use -Once you open the Swagger UI, it will load the [Swagger Petstore](http://petstore.swagger.io/v2/swagger.json) service and show its APIs. You can enter your own server URL and click explore to view the API. - -### Customize -You may choose to customize Swagger UI for your organization. Here is an overview of what's in its various directories: - -- dist: Contains a distribution which you can deploy on a server or load from your local machine. -- dist/lang: The swagger localization -- lib: Contains javascript dependencies which swagger-ui depends on -- node_modules: Contains node modules which swagger-ui uses for its development. -- src -- src/main/templates: [handlebars](http://handlebarsjs.com/) templates used to render swagger-ui -- src/main/html: the html files, some images and css -- src/main/javascript: main code - -### SwaggerUi -To use swagger-ui you should take a look at the [source of swagger-ui html page](https://github.com/swagger-api/swagger-ui/blob/master/dist/index.html) and customize it. This basically requires you to instantiate a SwaggerUi object and call load() on it as below: - -```javascript -var swaggerUi = new SwaggerUi({ - url:"http://petstore.swagger.io/v2/swagger.json", - dom_id:"swagger-ui-container" -}); - -swaggerUi.load(); -``` - -##### Parameters - -Parameter Name | Description ---- | --- -url | The url pointing to `swagger.json` (Swagger 2.0) or the resource listing (earlier versions) as per [Swagger Spec](https://github.com/swagger-api/swagger-spec/). -spec | A JSON object describing the Swagger specification.
When used, the `url` parameter will not be parsed. This is useful for testing manually-generated specifications without hosting them. Works for Swagger 2.0 specs only. -validatorUrl | By default, Swagger-UI attempts to validate specs against swagger.io's online validator. You can use this parameter to set a different validator URL, for example for locally deployed validators ([Validator Badge](https://github.com/swagger-api/validator-badge)). Setting it to `null` will disable validation. This parameter is relevant for Swagger 2.0 specs only. -dom_id | The id of the DOM element inside which SwaggerUi will put the user interface for swagger. -booleanValues | SwaggerUI renders boolean data types as a dropdown. By default it provides 'true' and 'false' strings as the possible choices. You can use this parameter to change the values in the dropdown to be something else, for example 0 and 1 by setting booleanValues to new Array(0, 1). -docExpansion | Controls how the API listing is displayed. It can be set to 'none' (default), 'list' (shows operations for each resource), or 'full' (fully expanded: shows operations and their details). -apisSorter | Apply a sort to the API/tags list. It can be 'alpha' (sort by name) or a function (see Array.prototype.sort() to know how sort function works). Default is the order returned by the server unchanged. -operationsSorter | Apply a sort to the operation list of each API. It can be 'alpha' (sort by paths alphanumerically), 'method' (sort by HTTP method) or a function (see Array.prototype.sort() to know how sort function works). Default is the order returned by the server unchanged. -onComplete | This is a callback function parameter which can be passed to be notified when SwaggerUI has completed rendering successfully. -onFailure | This is a callback function parameter which can be passed to be notified when SwaggerUI encountered a failure and was unable to render. -highlightSizeThreshold | Any size response below this threshold will be highlighted syntactically; attempting to highlight large responses can lead to browser hangs, so not including a threshold will default to highlighting all returned responses. -supportedSubmitMethods | An array of the HTTP operations that will have the 'Try it out!' option. An empty array disables all operations. This does not filter the operations from the display. -oauth2RedirectUrl | OAuth redirect URL -showRequestHeaders | Whether or not to show the headers that were sent when making a request via the 'Try it out!' option. Defaults to `false`. - -* All other parameters are explained in greater detail below - - -### HTTP Methods and API Invocation -swagger-ui supports invocation of APIs with all HTTP methods, including GET, PUT, POST, DELETE, PATCH, OPTIONS. These are handled in the [swagger-js](https://github.com/swagger-api/swagger-js) project, please see there for specifics on their usage. - - -### Header Parameters -Header params are supported through a pluggable mechanism in [swagger-js](https://github.com/swagger-api/swagger-js). You can see the [index.html](https://github.com/swagger-api/swagger-ui/blob/master/dist/index.html) for a sample of how to dynamically set headers: - -```js -// add a new SwaggerClient.ApiKeyAuthorization when the api-key changes in the ui.
-$('#input_apiKey').change(function() { - var key = $('#input_apiKey')[0].value; - if(key && key.trim() != "") { - swaggerUi.api.clientAuthorizations.add("key", new SwaggerClient.ApiKeyAuthorization("api_key", key, "header")); - } -}) -``` - -This will add header `api_key` with value `key` on every call to the server. You can substitute `query` to send the values as a query param. - -### Custom Header Parameters - (For Basic auth etc) -If you have some header parameters which you need to send with every request, use the headers as below: - -```js -swaggerUi.api.clientAuthorizations.add("key", new SwaggerClient.ApiKeyAuthorization("Authorization", "XXXX", "header")); -``` - -Note! You can pass multiple header params on a single request, just use unique names for them (`key` is used in the above example). A combined configuration sketch appears at the end of this README. - -### Localization and translation -The localization files are in the [lang](/lang) directory. Note that language files and the translator are not included in SwaggerUI by default. You need to add them manually. - -To enable translation you should append the next two lines in your Swagger's index.html (or another entry point you use) -```html -<script src='lang/translator.js' type='text/javascript'></script> -<script src='lang/ru.js' type='text/javascript'></script> -``` -The first script is the translator and the second one contains your language's lexemes. - -If you wish to add support for a new language you just need to create lang/your_lang.js and fill it like it's done in existing files. - -To add new lexemes for translation you should do two things: -1. Add the lexeme to the language file. - Example of a new line: "new sentence":"translation of new sentence". -2. Mark this lexeme in the source html with the attribute data-sw-translate. - Example of changed source: -```html -<span data-sw-translate>new sentence</span> -or -<input type='submit' data-sw-translate value='new sentence'/> -``` - -At this moment only inner html, title-attribute and value-attribute are going to be translated. - -## CORS Support -### OR: How to deal with "Can't read from server. It may not have the appropriate access-control-origin settings." - -CORS is a technique to prevent websites from doing bad things with your personal data. Most browsers + javascript toolkits not only support CORS but enforce it, which has implications for your API server which supports Swagger. - -You can read about CORS here: http://www.w3.org/TR/cors. - -There are two cases where no action is needed for CORS support: - -1. swagger-ui is hosted on the same server as the application itself (same host *and* port). -2. The application is located behind a proxy that enables the required CORS headers. This may already be covered within your organization. - -Otherwise, CORS support needs to be enabled for: - -1. Your Swagger docs. For Swagger 2.0 it's the `swagger.json` and any externally `$ref`ed docs, and for prior versions it's the `Resource Listing` and `API Declaration` files. -2. For the `Try it now` button to work, CORS needs to be enabled on your API endpoints as well. - -### Testing CORS Support - -You can verify CORS support with one of three techniques: - -- Curl your API and inspect the headers. For instance: - -```bash -$ curl -I "http://petstore.swagger.io/v2/swagger.json" -HTTP/1.1 200 OK -Date: Sat, 31 Jan 2015 23:05:44 GMT -Access-Control-Allow-Origin: * -Access-Control-Allow-Methods: GET, POST, DELETE, PUT, PATCH, OPTIONS -Access-Control-Allow-Headers: Content-Type, api_key, Authorization -Content-Type: application/json -Content-Length: 0 -``` - -This tells us that the petstore resource listing supports OPTIONS, and the following headers: `Content-Type`, `api_key`, `Authorization`. - -- Try swagger-ui from your file system and look at the debug console.
-
-### CORS and Header Parameters
-
-Swagger lets you easily send headers as parameters to requests. The names of these headers *MUST* be supported in your CORS configuration as well. From our example above:
-
-```
-Access-Control-Allow-Headers: Content-Type, api_key, Authorization
-```
-
-Only headers with these names will be allowed to be sent by Swagger-UI.
-
-## How to Improve It
-
-Create your own fork of [swagger-api/swagger-ui](https://github.com/swagger-api/swagger-ui).
-
-To share your changes, [submit a pull request](https://github.com/swagger-api/swagger-ui/pull/new/develop_2.0).
-
-## Change Log
-Please see [releases](https://github.com/swagger-api/swagger-ui/releases) for the change log.
-
-## License
-
-Copyright 2011-2015 SmartBear Software
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at [apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0)
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
diff --git a/scylla/swagger-ui/bower.json b/scylla/swagger-ui/bower.json deleted file mode 100644 index da275f5..0000000 --- a/scylla/swagger-ui/bower.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "name": "swagger-ui", - "main": "dist/index.html", - "version": "2.1.0", - "authors": [ - "Mohsen Azimi " - ], - "description": "Swagger UI", - "moduleType": [ - "globals" - ], - "keywords": [ - "Swagger", - "API" - ], - "license": "Copyright 2015 SmartBear Software", - "homepage": "http://swagger.io", - "private": true, - "ignore": [ - "**/.*", - "node_modules", - "bower_components", - "test", - "tests" - ] -} diff --git a/scylla/swagger-ui/dist/css/print.css b/scylla/swagger-ui/dist/css/print.css deleted file mode 100644 index cd3aa8b..0000000 --- a/scylla/swagger-ui/dist/css/print.css +++ /dev/null @@ -1,1172 +0,0 @@ -/* Original style from softwaremaniacs.org (c) Ivan Sagalaev */ -.swagger-section pre code { - display: block; - padding: 0.5em; - background: #F0F0F0; -} -.swagger-section pre code, -.swagger-section pre .subst, -.swagger-section pre .tag .title, -.swagger-section pre .lisp .title, -.swagger-section pre .clojure .built_in, -.swagger-section pre .nginx .title { - color: black; -} -.swagger-section pre .string, -.swagger-section pre .title, -.swagger-section pre .constant, -.swagger-section pre .parent, -.swagger-section pre .tag .value, -.swagger-section pre .rules .value, -.swagger-section pre .rules .value .number, -.swagger-section pre .preprocessor, -.swagger-section pre .ruby .symbol, -.swagger-section pre .ruby .symbol .string, -.swagger-section pre .aggregate, -.swagger-section pre .template_tag, -.swagger-section pre .django .variable, -.swagger-section pre .smalltalk .class, -.swagger-section pre .addition, -.swagger-section pre .flow, -.swagger-section pre .stream, -.swagger-section pre .bash .variable, -.swagger-section pre .apache .tag, -.swagger-section pre .apache .cbracket, -.swagger-section pre .tex .command, -.swagger-section pre .tex .special, -.swagger-section pre .erlang_repl .function_or_atom, -.swagger-section pre .markdown .header { - color: #800; -} -.swagger-section pre .comment, -.swagger-section pre .annotation, -.swagger-section pre .template_comment, -.swagger-section pre .diff .header, -.swagger-section pre .chunk, -.swagger-section pre .markdown .blockquote { - color: #888; -} -.swagger-section pre .number, -.swagger-section pre .date, -.swagger-section pre .regexp, -.swagger-section pre .literal, -.swagger-section pre .smalltalk .symbol, -.swagger-section pre .smalltalk .char, -.swagger-section pre .go .constant, -.swagger-section pre .change, -.swagger-section pre .markdown .bullet, -.swagger-section pre .markdown .link_url { - color: #080; -} -.swagger-section pre .label, -.swagger-section pre .javadoc, -.swagger-section pre .ruby .string, -.swagger-section pre .decorator, -.swagger-section pre .filter .argument, -.swagger-section pre .localvars, -.swagger-section pre .array, -.swagger-section pre .attr_selector, -.swagger-section pre .important, -.swagger-section pre .pseudo, -.swagger-section pre .pi, -.swagger-section pre .doctype, -.swagger-section pre .deletion, -.swagger-section pre .envvar, -.swagger-section pre .shebang, -.swagger-section pre .apache .sqbracket, -.swagger-section pre .nginx .built_in, -.swagger-section pre .tex .formula, -.swagger-section pre .erlang_repl .reserved, -.swagger-section pre .prompt, -.swagger-section pre .markdown .link_label, -.swagger-section pre .vhdl .attribute, -.swagger-section pre .clojure .attribute, 
-.swagger-section pre .coffeescript .property { - color: #8888ff; -} -.swagger-section pre .keyword, -.swagger-section pre .id, -.swagger-section pre .phpdoc, -.swagger-section pre .title, -.swagger-section pre .built_in, -.swagger-section pre .aggregate, -.swagger-section pre .css .tag, -.swagger-section pre .javadoctag, -.swagger-section pre .phpdoc, -.swagger-section pre .yardoctag, -.swagger-section pre .smalltalk .class, -.swagger-section pre .winutils, -.swagger-section pre .bash .variable, -.swagger-section pre .apache .tag, -.swagger-section pre .go .typename, -.swagger-section pre .tex .command, -.swagger-section pre .markdown .strong, -.swagger-section pre .request, -.swagger-section pre .status { - font-weight: bold; -} -.swagger-section pre .markdown .emphasis { - font-style: italic; -} -.swagger-section pre .nginx .built_in { - font-weight: normal; -} -.swagger-section pre .coffeescript .javascript, -.swagger-section pre .javascript .xml, -.swagger-section pre .tex .formula, -.swagger-section pre .xml .javascript, -.swagger-section pre .xml .vbscript, -.swagger-section pre .xml .css, -.swagger-section pre .xml .cdata { - opacity: 0.5; -} -.swagger-section .swagger-ui-wrap { - line-height: 1; - font-family: "Droid Sans", sans-serif; - max-width: 960px; - margin-left: auto; - margin-right: auto; -} -.swagger-section .swagger-ui-wrap b, -.swagger-section .swagger-ui-wrap strong { - font-family: "Droid Sans", sans-serif; - font-weight: bold; -} -.swagger-section .swagger-ui-wrap q, -.swagger-section .swagger-ui-wrap blockquote { - quotes: none; -} -.swagger-section .swagger-ui-wrap p { - line-height: 1.4em; - padding: 0 0 10px; - color: #333333; -} -.swagger-section .swagger-ui-wrap q:before, -.swagger-section .swagger-ui-wrap q:after, -.swagger-section .swagger-ui-wrap blockquote:before, -.swagger-section .swagger-ui-wrap blockquote:after { - content: none; -} -.swagger-section .swagger-ui-wrap .heading_with_menu h1, -.swagger-section .swagger-ui-wrap .heading_with_menu h2, -.swagger-section .swagger-ui-wrap .heading_with_menu h3, -.swagger-section .swagger-ui-wrap .heading_with_menu h4, -.swagger-section .swagger-ui-wrap .heading_with_menu h5, -.swagger-section .swagger-ui-wrap .heading_with_menu h6 { - display: block; - clear: none; - float: left; - -moz-box-sizing: border-box; - -webkit-box-sizing: border-box; - -ms-box-sizing: border-box; - box-sizing: border-box; - width: 60%; -} -.swagger-section .swagger-ui-wrap table { - border-collapse: collapse; - border-spacing: 0; -} -.swagger-section .swagger-ui-wrap table thead tr th { - padding: 5px; - font-size: 0.9em; - color: #666666; - border-bottom: 1px solid #999999; -} -.swagger-section .swagger-ui-wrap table tbody tr:last-child td { - border-bottom: none; -} -.swagger-section .swagger-ui-wrap table tbody tr.offset { - background-color: #f0f0f0; -} -.swagger-section .swagger-ui-wrap table tbody tr td { - padding: 6px; - font-size: 0.9em; - border-bottom: 1px solid #cccccc; - vertical-align: top; - line-height: 1.3em; -} -.swagger-section .swagger-ui-wrap ol { - margin: 0px 0 10px; - padding: 0 0 0 18px; - list-style-type: decimal; -} -.swagger-section .swagger-ui-wrap ol li { - padding: 5px 0px; - font-size: 0.9em; - color: #333333; -} -.swagger-section .swagger-ui-wrap ol, -.swagger-section .swagger-ui-wrap ul { - list-style: none; -} -.swagger-section .swagger-ui-wrap h1 a, -.swagger-section .swagger-ui-wrap h2 a, -.swagger-section .swagger-ui-wrap h3 a, -.swagger-section .swagger-ui-wrap h4 a, -.swagger-section 
.swagger-ui-wrap h5 a, -.swagger-section .swagger-ui-wrap h6 a { - text-decoration: none; -} -.swagger-section .swagger-ui-wrap h1 a:hover, -.swagger-section .swagger-ui-wrap h2 a:hover, -.swagger-section .swagger-ui-wrap h3 a:hover, -.swagger-section .swagger-ui-wrap h4 a:hover, -.swagger-section .swagger-ui-wrap h5 a:hover, -.swagger-section .swagger-ui-wrap h6 a:hover { - text-decoration: underline; -} -.swagger-section .swagger-ui-wrap h1 span.divider, -.swagger-section .swagger-ui-wrap h2 span.divider, -.swagger-section .swagger-ui-wrap h3 span.divider, -.swagger-section .swagger-ui-wrap h4 span.divider, -.swagger-section .swagger-ui-wrap h5 span.divider, -.swagger-section .swagger-ui-wrap h6 span.divider { - color: #aaaaaa; -} -.swagger-section .swagger-ui-wrap a { - color: #547f00; -} -.swagger-section .swagger-ui-wrap a img { - border: none; -} -.swagger-section .swagger-ui-wrap article, -.swagger-section .swagger-ui-wrap aside, -.swagger-section .swagger-ui-wrap details, -.swagger-section .swagger-ui-wrap figcaption, -.swagger-section .swagger-ui-wrap figure, -.swagger-section .swagger-ui-wrap footer, -.swagger-section .swagger-ui-wrap header, -.swagger-section .swagger-ui-wrap hgroup, -.swagger-section .swagger-ui-wrap menu, -.swagger-section .swagger-ui-wrap nav, -.swagger-section .swagger-ui-wrap section, -.swagger-section .swagger-ui-wrap summary { - display: block; -} -.swagger-section .swagger-ui-wrap pre { - font-family: "Anonymous Pro", "Menlo", "Consolas", "Bitstream Vera Sans Mono", "Courier New", monospace; - background-color: #fcf6db; - border: 1px solid #e5e0c6; - padding: 10px; -} -.swagger-section .swagger-ui-wrap pre code { - line-height: 1.6em; - background: none; -} -.swagger-section .swagger-ui-wrap .content > .content-type > div > label { - clear: both; - display: block; - color: #0F6AB4; - font-size: 1.1em; - margin: 0; - padding: 15px 0 5px; -} -.swagger-section .swagger-ui-wrap .content pre { - font-size: 12px; - margin-top: 5px; - padding: 5px; -} -.swagger-section .swagger-ui-wrap .icon-btn { - cursor: pointer; -} -.swagger-section .swagger-ui-wrap .info_title { - padding-bottom: 10px; - font-weight: bold; - font-size: 25px; -} -.swagger-section .swagger-ui-wrap .footer { - margin-top: 20px; -} -.swagger-section .swagger-ui-wrap p.big, -.swagger-section .swagger-ui-wrap div.big p { - font-size: 1em; - margin-bottom: 10px; -} -.swagger-section .swagger-ui-wrap form.fullwidth ol li.string input, -.swagger-section .swagger-ui-wrap form.fullwidth ol li.url input, -.swagger-section .swagger-ui-wrap form.fullwidth ol li.text textarea, -.swagger-section .swagger-ui-wrap form.fullwidth ol li.numeric input { - width: 500px !important; -} -.swagger-section .swagger-ui-wrap .info_license { - padding-bottom: 5px; -} -.swagger-section .swagger-ui-wrap .info_tos { - padding-bottom: 5px; -} -.swagger-section .swagger-ui-wrap .message-fail { - color: #cc0000; -} -.swagger-section .swagger-ui-wrap .info_url { - padding-bottom: 5px; -} -.swagger-section .swagger-ui-wrap .info_email { - padding-bottom: 5px; -} -.swagger-section .swagger-ui-wrap .info_name { - padding-bottom: 5px; -} -.swagger-section .swagger-ui-wrap .info_description { - padding-bottom: 10px; - font-size: 15px; -} -.swagger-section .swagger-ui-wrap .markdown ol li, -.swagger-section .swagger-ui-wrap .markdown ul li { - padding: 3px 0px; - line-height: 1.4em; - color: #333333; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.string input, -.swagger-section .swagger-ui-wrap 
form.formtastic fieldset.inputs ol li.url input, -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.numeric input { - display: block; - padding: 4px; - width: auto; - clear: both; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.string input.title, -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.url input.title, -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.numeric input.title { - font-size: 1.3em; -} -.swagger-section .swagger-ui-wrap table.fullwidth { - width: 100%; -} -.swagger-section .swagger-ui-wrap .model-signature { - font-family: "Droid Sans", sans-serif; - font-size: 1em; - line-height: 1.5em; -} -.swagger-section .swagger-ui-wrap .model-signature .signature-nav a { - text-decoration: none; - color: #AAA; -} -.swagger-section .swagger-ui-wrap .model-signature .signature-nav a:hover { - text-decoration: underline; - color: black; -} -.swagger-section .swagger-ui-wrap .model-signature .signature-nav .selected { - color: black; - text-decoration: none; -} -.swagger-section .swagger-ui-wrap .model-signature .propType { - color: #5555aa; -} -.swagger-section .swagger-ui-wrap .model-signature pre:hover { - background-color: #ffffdd; -} -.swagger-section .swagger-ui-wrap .model-signature pre { - font-size: .85em; - line-height: 1.2em; - overflow: auto; - max-height: 200px; - cursor: pointer; -} -.swagger-section .swagger-ui-wrap .model-signature ul.signature-nav { - display: block; - margin: 0; - padding: 0; -} -.swagger-section .swagger-ui-wrap .model-signature ul.signature-nav li:last-child { - padding-right: 0; - border-right: none; -} -.swagger-section .swagger-ui-wrap .model-signature ul.signature-nav li { - float: left; - margin: 0 5px 5px 0; - padding: 2px 5px 2px 0; - border-right: 1px solid #ddd; -} -.swagger-section .swagger-ui-wrap .model-signature .propOpt { - color: #555; -} -.swagger-section .swagger-ui-wrap .model-signature .snippet small { - font-size: 0.75em; -} -.swagger-section .swagger-ui-wrap .model-signature .propOptKey { - font-style: italic; -} -.swagger-section .swagger-ui-wrap .model-signature .description .strong { - font-weight: bold; - color: #000; - font-size: .9em; -} -.swagger-section .swagger-ui-wrap .model-signature .description div { - font-size: 0.9em; - line-height: 1.5em; - margin-left: 1em; -} -.swagger-section .swagger-ui-wrap .model-signature .description .stronger { - font-weight: bold; - color: #000; -} -.swagger-section .swagger-ui-wrap .model-signature .description .propWrap .optionsWrapper { - border-spacing: 0; - position: absolute; - background-color: #ffffff; - border: 1px solid #bbbbbb; - display: none; - font-size: 11px; - max-width: 400px; - line-height: 30px; - color: black; - padding: 5px; - margin-left: 10px; -} -.swagger-section .swagger-ui-wrap .model-signature .description .propWrap .optionsWrapper th { - text-align: center; - background-color: #eeeeee; - border: 1px solid #bbbbbb; - font-size: 11px; - color: #666666; - font-weight: bold; - padding: 5px; - line-height: 15px; -} -.swagger-section .swagger-ui-wrap .model-signature .description .propWrap .optionsWrapper .optionName { - font-weight: bold; -} -.swagger-section .swagger-ui-wrap .model-signature .description .propDesc.markdown > p:first-child, -.swagger-section .swagger-ui-wrap .model-signature .description .propDesc.markdown > p:last-child { - display: inline; -} -.swagger-section .swagger-ui-wrap .model-signature .description .propDesc.markdown > 
p:not(:first-child):before { - display: block; - content: ''; -} -.swagger-section .swagger-ui-wrap .model-signature .description span:last-of-type.propDesc.markdown > p:only-child { - margin-right: -3px; -} -.swagger-section .swagger-ui-wrap .model-signature .propName { - font-weight: bold; -} -.swagger-section .swagger-ui-wrap .model-signature .signature-container { - clear: both; -} -.swagger-section .swagger-ui-wrap .body-textarea { - width: 300px; - height: 100px; - border: 1px solid #aaa; -} -.swagger-section .swagger-ui-wrap .markdown p code, -.swagger-section .swagger-ui-wrap .markdown li code { - font-family: "Anonymous Pro", "Menlo", "Consolas", "Bitstream Vera Sans Mono", "Courier New", monospace; - background-color: #f0f0f0; - color: black; - padding: 1px 3px; -} -.swagger-section .swagger-ui-wrap .required { - font-weight: bold; -} -.swagger-section .swagger-ui-wrap input.parameter { - width: 300px; - border: 1px solid #aaa; -} -.swagger-section .swagger-ui-wrap h1 { - color: black; - font-size: 1.5em; - line-height: 1.3em; - padding: 10px 0 10px 0; - font-family: "Droid Sans", sans-serif; - font-weight: bold; -} -.swagger-section .swagger-ui-wrap .heading_with_menu { - float: none; - clear: both; - overflow: hidden; - display: block; -} -.swagger-section .swagger-ui-wrap .heading_with_menu ul { - display: block; - clear: none; - float: right; - -moz-box-sizing: border-box; - -webkit-box-sizing: border-box; - -ms-box-sizing: border-box; - box-sizing: border-box; - margin-top: 10px; -} -.swagger-section .swagger-ui-wrap h2 { - color: black; - font-size: 1.3em; - padding: 10px 0 10px 0; -} -.swagger-section .swagger-ui-wrap h2 a { - color: black; -} -.swagger-section .swagger-ui-wrap h2 span.sub { - font-size: 0.7em; - color: #999999; - font-style: italic; -} -.swagger-section .swagger-ui-wrap h2 span.sub a { - color: #777777; -} -.swagger-section .swagger-ui-wrap span.weak { - color: #666666; -} -.swagger-section .swagger-ui-wrap .message-success { - color: #89BF04; -} -.swagger-section .swagger-ui-wrap caption, -.swagger-section .swagger-ui-wrap th, -.swagger-section .swagger-ui-wrap td { - text-align: left; - font-weight: normal; - vertical-align: middle; -} -.swagger-section .swagger-ui-wrap .code { - font-family: "Anonymous Pro", "Menlo", "Consolas", "Bitstream Vera Sans Mono", "Courier New", monospace; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.text textarea { - font-family: "Droid Sans", sans-serif; - height: 250px; - padding: 4px; - display: block; - clear: both; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.select select { - display: block; - clear: both; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.boolean { - float: none; - clear: both; - overflow: hidden; - display: block; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.boolean label { - display: block; - float: left; - clear: none; - margin: 0; - padding: 0; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.boolean input { - display: block; - float: left; - clear: none; - margin: 0 5px 0 0; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.required label { - color: black; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li label { - display: block; - clear: both; - width: auto; - padding: 0 0 3px; - color: #666666; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li label abbr { - padding-left: 
3px; - color: #888888; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li p.inline-hints { - margin-left: 0; - font-style: italic; - font-size: 0.9em; - margin: 0; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.buttons { - margin: 0; - padding: 0; -} -.swagger-section .swagger-ui-wrap span.blank, -.swagger-section .swagger-ui-wrap span.empty { - color: #888888; - font-style: italic; -} -.swagger-section .swagger-ui-wrap .markdown h3 { - color: #547f00; -} -.swagger-section .swagger-ui-wrap .markdown h4 { - color: #666666; -} -.swagger-section .swagger-ui-wrap .markdown pre { - font-family: "Anonymous Pro", "Menlo", "Consolas", "Bitstream Vera Sans Mono", "Courier New", monospace; - background-color: #fcf6db; - border: 1px solid #e5e0c6; - padding: 10px; - margin: 0 0 10px 0; -} -.swagger-section .swagger-ui-wrap .markdown pre code { - line-height: 1.6em; -} -.swagger-section .swagger-ui-wrap div.gist { - margin: 20px 0 25px 0 !important; -} -.swagger-section .swagger-ui-wrap ul#resources { - font-family: "Droid Sans", sans-serif; - font-size: 0.9em; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource { - border-bottom: 1px solid #dddddd; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource:hover div.heading h2 a, -.swagger-section .swagger-ui-wrap ul#resources li.resource.active div.heading h2 a { - color: black; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource:hover div.heading ul.options li a, -.swagger-section .swagger-ui-wrap ul#resources li.resource.active div.heading ul.options li a { - color: #555555; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource:last-child { - border-bottom: none; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading { - border: 1px solid transparent; - float: none; - clear: both; - overflow: hidden; - display: block; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options { - overflow: hidden; - padding: 0; - display: block; - clear: none; - float: right; - margin: 14px 10px 0 0; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li { - float: left; - clear: none; - margin: 0; - padding: 2px 10px; - border-right: 1px solid #dddddd; - color: #666666; - font-size: 0.9em; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li a { - color: #aaaaaa; - text-decoration: none; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li a:hover { - text-decoration: underline; - color: black; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li a:hover, -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li a:active, -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li a.active { - text-decoration: underline; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li:first-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li.first { - padding-left: 0; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li:last-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li.last { - padding-right: 0; - border-right: none; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options:first-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options.first { 
- padding-left: 0; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading h2 { - color: #999999; - padding-left: 0; - display: block; - clear: none; - float: left; - font-family: "Droid Sans", sans-serif; - font-weight: bold; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading h2 a { - color: #999999; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading h2 a:hover { - color: black; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation { - float: none; - clear: both; - overflow: hidden; - display: block; - margin: 0 0 10px; - padding: 0; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading { - float: none; - clear: both; - overflow: hidden; - display: block; - margin: 0; - padding: 0; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading h3 { - display: block; - clear: none; - float: left; - width: auto; - margin: 0; - padding: 0; - line-height: 1.1em; - color: black; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading h3 span.path { - padding-left: 10px; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading h3 span.path a { - color: black; - text-decoration: none; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading h3 span.path a:hover { - text-decoration: underline; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading h3 span.http_method a { - text-transform: uppercase; - text-decoration: none; - color: white; - display: inline-block; - width: 50px; - font-size: 0.7em; - text-align: center; - padding: 7px 0 4px; - -moz-border-radius: 2px; - -webkit-border-radius: 2px; - -o-border-radius: 2px; - -ms-border-radius: 2px; - -khtml-border-radius: 2px; - border-radius: 2px; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading h3 span { - margin: 0; - padding: 0; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading ul.options { - overflow: hidden; - padding: 0; - display: block; - clear: none; - float: right; - margin: 6px 10px 0 0; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading ul.options li { - float: left; - clear: none; - margin: 0; - padding: 2px 10px; - font-size: 0.9em; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading ul.options li a { - text-decoration: none; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading ul.options li.access { - color: black; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content { - border-top: none; - padding: 10px; - -moz-border-radius-bottomleft: 6px; - -webkit-border-bottom-left-radius: 6px; - -o-border-bottom-left-radius: 6px; - -ms-border-bottom-left-radius: 6px; - -khtml-border-bottom-left-radius: 6px; - border-bottom-left-radius: 6px; - 
-moz-border-radius-bottomright: 6px; - -webkit-border-bottom-right-radius: 6px; - -o-border-bottom-right-radius: 6px; - -ms-border-bottom-right-radius: 6px; - -khtml-border-bottom-right-radius: 6px; - border-bottom-right-radius: 6px; - margin: 0 0 20px; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content h4 { - font-size: 1.1em; - margin: 0; - padding: 15px 0 5px; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content div.sandbox_header { - float: none; - clear: both; - overflow: hidden; - display: block; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content div.sandbox_header a { - padding: 4px 0 0 10px; - display: inline-block; - font-size: 0.9em; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content div.sandbox_header input.submit { - display: block; - clear: none; - float: left; - padding: 6px 8px; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content div.sandbox_header span.response_throbber { - background-image: url('../images/throbber.gif'); - width: 128px; - height: 16px; - display: block; - clear: none; - float: right; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content form input[type='text'].error { - outline: 2px solid black; - outline-color: #cc0000; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content div.response div.block pre { - font-family: "Anonymous Pro", "Menlo", "Consolas", "Bitstream Vera Sans Mono", "Courier New", monospace; - padding: 10px; - font-size: 0.9em; - max-height: 400px; - overflow-y: auto; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.heading { - background-color: #f9f2e9; - border: 1px solid #f0e0ca; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.heading h3 span.http_method a { - background-color: #c5862b; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.heading ul.options li { - border-right: 1px solid #dddddd; - border-right-color: #f0e0ca; - color: #c5862b; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.heading ul.options li a { - color: #c5862b; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.content { - background-color: #faf5ee; - border: 1px solid #f0e0ca; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.content h4 { - color: #c5862b; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.content div.sandbox_header a { - color: #dcb67f; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.heading { - background-color: #fcffcd; - border: 1px solid black; - border-color: #ffd20f; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints 
li.endpoint ul.operations li.operation.head div.heading h3 span.http_method a { - text-transform: uppercase; - background-color: #ffd20f; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.heading ul.options li { - border-right: 1px solid #dddddd; - border-right-color: #ffd20f; - color: #ffd20f; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.heading ul.options li a { - color: #ffd20f; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.content { - background-color: #fcffcd; - border: 1px solid black; - border-color: #ffd20f; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.content h4 { - color: #ffd20f; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.content div.sandbox_header a { - color: #6fc992; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.heading { - background-color: #f5e8e8; - border: 1px solid #e8c6c7; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.heading h3 span.http_method a { - text-transform: uppercase; - background-color: #a41e22; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.heading ul.options li { - border-right: 1px solid #dddddd; - border-right-color: #e8c6c7; - color: #a41e22; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.heading ul.options li a { - color: #a41e22; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.content { - background-color: #f7eded; - border: 1px solid #e8c6c7; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.content h4 { - color: #a41e22; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.content div.sandbox_header a { - color: #c8787a; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.heading { - background-color: #e7f6ec; - border: 1px solid #c3e8d1; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.heading h3 span.http_method a { - background-color: #10a54a; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.heading ul.options li { - border-right: 1px solid #dddddd; - border-right-color: #c3e8d1; - color: #10a54a; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.heading ul.options li a { - color: #10a54a; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.content { - background-color: #ebf7f0; - border: 1px solid #c3e8d1; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.content h4 { - color: #10a54a; -} 
-.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.content div.sandbox_header a { - color: #6fc992; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.heading { - background-color: #FCE9E3; - border: 1px solid #F5D5C3; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.heading h3 span.http_method a { - background-color: #D38042; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.heading ul.options li { - border-right: 1px solid #dddddd; - border-right-color: #f0cecb; - color: #D38042; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.heading ul.options li a { - color: #D38042; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.content { - background-color: #faf0ef; - border: 1px solid #f0cecb; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.content h4 { - color: #D38042; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.content div.sandbox_header a { - color: #dcb67f; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.heading { - background-color: #e7f0f7; - border: 1px solid #c3d9ec; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.heading h3 span.http_method a { - background-color: #0f6ab4; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.heading ul.options li { - border-right: 1px solid #dddddd; - border-right-color: #c3d9ec; - color: #0f6ab4; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.heading ul.options li a { - color: #0f6ab4; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.content { - background-color: #ebf3f9; - border: 1px solid #c3d9ec; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.content h4 { - color: #0f6ab4; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.content div.sandbox_header a { - color: #6fa5d2; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.options div.heading { - background-color: #e7f0f7; - border: 1px solid #c3d9ec; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.options div.heading h3 span.http_method a { - background-color: #0f6ab4; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.options div.heading ul.options li { - border-right: 1px solid #dddddd; - border-right-color: #c3d9ec; - color: #0f6ab4; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.options div.heading ul.options li a { - color: #0f6ab4; 
-} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.options div.content { - background-color: #ebf3f9; - border: 1px solid #c3d9ec; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.options div.content h4 { - color: #0f6ab4; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.options div.content div.sandbox_header a { - color: #6fa5d2; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.content, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.content, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.content, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.content, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.content, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.content { - border-top: none; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.heading ul.options li:last-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.heading ul.options li:last-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.heading ul.options li:last-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.heading ul.options li:last-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.heading ul.options li:last-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.heading ul.options li:last-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.heading ul.options li.last, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.heading ul.options li.last, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.heading ul.options li.last, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.heading ul.options li.last, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.heading ul.options li.last, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.heading ul.options li.last { - padding-right: 0; - border-right: none; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations ul.options li a:hover, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations ul.options li a:active, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations 
ul.options li a.active { - text-decoration: underline; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations ul.options li:first-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations ul.options li.first { - padding-left: 0; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations:first-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations.first { - padding-left: 0; -} -.swagger-section .swagger-ui-wrap p#colophon { - margin: 0 15px 40px 15px; - padding: 10px 0; - font-size: 0.8em; - border-top: 1px solid #dddddd; - font-family: "Droid Sans", sans-serif; - color: #999999; - font-style: italic; -} -.swagger-section .swagger-ui-wrap p#colophon a { - text-decoration: none; - color: #547f00; -} -.swagger-section .swagger-ui-wrap h3 { - color: black; - font-size: 1.1em; - padding: 10px 0 10px 0; -} -.swagger-section .swagger-ui-wrap .markdown ol, -.swagger-section .swagger-ui-wrap .markdown ul { - font-family: "Droid Sans", sans-serif; - margin: 5px 0 10px; - padding: 0 0 0 18px; - list-style-type: disc; -} -.swagger-section .swagger-ui-wrap form.form_box { - background-color: #ebf3f9; - border: 1px solid #c3d9ec; - padding: 10px; -} -.swagger-section .swagger-ui-wrap form.form_box label { - color: #0f6ab4 !important; -} -.swagger-section .swagger-ui-wrap form.form_box input[type=submit] { - display: block; - padding: 10px; -} -.swagger-section .swagger-ui-wrap form.form_box p.weak { - font-size: 0.8em; -} -.swagger-section .swagger-ui-wrap form.form_box p { - font-size: 0.9em; - padding: 0 0 15px; - color: #7e7b6d; -} -.swagger-section .swagger-ui-wrap form.form_box p a { - color: #646257; -} -.swagger-section .swagger-ui-wrap form.form_box p strong { - color: black; -} -.swagger-section .swagger-ui-wrap .operation-status td.markdown > p:last-child { - padding-bottom: 0; -} -.swagger-section .title { - font-style: bold; -} -.swagger-section .secondary_form { - display: none; -} -.swagger-section .main_image { - display: block; - margin-left: auto; - margin-right: auto; -} -.swagger-section .oauth_body { - margin-left: 100px; - margin-right: 100px; -} -.swagger-section .oauth_submit { - text-align: center; -} -.swagger-section .api-popup-dialog { - z-index: 10000; - position: absolute; - width: 500px; - background: #FFF; - padding: 20px; - border: 1px solid #ccc; - border-radius: 5px; - display: none; - font-size: 13px; - color: #777; -} -.swagger-section .api-popup-dialog .api-popup-title { - font-size: 24px; - padding: 10px 0; -} -.swagger-section .api-popup-dialog .api-popup-title { - font-size: 24px; - padding: 10px 0; -} -.swagger-section .api-popup-dialog p.error-msg { - padding-left: 5px; - padding-bottom: 5px; -} -.swagger-section .api-popup-dialog button.api-popup-authbtn { - height: 30px; -} -.swagger-section .api-popup-dialog button.api-popup-cancel { - height: 30px; -} -.swagger-section .api-popup-scopes { - padding: 10px 20px; -} -.swagger-section .api-popup-scopes li { - padding: 5px 0; - line-height: 20px; -} -.swagger-section .api-popup-scopes .api-scope-desc { - padding-left: 20px; - font-style: italic; -} -.swagger-section .api-popup-scopes li input { - position: relative; - top: 2px; -} -.swagger-section .api-popup-actions { - padding-top: 10px; -} -#header { - display: none; -} -.swagger-section .swagger-ui-wrap .model-signature pre { - max-height: none; -} -.swagger-section 
.swagger-ui-wrap .body-textarea { - width: 100px; -} -.swagger-section .swagger-ui-wrap input.parameter { - width: 100px; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options { - display: none; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints { - display: block !important; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content { - display: block !important; -} diff --git a/scylla/swagger-ui/dist/css/reset.css b/scylla/swagger-ui/dist/css/reset.css deleted file mode 100644 index b2b0789..0000000 --- a/scylla/swagger-ui/dist/css/reset.css +++ /dev/null @@ -1,125 +0,0 @@ -/* http://meyerweb.com/eric/tools/css/reset/ v2.0 | 20110126 */ -html, -body, -div, -span, -applet, -object, -iframe, -h1, -h2, -h3, -h4, -h5, -h6, -p, -blockquote, -pre, -a, -abbr, -acronym, -address, -big, -cite, -code, -del, -dfn, -em, -img, -ins, -kbd, -q, -s, -samp, -small, -strike, -strong, -sub, -sup, -tt, -var, -b, -u, -i, -center, -dl, -dt, -dd, -ol, -ul, -li, -fieldset, -form, -label, -legend, -table, -caption, -tbody, -tfoot, -thead, -tr, -th, -td, -article, -aside, -canvas, -details, -embed, -figure, -figcaption, -footer, -header, -hgroup, -menu, -nav, -output, -ruby, -section, -summary, -time, -mark, -audio, -video { - margin: 0; - padding: 0; - border: 0; - font-size: 100%; - font: inherit; - vertical-align: baseline; -} -/* HTML5 display-role reset for older browsers */ -article, -aside, -details, -figcaption, -figure, -footer, -header, -hgroup, -menu, -nav, -section { - display: block; -} -body { - line-height: 1; -} -ol, -ul { - list-style: none; -} -blockquote, -q { - quotes: none; -} -blockquote:before, -blockquote:after, -q:before, -q:after { - content: ''; - content: none; -} -table { - border-collapse: collapse; - border-spacing: 0; -} diff --git a/scylla/swagger-ui/dist/css/screen.css b/scylla/swagger-ui/dist/css/screen.css deleted file mode 100644 index d6ed90b..0000000 --- a/scylla/swagger-ui/dist/css/screen.css +++ /dev/null @@ -1,1273 +0,0 @@ -/* Original style from softwaremaniacs.org (c) Ivan Sagalaev */ -.swagger-section pre code { - display: block; - padding: 0.5em; - background: #F0F0F0; -} -.swagger-section pre code, -.swagger-section pre .subst, -.swagger-section pre .tag .title, -.swagger-section pre .lisp .title, -.swagger-section pre .clojure .built_in, -.swagger-section pre .nginx .title { - color: black; -} -.swagger-section pre .string, -.swagger-section pre .title, -.swagger-section pre .constant, -.swagger-section pre .parent, -.swagger-section pre .tag .value, -.swagger-section pre .rules .value, -.swagger-section pre .rules .value .number, -.swagger-section pre .preprocessor, -.swagger-section pre .ruby .symbol, -.swagger-section pre .ruby .symbol .string, -.swagger-section pre .aggregate, -.swagger-section pre .template_tag, -.swagger-section pre .django .variable, -.swagger-section pre .smalltalk .class, -.swagger-section pre .addition, -.swagger-section pre .flow, -.swagger-section pre .stream, -.swagger-section pre .bash .variable, -.swagger-section pre .apache .tag, -.swagger-section pre .apache .cbracket, -.swagger-section pre .tex .command, -.swagger-section pre .tex .special, -.swagger-section pre .erlang_repl .function_or_atom, -.swagger-section pre .markdown .header { - color: #800; -} -.swagger-section pre .comment, -.swagger-section pre .annotation, -.swagger-section pre .template_comment, -.swagger-section pre .diff .header, 
-.swagger-section pre .chunk, -.swagger-section pre .markdown .blockquote { - color: #888; -} -.swagger-section pre .number, -.swagger-section pre .date, -.swagger-section pre .regexp, -.swagger-section pre .literal, -.swagger-section pre .smalltalk .symbol, -.swagger-section pre .smalltalk .char, -.swagger-section pre .go .constant, -.swagger-section pre .change, -.swagger-section pre .markdown .bullet, -.swagger-section pre .markdown .link_url { - color: #080; -} -.swagger-section pre .label, -.swagger-section pre .javadoc, -.swagger-section pre .ruby .string, -.swagger-section pre .decorator, -.swagger-section pre .filter .argument, -.swagger-section pre .localvars, -.swagger-section pre .array, -.swagger-section pre .attr_selector, -.swagger-section pre .important, -.swagger-section pre .pseudo, -.swagger-section pre .pi, -.swagger-section pre .doctype, -.swagger-section pre .deletion, -.swagger-section pre .envvar, -.swagger-section pre .shebang, -.swagger-section pre .apache .sqbracket, -.swagger-section pre .nginx .built_in, -.swagger-section pre .tex .formula, -.swagger-section pre .erlang_repl .reserved, -.swagger-section pre .prompt, -.swagger-section pre .markdown .link_label, -.swagger-section pre .vhdl .attribute, -.swagger-section pre .clojure .attribute, -.swagger-section pre .coffeescript .property { - color: #8888ff; -} -.swagger-section pre .keyword, -.swagger-section pre .id, -.swagger-section pre .phpdoc, -.swagger-section pre .title, -.swagger-section pre .built_in, -.swagger-section pre .aggregate, -.swagger-section pre .css .tag, -.swagger-section pre .javadoctag, -.swagger-section pre .phpdoc, -.swagger-section pre .yardoctag, -.swagger-section pre .smalltalk .class, -.swagger-section pre .winutils, -.swagger-section pre .bash .variable, -.swagger-section pre .apache .tag, -.swagger-section pre .go .typename, -.swagger-section pre .tex .command, -.swagger-section pre .markdown .strong, -.swagger-section pre .request, -.swagger-section pre .status { - font-weight: bold; -} -.swagger-section pre .markdown .emphasis { - font-style: italic; -} -.swagger-section pre .nginx .built_in { - font-weight: normal; -} -.swagger-section pre .coffeescript .javascript, -.swagger-section pre .javascript .xml, -.swagger-section pre .tex .formula, -.swagger-section pre .xml .javascript, -.swagger-section pre .xml .vbscript, -.swagger-section pre .xml .css, -.swagger-section pre .xml .cdata { - opacity: 0.5; -} -.swagger-section .swagger-ui-wrap { - line-height: 1; - font-family: "Droid Sans", sans-serif; - max-width: 960px; - margin-left: auto; - margin-right: auto; -} -.swagger-section .swagger-ui-wrap b, -.swagger-section .swagger-ui-wrap strong { - font-family: "Droid Sans", sans-serif; - font-weight: bold; -} -.swagger-section .swagger-ui-wrap q, -.swagger-section .swagger-ui-wrap blockquote { - quotes: none; -} -.swagger-section .swagger-ui-wrap p { - line-height: 1.4em; - padding: 0 0 10px; - color: #333333; -} -.swagger-section .swagger-ui-wrap q:before, -.swagger-section .swagger-ui-wrap q:after, -.swagger-section .swagger-ui-wrap blockquote:before, -.swagger-section .swagger-ui-wrap blockquote:after { - content: none; -} -.swagger-section .swagger-ui-wrap .heading_with_menu h1, -.swagger-section .swagger-ui-wrap .heading_with_menu h2, -.swagger-section .swagger-ui-wrap .heading_with_menu h3, -.swagger-section .swagger-ui-wrap .heading_with_menu h4, -.swagger-section .swagger-ui-wrap .heading_with_menu h5, -.swagger-section .swagger-ui-wrap .heading_with_menu h6 { - display: 
block; - clear: none; - float: left; - -moz-box-sizing: border-box; - -webkit-box-sizing: border-box; - -ms-box-sizing: border-box; - box-sizing: border-box; - width: 60%; -} -.swagger-section .swagger-ui-wrap table { - border-collapse: collapse; - border-spacing: 0; -} -.swagger-section .swagger-ui-wrap table thead tr th { - padding: 5px; - font-size: 0.9em; - color: #666666; - border-bottom: 1px solid #999999; -} -.swagger-section .swagger-ui-wrap table tbody tr:last-child td { - border-bottom: none; -} -.swagger-section .swagger-ui-wrap table tbody tr.offset { - background-color: #f0f0f0; -} -.swagger-section .swagger-ui-wrap table tbody tr td { - padding: 6px; - font-size: 0.9em; - border-bottom: 1px solid #cccccc; - vertical-align: top; - line-height: 1.3em; -} -.swagger-section .swagger-ui-wrap ol { - margin: 0px 0 10px; - padding: 0 0 0 18px; - list-style-type: decimal; -} -.swagger-section .swagger-ui-wrap ol li { - padding: 5px 0px; - font-size: 0.9em; - color: #333333; -} -.swagger-section .swagger-ui-wrap ol, -.swagger-section .swagger-ui-wrap ul { - list-style: none; -} -.swagger-section .swagger-ui-wrap h1 a, -.swagger-section .swagger-ui-wrap h2 a, -.swagger-section .swagger-ui-wrap h3 a, -.swagger-section .swagger-ui-wrap h4 a, -.swagger-section .swagger-ui-wrap h5 a, -.swagger-section .swagger-ui-wrap h6 a { - text-decoration: none; -} -.swagger-section .swagger-ui-wrap h1 a:hover, -.swagger-section .swagger-ui-wrap h2 a:hover, -.swagger-section .swagger-ui-wrap h3 a:hover, -.swagger-section .swagger-ui-wrap h4 a:hover, -.swagger-section .swagger-ui-wrap h5 a:hover, -.swagger-section .swagger-ui-wrap h6 a:hover { - text-decoration: underline; -} -.swagger-section .swagger-ui-wrap h1 span.divider, -.swagger-section .swagger-ui-wrap h2 span.divider, -.swagger-section .swagger-ui-wrap h3 span.divider, -.swagger-section .swagger-ui-wrap h4 span.divider, -.swagger-section .swagger-ui-wrap h5 span.divider, -.swagger-section .swagger-ui-wrap h6 span.divider { - color: #aaaaaa; -} -.swagger-section .swagger-ui-wrap a { - color: #547f00; -} -.swagger-section .swagger-ui-wrap a img { - border: none; -} -.swagger-section .swagger-ui-wrap article, -.swagger-section .swagger-ui-wrap aside, -.swagger-section .swagger-ui-wrap details, -.swagger-section .swagger-ui-wrap figcaption, -.swagger-section .swagger-ui-wrap figure, -.swagger-section .swagger-ui-wrap footer, -.swagger-section .swagger-ui-wrap header, -.swagger-section .swagger-ui-wrap hgroup, -.swagger-section .swagger-ui-wrap menu, -.swagger-section .swagger-ui-wrap nav, -.swagger-section .swagger-ui-wrap section, -.swagger-section .swagger-ui-wrap summary { - display: block; -} -.swagger-section .swagger-ui-wrap pre { - font-family: "Anonymous Pro", "Menlo", "Consolas", "Bitstream Vera Sans Mono", "Courier New", monospace; - background-color: #fcf6db; - border: 1px solid #e5e0c6; - padding: 10px; -} -.swagger-section .swagger-ui-wrap pre code { - line-height: 1.6em; - background: none; -} -.swagger-section .swagger-ui-wrap .content > .content-type > div > label { - clear: both; - display: block; - color: #0F6AB4; - font-size: 1.1em; - margin: 0; - padding: 15px 0 5px; -} -.swagger-section .swagger-ui-wrap .content pre { - font-size: 12px; - margin-top: 5px; - padding: 5px; -} -.swagger-section .swagger-ui-wrap .icon-btn { - cursor: pointer; -} -.swagger-section .swagger-ui-wrap .info_title { - padding-bottom: 10px; - font-weight: bold; - font-size: 25px; -} -.swagger-section .swagger-ui-wrap .footer { - margin-top: 20px; -} 
-.swagger-section .swagger-ui-wrap p.big, -.swagger-section .swagger-ui-wrap div.big p { - font-size: 1em; - margin-bottom: 10px; -} -.swagger-section .swagger-ui-wrap form.fullwidth ol li.string input, -.swagger-section .swagger-ui-wrap form.fullwidth ol li.url input, -.swagger-section .swagger-ui-wrap form.fullwidth ol li.text textarea, -.swagger-section .swagger-ui-wrap form.fullwidth ol li.numeric input { - width: 500px !important; -} -.swagger-section .swagger-ui-wrap .info_license { - padding-bottom: 5px; -} -.swagger-section .swagger-ui-wrap .info_tos { - padding-bottom: 5px; -} -.swagger-section .swagger-ui-wrap .message-fail { - color: #cc0000; -} -.swagger-section .swagger-ui-wrap .info_url { - padding-bottom: 5px; -} -.swagger-section .swagger-ui-wrap .info_email { - padding-bottom: 5px; -} -.swagger-section .swagger-ui-wrap .info_name { - padding-bottom: 5px; -} -.swagger-section .swagger-ui-wrap .info_description { - padding-bottom: 10px; - font-size: 15px; -} -.swagger-section .swagger-ui-wrap .markdown ol li, -.swagger-section .swagger-ui-wrap .markdown ul li { - padding: 3px 0px; - line-height: 1.4em; - color: #333333; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.string input, -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.url input, -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.numeric input { - display: block; - padding: 4px; - width: auto; - clear: both; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.string input.title, -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.url input.title, -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.numeric input.title { - font-size: 1.3em; -} -.swagger-section .swagger-ui-wrap table.fullwidth { - width: 100%; -} -.swagger-section .swagger-ui-wrap .model-signature { - font-family: "Droid Sans", sans-serif; - font-size: 1em; - line-height: 1.5em; -} -.swagger-section .swagger-ui-wrap .model-signature .signature-nav a { - text-decoration: none; - color: #AAA; -} -.swagger-section .swagger-ui-wrap .model-signature .signature-nav a:hover { - text-decoration: underline; - color: black; -} -.swagger-section .swagger-ui-wrap .model-signature .signature-nav .selected { - color: black; - text-decoration: none; -} -.swagger-section .swagger-ui-wrap .model-signature .propType { - color: #5555aa; -} -.swagger-section .swagger-ui-wrap .model-signature pre:hover { - background-color: #ffffdd; -} -.swagger-section .swagger-ui-wrap .model-signature pre { - font-size: .85em; - line-height: 1.2em; - overflow: auto; - max-height: 200px; - cursor: pointer; -} -.swagger-section .swagger-ui-wrap .model-signature ul.signature-nav { - display: block; - margin: 0; - padding: 0; -} -.swagger-section .swagger-ui-wrap .model-signature ul.signature-nav li:last-child { - padding-right: 0; - border-right: none; -} -.swagger-section .swagger-ui-wrap .model-signature ul.signature-nav li { - float: left; - margin: 0 5px 5px 0; - padding: 2px 5px 2px 0; - border-right: 1px solid #ddd; -} -.swagger-section .swagger-ui-wrap .model-signature .propOpt { - color: #555; -} -.swagger-section .swagger-ui-wrap .model-signature .snippet small { - font-size: 0.75em; -} -.swagger-section .swagger-ui-wrap .model-signature .propOptKey { - font-style: italic; -} -.swagger-section .swagger-ui-wrap .model-signature .description .strong { - font-weight: bold; - color: #000; - font-size: .9em; -} -.swagger-section .swagger-ui-wrap 
.model-signature .description div { - font-size: 0.9em; - line-height: 1.5em; - margin-left: 1em; -} -.swagger-section .swagger-ui-wrap .model-signature .description .stronger { - font-weight: bold; - color: #000; -} -.swagger-section .swagger-ui-wrap .model-signature .description .propWrap .optionsWrapper { - border-spacing: 0; - position: absolute; - background-color: #ffffff; - border: 1px solid #bbbbbb; - display: none; - font-size: 11px; - max-width: 400px; - line-height: 30px; - color: black; - padding: 5px; - margin-left: 10px; -} -.swagger-section .swagger-ui-wrap .model-signature .description .propWrap .optionsWrapper th { - text-align: center; - background-color: #eeeeee; - border: 1px solid #bbbbbb; - font-size: 11px; - color: #666666; - font-weight: bold; - padding: 5px; - line-height: 15px; -} -.swagger-section .swagger-ui-wrap .model-signature .description .propWrap .optionsWrapper .optionName { - font-weight: bold; -} -.swagger-section .swagger-ui-wrap .model-signature .description .propDesc.markdown > p:first-child, -.swagger-section .swagger-ui-wrap .model-signature .description .propDesc.markdown > p:last-child { - display: inline; -} -.swagger-section .swagger-ui-wrap .model-signature .description .propDesc.markdown > p:not(:first-child):before { - display: block; - content: ''; -} -.swagger-section .swagger-ui-wrap .model-signature .description span:last-of-type.propDesc.markdown > p:only-child { - margin-right: -3px; -} -.swagger-section .swagger-ui-wrap .model-signature .propName { - font-weight: bold; -} -.swagger-section .swagger-ui-wrap .model-signature .signature-container { - clear: both; -} -.swagger-section .swagger-ui-wrap .body-textarea { - width: 300px; - height: 100px; - border: 1px solid #aaa; -} -.swagger-section .swagger-ui-wrap .markdown p code, -.swagger-section .swagger-ui-wrap .markdown li code { - font-family: "Anonymous Pro", "Menlo", "Consolas", "Bitstream Vera Sans Mono", "Courier New", monospace; - background-color: #f0f0f0; - color: black; - padding: 1px 3px; -} -.swagger-section .swagger-ui-wrap .required { - font-weight: bold; -} -.swagger-section .swagger-ui-wrap input.parameter { - width: 300px; - border: 1px solid #aaa; -} -.swagger-section .swagger-ui-wrap h1 { - color: black; - font-size: 1.5em; - line-height: 1.3em; - padding: 10px 0 10px 0; - font-family: "Droid Sans", sans-serif; - font-weight: bold; -} -.swagger-section .swagger-ui-wrap .heading_with_menu { - float: none; - clear: both; - overflow: hidden; - display: block; -} -.swagger-section .swagger-ui-wrap .heading_with_menu ul { - display: block; - clear: none; - float: right; - -moz-box-sizing: border-box; - -webkit-box-sizing: border-box; - -ms-box-sizing: border-box; - box-sizing: border-box; - margin-top: 10px; -} -.swagger-section .swagger-ui-wrap h2 { - color: black; - font-size: 1.3em; - padding: 10px 0 10px 0; -} -.swagger-section .swagger-ui-wrap h2 a { - color: black; -} -.swagger-section .swagger-ui-wrap h2 span.sub { - font-size: 0.7em; - color: #999999; - font-style: italic; -} -.swagger-section .swagger-ui-wrap h2 span.sub a { - color: #777777; -} -.swagger-section .swagger-ui-wrap span.weak { - color: #666666; -} -.swagger-section .swagger-ui-wrap .message-success { - color: #89BF04; -} -.swagger-section .swagger-ui-wrap caption, -.swagger-section .swagger-ui-wrap th, -.swagger-section .swagger-ui-wrap td { - text-align: left; - font-weight: normal; - vertical-align: middle; -} -.swagger-section .swagger-ui-wrap .code { - font-family: "Anonymous Pro", "Menlo", 
"Consolas", "Bitstream Vera Sans Mono", "Courier New", monospace; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.text textarea { - font-family: "Droid Sans", sans-serif; - height: 250px; - padding: 4px; - display: block; - clear: both; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.select select { - display: block; - clear: both; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.boolean { - float: none; - clear: both; - overflow: hidden; - display: block; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.boolean label { - display: block; - float: left; - clear: none; - margin: 0; - padding: 0; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.boolean input { - display: block; - float: left; - clear: none; - margin: 0 5px 0 0; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li.required label { - color: black; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li label { - display: block; - clear: both; - width: auto; - padding: 0 0 3px; - color: #666666; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li label abbr { - padding-left: 3px; - color: #888888; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.inputs ol li p.inline-hints { - margin-left: 0; - font-style: italic; - font-size: 0.9em; - margin: 0; -} -.swagger-section .swagger-ui-wrap form.formtastic fieldset.buttons { - margin: 0; - padding: 0; -} -.swagger-section .swagger-ui-wrap span.blank, -.swagger-section .swagger-ui-wrap span.empty { - color: #888888; - font-style: italic; -} -.swagger-section .swagger-ui-wrap .markdown h3 { - color: #547f00; -} -.swagger-section .swagger-ui-wrap .markdown h4 { - color: #666666; -} -.swagger-section .swagger-ui-wrap .markdown pre { - font-family: "Anonymous Pro", "Menlo", "Consolas", "Bitstream Vera Sans Mono", "Courier New", monospace; - background-color: #fcf6db; - border: 1px solid #e5e0c6; - padding: 10px; - margin: 0 0 10px 0; -} -.swagger-section .swagger-ui-wrap .markdown pre code { - line-height: 1.6em; -} -.swagger-section .swagger-ui-wrap div.gist { - margin: 20px 0 25px 0 !important; -} -.swagger-section .swagger-ui-wrap ul#resources { - font-family: "Droid Sans", sans-serif; - font-size: 0.9em; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource { - border-bottom: 1px solid #dddddd; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource:hover div.heading h2 a, -.swagger-section .swagger-ui-wrap ul#resources li.resource.active div.heading h2 a { - color: black; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource:hover div.heading ul.options li a, -.swagger-section .swagger-ui-wrap ul#resources li.resource.active div.heading ul.options li a { - color: #555555; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource:last-child { - border-bottom: none; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading { - border: 1px solid transparent; - float: none; - clear: both; - overflow: hidden; - display: block; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options { - overflow: hidden; - padding: 0; - display: block; - clear: none; - float: right; - margin: 14px 10px 0 0; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li { - float: left; - clear: none; - margin: 0; - padding: 2px 10px; - border-right: 1px solid #dddddd; - color: #666666; - 
font-size: 0.9em; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li a { - color: #aaaaaa; - text-decoration: none; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li a:hover { - text-decoration: underline; - color: black; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li a:hover, -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li a:active, -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li a.active { - text-decoration: underline; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li:first-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li.first { - padding-left: 0; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li:last-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options li.last { - padding-right: 0; - border-right: none; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options:first-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading ul.options.first { - padding-left: 0; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading h2 { - color: #999999; - padding-left: 0; - display: block; - clear: none; - float: left; - font-family: "Droid Sans", sans-serif; - font-weight: bold; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading h2 a { - color: #999999; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading h2 a:hover { - color: black; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation { - float: none; - clear: both; - overflow: hidden; - display: block; - margin: 0 0 10px; - padding: 0; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading { - float: none; - clear: both; - overflow: hidden; - display: block; - margin: 0; - padding: 0; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading h3 { - display: block; - clear: none; - float: left; - width: auto; - margin: 0; - padding: 0; - line-height: 1.1em; - color: black; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading h3 span.path { - padding-left: 10px; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading h3 span.path a { - color: black; - text-decoration: none; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading h3 span.path a:hover { - text-decoration: underline; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading h3 span.http_method a { - text-transform: uppercase; - text-decoration: none; - color: white; - display: inline-block; - width: 50px; - font-size: 0.7em; - text-align: center; - padding: 7px 0 4px; - -moz-border-radius: 2px; - -webkit-border-radius: 2px; - -o-border-radius: 2px; - -ms-border-radius: 2px; - -khtml-border-radius: 2px; - border-radius: 2px; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading 
h3 span { - margin: 0; - padding: 0; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading ul.options { - overflow: hidden; - padding: 0; - display: block; - clear: none; - float: right; - margin: 6px 10px 0 0; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading ul.options li { - float: left; - clear: none; - margin: 0; - padding: 2px 10px; - font-size: 0.9em; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading ul.options li a { - text-decoration: none; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.heading ul.options li.access { - color: black; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content { - border-top: none; - padding: 10px; - -moz-border-radius-bottomleft: 6px; - -webkit-border-bottom-left-radius: 6px; - -o-border-bottom-left-radius: 6px; - -ms-border-bottom-left-radius: 6px; - -khtml-border-bottom-left-radius: 6px; - border-bottom-left-radius: 6px; - -moz-border-radius-bottomright: 6px; - -webkit-border-bottom-right-radius: 6px; - -o-border-bottom-right-radius: 6px; - -ms-border-bottom-right-radius: 6px; - -khtml-border-bottom-right-radius: 6px; - border-bottom-right-radius: 6px; - margin: 0 0 20px; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content h4 { - font-size: 1.1em; - margin: 0; - padding: 15px 0 5px; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content div.sandbox_header { - float: none; - clear: both; - overflow: hidden; - display: block; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content div.sandbox_header a { - padding: 4px 0 0 10px; - display: inline-block; - font-size: 0.9em; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content div.sandbox_header input.submit { - display: block; - clear: none; - float: left; - padding: 6px 8px; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content div.sandbox_header span.response_throbber { - background-image: url('../images/throbber.gif'); - width: 128px; - height: 16px; - display: block; - clear: none; - float: right; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content form input[type='text'].error { - outline: 2px solid black; - outline-color: #cc0000; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation div.content div.response div.block pre { - font-family: "Anonymous Pro", "Menlo", "Consolas", "Bitstream Vera Sans Mono", "Courier New", monospace; - padding: 10px; - font-size: 0.9em; - max-height: 400px; - overflow-y: auto; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.heading { - background-color: #f9f2e9; - border: 1px solid #f0e0ca; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.heading h3 span.http_method a { 
- background-color: #c5862b; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.heading ul.options li { - border-right: 1px solid #dddddd; - border-right-color: #f0e0ca; - color: #c5862b; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.heading ul.options li a { - color: #c5862b; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.content { - background-color: #faf5ee; - border: 1px solid #f0e0ca; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.content h4 { - color: #c5862b; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.content div.sandbox_header a { - color: #dcb67f; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.heading { - background-color: #fcffcd; - border: 1px solid black; - border-color: #ffd20f; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.heading h3 span.http_method a { - text-transform: uppercase; - background-color: #ffd20f; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.heading ul.options li { - border-right: 1px solid #dddddd; - border-right-color: #ffd20f; - color: #ffd20f; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.heading ul.options li a { - color: #ffd20f; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.content { - background-color: #fcffcd; - border: 1px solid black; - border-color: #ffd20f; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.content h4 { - color: #ffd20f; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.content div.sandbox_header a { - color: #6fc992; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.heading { - background-color: #f5e8e8; - border: 1px solid #e8c6c7; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.heading h3 span.http_method a { - text-transform: uppercase; - background-color: #a41e22; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.heading ul.options li { - border-right: 1px solid #dddddd; - border-right-color: #e8c6c7; - color: #a41e22; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.heading ul.options li a { - color: #a41e22; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.content { - background-color: #f7eded; - border: 1px solid #e8c6c7; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.content h4 { - color: #a41e22; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource 
ul.endpoints li.endpoint ul.operations li.operation.delete div.content div.sandbox_header a { - color: #c8787a; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.heading { - background-color: #e7f6ec; - border: 1px solid #c3e8d1; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.heading h3 span.http_method a { - background-color: #10a54a; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.heading ul.options li { - border-right: 1px solid #dddddd; - border-right-color: #c3e8d1; - color: #10a54a; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.heading ul.options li a { - color: #10a54a; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.content { - background-color: #ebf7f0; - border: 1px solid #c3e8d1; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.content h4 { - color: #10a54a; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.content div.sandbox_header a { - color: #6fc992; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.heading { - background-color: #FCE9E3; - border: 1px solid #F5D5C3; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.heading h3 span.http_method a { - background-color: #D38042; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.heading ul.options li { - border-right: 1px solid #dddddd; - border-right-color: #f0cecb; - color: #D38042; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.heading ul.options li a { - color: #D38042; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.content { - background-color: #faf0ef; - border: 1px solid #f0cecb; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.content h4 { - color: #D38042; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.content div.sandbox_header a { - color: #dcb67f; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.heading { - background-color: #e7f0f7; - border: 1px solid #c3d9ec; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.heading h3 span.http_method a { - background-color: #0f6ab4; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.heading ul.options li { - border-right: 1px solid #dddddd; - border-right-color: #c3d9ec; - color: #0f6ab4; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.heading ul.options li a { - color: #0f6ab4; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource 
ul.endpoints li.endpoint ul.operations li.operation.get div.content { - background-color: #ebf3f9; - border: 1px solid #c3d9ec; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.content h4 { - color: #0f6ab4; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.content div.sandbox_header a { - color: #6fa5d2; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.options div.heading { - background-color: #e7f0f7; - border: 1px solid #c3d9ec; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.options div.heading h3 span.http_method a { - background-color: #0f6ab4; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.options div.heading ul.options li { - border-right: 1px solid #dddddd; - border-right-color: #c3d9ec; - color: #0f6ab4; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.options div.heading ul.options li a { - color: #0f6ab4; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.options div.content { - background-color: #ebf3f9; - border: 1px solid #c3d9ec; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.options div.content h4 { - color: #0f6ab4; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.options div.content div.sandbox_header a { - color: #6fa5d2; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.content, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.content, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.content, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.content, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.content, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.content { - border-top: none; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.heading ul.options li:last-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.heading ul.options li:last-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.heading ul.options li:last-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.heading ul.options li:last-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.heading ul.options li:last-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.heading ul.options li:last-child, -.swagger-section .swagger-ui-wrap 
ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.get div.heading ul.options li.last, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.post div.heading ul.options li.last, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.head div.heading ul.options li.last, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.put div.heading ul.options li.last, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.patch div.heading ul.options li.last, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations li.operation.delete div.heading ul.options li.last { - padding-right: 0; - border-right: none; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations ul.options li a:hover, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations ul.options li a:active, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations ul.options li a.active { - text-decoration: underline; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations ul.options li:first-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations ul.options li.first { - padding-left: 0; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations:first-child, -.swagger-section .swagger-ui-wrap ul#resources li.resource ul.endpoints li.endpoint ul.operations.first { - padding-left: 0; -} -.swagger-section .swagger-ui-wrap p#colophon { - margin: 0 15px 40px 15px; - padding: 10px 0; - font-size: 0.8em; - border-top: 1px solid #dddddd; - font-family: "Droid Sans", sans-serif; - color: #999999; - font-style: italic; -} -.swagger-section .swagger-ui-wrap p#colophon a { - text-decoration: none; - color: #547f00; -} -.swagger-section .swagger-ui-wrap h3 { - color: black; - font-size: 1.1em; - padding: 10px 0 10px 0; -} -.swagger-section .swagger-ui-wrap .markdown ol, -.swagger-section .swagger-ui-wrap .markdown ul { - font-family: "Droid Sans", sans-serif; - margin: 5px 0 10px; - padding: 0 0 0 18px; - list-style-type: disc; -} -.swagger-section .swagger-ui-wrap form.form_box { - background-color: #ebf3f9; - border: 1px solid #c3d9ec; - padding: 10px; -} -.swagger-section .swagger-ui-wrap form.form_box label { - color: #0f6ab4 !important; -} -.swagger-section .swagger-ui-wrap form.form_box input[type=submit] { - display: block; - padding: 10px; -} -.swagger-section .swagger-ui-wrap form.form_box p.weak { - font-size: 0.8em; -} -.swagger-section .swagger-ui-wrap form.form_box p { - font-size: 0.9em; - padding: 0 0 15px; - color: #7e7b6d; -} -.swagger-section .swagger-ui-wrap form.form_box p a { - color: #646257; -} -.swagger-section .swagger-ui-wrap form.form_box p strong { - color: black; -} -.swagger-section .swagger-ui-wrap .operation-status td.markdown > p:last-child { - padding-bottom: 0; -} -.swagger-section .title { - font-style: bold; -} -.swagger-section .secondary_form { - display: none; -} -.swagger-section .main_image { - display: block; - margin-left: auto; - margin-right: auto; -} -.swagger-section .oauth_body { - margin-left: 100px; - margin-right: 100px; -} -.swagger-section 
.oauth_submit { - text-align: center; -} -.swagger-section .api-popup-dialog { - z-index: 10000; - position: absolute; - width: 500px; - background: #FFF; - padding: 20px; - border: 1px solid #ccc; - border-radius: 5px; - display: none; - font-size: 13px; - color: #777; -} -.swagger-section .api-popup-dialog .api-popup-title { - font-size: 24px; - padding: 10px 0; -} -.swagger-section .api-popup-dialog .api-popup-title { - font-size: 24px; - padding: 10px 0; -} -.swagger-section .api-popup-dialog p.error-msg { - padding-left: 5px; - padding-bottom: 5px; -} -.swagger-section .api-popup-dialog button.api-popup-authbtn { - height: 30px; -} -.swagger-section .api-popup-dialog button.api-popup-cancel { - height: 30px; -} -.swagger-section .api-popup-scopes { - padding: 10px 20px; -} -.swagger-section .api-popup-scopes li { - padding: 5px 0; - line-height: 20px; -} -.swagger-section .api-popup-scopes .api-scope-desc { - padding-left: 20px; - font-style: italic; -} -.swagger-section .api-popup-scopes li input { - position: relative; - top: 2px; -} -.swagger-section .api-popup-actions { - padding-top: 10px; -} -.swagger-section .access { - float: right; -} -.swagger-section .auth { - float: right; -} -.swagger-section #api_information_panel { - position: absolute; - background: #FFF; - border: 1px solid #ccc; - border-radius: 5px; - display: none; - font-size: 13px; - max-width: 300px; - line-height: 30px; - color: black; - padding: 5px; -} -.swagger-section #api_information_panel p .api-msg-enabled { - color: green; -} -.swagger-section #api_information_panel p .api-msg-disabled { - color: red; -} -.swagger-section .api-ic { - height: 18px; - vertical-align: middle; - display: inline-block; - background: url(../images/explorer_icons.png) no-repeat; -} -.swagger-section .ic-info { - background-position: 0 0; - width: 18px; - margin-top: -7px; - margin-left: 4px; -} -.swagger-section .ic-warning { - background-position: -60px 0; - width: 18px; - margin-top: -7px; - margin-left: 4px; -} -.swagger-section .ic-error { - background-position: -30px 0; - width: 18px; - margin-top: -7px; - margin-left: 4px; -} -.swagger-section .ic-off { - background-position: -90px 0; - width: 58px; - margin-top: -4px; - cursor: pointer; -} -.swagger-section .ic-on { - background-position: -160px 0; - width: 58px; - margin-top: -4px; - cursor: pointer; -} -.swagger-section #header { - background-color: #89bf04; - padding: 14px; -} -.swagger-section #header a#logo { - font-size: 1.5em; - font-weight: bold; - text-decoration: none; - background: transparent url(../images/logo_small.png) no-repeat left center; - padding: 20px 0 20px 40px; - color: white; -} -.swagger-section #header form#api_selector { - display: block; - clear: none; - float: right; -} -.swagger-section #header form#api_selector .input { - display: block; - clear: none; - float: left; - margin: 0 10px 0 0; -} -.swagger-section #header form#api_selector .input input#input_apiKey { - width: 200px; -} -.swagger-section #header form#api_selector .input input#input_baseUrl { - width: 400px; -} -.swagger-section #header form#api_selector .input a#explore { - display: block; - text-decoration: none; - font-weight: bold; - padding: 6px 8px; - font-size: 0.9em; - color: white; - background-color: #547f00; - -moz-border-radius: 4px; - -webkit-border-radius: 4px; - -o-border-radius: 4px; - -ms-border-radius: 4px; - -khtml-border-radius: 4px; - border-radius: 4px; -} -.swagger-section #header form#api_selector .input a#explore:hover { - background-color: #547f00; -} 
-.swagger-section #header form#api_selector .input input { - font-size: 0.9em; - padding: 3px; - margin: 0; -} -.swagger-section #content_message { - margin: 10px 15px; - font-style: italic; - color: #999999; -} -.swagger-section #message-bar { - min-height: 30px; - text-align: center; - padding-top: 10px; -} diff --git a/scylla/swagger-ui/dist/css/style.css b/scylla/swagger-ui/dist/css/style.css deleted file mode 100644 index fc21a31..0000000 --- a/scylla/swagger-ui/dist/css/style.css +++ /dev/null @@ -1,250 +0,0 @@ -.swagger-section #header a#logo { - font-size: 1.5em; - font-weight: bold; - text-decoration: none; - background: transparent url(../images/logo.png) no-repeat left center; - padding: 20px 0 20px 40px; -} -#text-head { - font-size: 80px; - font-family: 'Roboto', sans-serif; - color: #ffffff; - float: right; - margin-right: 20%; -} -.navbar-fixed-top .navbar-nav { - height: auto; -} -.navbar-fixed-top .navbar-brand { - height: auto; -} -.navbar-header { - height: auto; -} -.navbar-inverse { - background-color: #000; - border-color: #000; -} -#navbar-brand { - margin-left: 20%; -} -.navtext { - font-size: 10px; -} -.h1, -h1 { - font-size: 60px; -} -.navbar-default .navbar-header .navbar-brand { - color: #a2dfee; -} -/* tag titles */ -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading h2 a { - color: #393939; - font-family: 'Arvo', serif; - font-size: 1.5em; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading h2 a:hover { - color: black; -} -.swagger-section .swagger-ui-wrap ul#resources li.resource div.heading h2 { - color: #525252; - padding-left: 0px; - display: block; - clear: none; - float: left; - font-family: 'Arvo', serif; - font-weight: bold; -} -.navbar-default .navbar-collapse, -.navbar-default .navbar-form { - border-color: #0A0A0A; -} -.container1 { - width: 1500px; - margin: auto; - margin-top: 0; - background-image: url('../images/shield.png'); - background-repeat: no-repeat; - background-position: -40px -20px; - margin-bottom: 210px; -} -.container-inner { - width: 1200px; - margin: auto; - background-color: rgba(223, 227, 228, 0.75); - padding-bottom: 40px; - padding-top: 40px; - border-radius: 15px; -} -.header-content { - padding: 0; - width: 1000px; -} -.title1 { - font-size: 80px; - font-family: 'Vollkorn', serif; - color: #404040; - text-align: center; - padding-top: 40px; - padding-bottom: 100px; -} -#icon { - margin-top: -18px; -} -.subtext { - font-size: 25px; - font-style: italic; - color: #08b; - text-align: right; - padding-right: 250px; -} -.bg-primary { - background-color: #00468b; -} -.navbar-default .nav > li > a, -.navbar-default .nav > li > a:focus { - color: #08b; -} -.navbar-default .nav > li > a, -.navbar-default .nav > li > a:hover { - color: #08b; -} -.navbar-default .nav > li > a, -.navbar-default .nav > li > a:focus:hover { - color: #08b; -} -.text-faded { - font-size: 25px; - font-family: 'Vollkorn', serif; -} -.section-heading { - font-family: 'Vollkorn', serif; - font-size: 45px; - padding-bottom: 10px; -} -hr { - border-color: #00468b; - padding-bottom: 10px; -} -.description { - margin-top: 20px; - padding-bottom: 200px; -} -.description li { - font-family: 'Vollkorn', serif; - font-size: 25px; - color: #525252; - margin-left: 28%; - padding-top: 5px; -} -.gap { - margin-top: 200px; -} -.troubleshootingtext { - color: rgba(255, 255, 255, 0.7); - padding-left: 30%; -} -.troubleshootingtext li { - list-style-type: circle; - font-size: 25px; - padding-bottom: 5px; -} -.overlay { - position: 
absolute; - top: 0; - left: 0; - width: 100%; - height: 100%; - z-index: 1000; -} -.block.response_body.json:hover { - cursor: pointer; -} -.backdrop { - color: blue; -} -#myModal { - height: 100%; -} -.modal-backdrop { - bottom: 0; - position: fixed; -} -.curl { - padding: 10px; - font-family: "Anonymous Pro", "Menlo", "Consolas", "Bitstream Vera Sans Mono", "Courier New", monospace; - font-size: 0.9em; - max-height: 400px; - margin-top: 5px; - overflow-y: auto; - background-color: #fcf6db; - border: 1px solid #e5e0c6; - border-radius: 4px; -} -.curl_title { - font-size: 1.1em; - margin: 0; - padding: 15px 0 5px; - font-family: 'Open Sans', 'Helvetica Neue', Arial, sans-serif; - font-weight: 500; - line-height: 1.1; -} -.footer { - display: none; -} -.swagger-section .swagger-ui-wrap h2 { - padding: 0; -} -h2 { - margin: 0; - margin-bottom: 5px; -} -.markdown p { - font-size: 15px; - font-family: 'Arvo', serif; -} -.swagger-section .swagger-ui-wrap .code { - font-size: 15px; - font-family: 'Arvo', serif; -} -.swagger-section .swagger-ui-wrap b { - font-family: 'Arvo', serif; -} -#signin:hover { - cursor: pointer; -} -.dropdown-menu { - padding: 15px; -} -.navbar-right .dropdown-menu { - left: 0; - right: auto; -} -#signinbutton { - width: 100%; - height: 32px; - font-size: 13px; - font-weight: bold; - color: #08b; -} -.navbar-default .nav > li .details { - color: #000000; - text-transform: none; - font-size: 15px; - font-weight: normal; - font-family: 'Open Sans', sans-serif; - font-style: italic; - line-height: 20px; - top: -2px; -} -.navbar-default .nav > li .details:hover { - color: black; -} -#signout { - width: 100%; - height: 32px; - font-size: 13px; - font-weight: bold; - color: #08b; -} diff --git a/scylla/swagger-ui/dist/css/typography.css b/scylla/swagger-ui/dist/css/typography.css deleted file mode 100644 index 27c3751..0000000 --- a/scylla/swagger-ui/dist/css/typography.css +++ /dev/null @@ -1,26 +0,0 @@ -/* droid-sans-regular - latin */ -@font-face { - font-family: 'Droid Sans'; - font-style: normal; - font-weight: 400; - src: url('../fonts/droid-sans-v6-latin-regular.eot'); /* IE9 Compat Modes */ - src: local('Droid Sans'), local('DroidSans'), - url('../fonts/droid-sans-v6-latin-regular.eot?#iefix') format('embedded-opentype'), /* IE6-IE8 */ - url('../fonts/droid-sans-v6-latin-regular.woff2') format('woff2'), /* Super Modern Browsers */ - url('../fonts/droid-sans-v6-latin-regular.woff') format('woff'), /* Modern Browsers */ - url('../fonts/droid-sans-v6-latin-regular.ttf') format('truetype'), /* Safari, Android, iOS */ - url('../fonts/droid-sans-v6-latin-regular.svg#DroidSans') format('svg'); /* Legacy iOS */ -} -/* droid-sans-700 - latin */ -@font-face { - font-family: 'Droid Sans'; - font-style: normal; - font-weight: 700; - src: url('../fonts/droid-sans-v6-latin-700.eot'); /* IE9 Compat Modes */ - src: local('Droid Sans Bold'), local('DroidSans-Bold'), - url('../fonts/droid-sans-v6-latin-700.eot?#iefix') format('embedded-opentype'), /* IE6-IE8 */ - url('../fonts/droid-sans-v6-latin-700.woff2') format('woff2'), /* Super Modern Browsers */ - url('../fonts/droid-sans-v6-latin-700.woff') format('woff'), /* Modern Browsers */ - url('../fonts/droid-sans-v6-latin-700.ttf') format('truetype'), /* Safari, Android, iOS */ - url('../fonts/droid-sans-v6-latin-700.svg#DroidSans') format('svg'); /* Legacy iOS */ -} diff --git a/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-700.eot b/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-700.eot deleted file mode 100644 index 
d8524983ad8d296be95cb5b469efd1987d6e04e3..0000000000000000000000000000000000000000
GIT binary patch
[literal 22922 bytes of base85-encoded font data omitted]
diff --git a/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-700.svg b/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-700.svg
deleted file mode 100644
index a54bbbb..0000000
--- a/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-700.svg
+++ /dev/null
@@ -1,411 +0,0 @@
[411 deleted lines of SVG font glyph markup; content not preserved in this extract]
diff --git a/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-700.ttf b/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-700.ttf
deleted file mode 100644
index 15896c441fd59e5c2e9bdab90816661de06cdcba..0000000000000000000000000000000000000000
GIT binary patch
[literal 40513 bytes of base85-encoded font data omitted]
z`ry5!gFFB`IxDFnoQuQTpm*q5{0Gos)YNLV{4UpA!#vh$=CV<;3t8Y(DoU^w)!& zNln%Yr+yC4Z zHL=T{zjo_$TWg1!I+iS&J$q4fVzH5&d_wrgOMQJWVe=DDVCzc@`(6@0d-D9?`N1EC zl3V`(W10hFDuG^UkuPb~V6|F;R@{xT8~1veMV7GM!}$H4&Vq= zBD{eCk8)P0TeEi;eotUpvnEb%^YB)OQ+e$(cq3h(R={)t(9Iq|i#CsRh0W{IBuN|T zA%GU$YK2ik+!(&-SZU%IfGsLG5g5JrZBa0|(J~k+FA3B#7j&csn>=h_Fo&D#Sd*rR4>}mdVoxO53g<5uxj0eruNpmgML+^8OaEzz+5HfP)?3cmcIg_>=m+_(th25j_A0Yjmx~a(%a-EUazI3 za9vDdS+7^aN=0C)pvNGVXT)yPUW59Fcl2?({29>V#2KvZr^qEzS_^AgOoBL?c`7dB zg~2gjl}ukd;fX`_11E0V@TbnA-rl*vjx}`--_&d7E$W+d`9$a3#o?)qRn~h8=C8Y^ zdD#)Dmfn`YN&dGjN;^!)m=34uH;vF6&8?#e0QqKT306P|c;uyv-v;nlt6DhrrH zv#z8#n}s#0fpwR|3Rff@GDKD=BzH3)!myrMRt|IPWmYTENq9mGbM6I!_dXdhviIV`E-5z<##ofbh=HZ^&|-eL(PH@K+ct(-%3u#UbHs9D0vAFu3@f zs-$*&b&-YifrFZ}#A{XSs)XJR0i? zu0TVrvqh;%Y3%D*WP|}Bxo>R_Z(667kE{#rDD0yktclp?bWk6UP_zU69^?LG)&!8w z$T}EcL`*Fv9bFGxR@b!i-Ron^=FiIY&Y9m^bj^)B2C}N+Wroc$h83h%_jKmL%2f|7 z+xUm=4RV9ih!sY!O|x>-vemp?!H*|rg!95_2^BVinqhoF?lqJT($cR? z6{&d9-4jWJ2lZAdsOJa>l*fdR6X{S7b9U%+_5fyI`hm_PgHkY$9_|Kc zIxM$fj=^#tmO8OSj^$3TvYX28?ia%@^W1MdHoOdq^!QKzPFx{)Z_YkRJ6fptJ(kz0$qfRXL2kZr~Ca( znmRLQz-h@mgQozisfD55;sq&_IGGE+_87RwWY8T|1tF|v3?jONRix^!n2_k~2QE+L zHS|fZ8yTDB8j;_|WPv;fb9r>u15|$9 zAiJmrgt&CQ!;X17&WNrAL^os3#913LYNhmrI76TvSt_A{s1qz3SQW3Dxd4i(MEf#` z_kzY`U83mTN<~-in_ei2uF6=^+%qD&Pk`uN-zKp6btU%B3!=+3EPng>-(Ciy%dzL= ziV2(cF37Ddz96~{E%$x%u`@Au{E{oih%Ss*#1!mWu+ae&PPz`{f0%zZp9$qB@`+Qv z3#KH4E4=sEv>9xX5CIS6T7HTzsL8xwy66CZf;avz{J|`|2uQ}|8w~m5M4du0+dI%l zC32&s$+kz7&nii^vRO$sDR(IEQZjO7ma;&}DDj@0134e&oXufFIf)!%*@aYERga2M z;XC|%Io|-lEhI@uCIcyueUDzt->ak`PDw#rW>o1fMc@REBK!c9vy5Ol=8|#Y=QLxm zUFgzLePtQNdgSD#Bf@AVdV|$+2=qlya$Ff#$MtgSIF{$k98g+Pz#|;TMEQE2n0Y@> z_>jn0RI*Jhv9b9qwP_4?Ghrsl8~`UBD2LUc??J1;n&{&-fQqp-1R93Qo)1T{NrqYN zV2U|Bw1sSc10Nc|XU_^pg-^w~orxa+d`&fu1eGS85+Y;KL>9A;?F7@vO$3Yem>VbY zHvAz5?rlUBoB?nj5x;>VR1CM7%whTsOyI-`Ve`q8aB2ADkH%SrC*h@dGiGJjh6xAk z8OLJGL{U8=X5du{i&dnuYxD{WD!M0T)~Q=8ENH+; zV(2X$=sr++u#b;LLSMqyU{SCA98i;}83%Z;KNf$WQi65ig34TN%jEjq)BA!8mai)B z`^}n~?AX*;LDZ=b-m{0tjgpb$C>hxlnL4S_?C)xy4uGBv_h64W=`$L|1pF&*`x>B+*5ZWA_{nu^AcFGAh&q!)X=F|;VwlsAh@ zXeVolR$E^>WnNa+6t=n^g^nd)h}vURVF4F~LoaG!?DVmN4h@##qWf_y}YQr z@3!YYy7E3Oe{tDhZF8caCSYQe#`5f{_K1a??SAIA*#>VY+ZM_-#TVZ;YvH3?>g8Ia zTu7S>_=0kmvV6|w`faaYpXz)1{4cM6_m)X&YmV_qLCM0GuY4?A5WW%q zd)53vdcs z!fRq?q!{s>TFuIk4FnzmakxsdQ^EpMDY5ojvDS)t#th6iORu43Nw?W-S7SBDut*P1 zbt00Ehrqm4Pz6POFYr9|z`|1I7XFolgXv(}1H7{bdt{ga3Q^4gJ$=4ckS-8j!zW?; zeIVGoc&pvYu_~Qjt76i4j_@1qx$||T2PgII^LxGK>Irq##R}mF43R;H0oxn`ylMe9 zFN8KEn|TBFvl#ZSUx5|5HYav=^1Q787j)_tmbgl=HMi5!8&-IGp0(nKt@t`CUTDS5 zR$OJpHY=7=J{#qBoAo)}aPl*4>!ju>IK$LxZ^d{>s*{Y4Mud4%JMJn9b3SBzGt< zM=~A)EzM0D5dAV6nGBcX-kG zWN?8B%RO11PR|n0Gak;ZUg$gE!@GUB$A?`$%+0p;<)Ur}EAKWK#Np8q(XnYi0fmeJ zEsX#m%6RES93c9<<++hkGo^MJ->ekn-im**1(joEtHvX`;(;Rmtl^_8*kV4xEEOz66CURXCJteih-Qgxz* z5M{x}PQKS_=$0_XZVn`hdf>Ewa1(uSp7jDGrTdhU%R#JzUUJ?AF9@GMA@t)%@5jvH z`NFFu>1$^Uta8F#Fm2$BQ&va3v1ITN z(=_x2{#wuxBX>v;UJ!(9#dU)3Wd!(M8Th%C(%sQ z0)L8Up&KhN?Fphiq5a7CAYwO0`T@(A{!vHp8R0n7F!VVF8$&yB?a)P8;V1_XKy}Hy zKtSo6W84WIL4HsEs(j+g$Bx;`zPvzp5T*&`Si5y>PPZf@C>cLalolEspl%SXiRb5J zWK_k140*`kNL}TI{u^3&1>YCwgN3QO5e zu2;!)Unj-QQd}j)HVAjne{!DTMzj+ciqWM5k%tA$EFN{A=`Uv#-VijxA8|SUCytVh zLpy*IJxXdab2lCY_oVpURY~t|)v$`Fk}7Q~3}3EL$U8Ygj=-AA*lwPYGZ}#?O`HLH zl?Ec}3sB!@=N-cqpEd+2(lnO|vLHFcCv{GD%7|?Z`Uf3fXSVtG*a@>k%3Pq%+W9U0r zkmPav&!Od+HMy{tIh3EYh1di;z&_5hti&b3OegB)fkheFx*0|C7OVhpu<&~rm(%_E zL(}kxT=BbL+zxcp@3b_?qdGkNVR9BcX>{On2bMT2j(~#!z*&VbHIMm-v1_k0>@}P- zFb1t0;zMZ%QmYmE1+JOoFd;-$E^m+%iQFO&$QgN;qSu*Ch*rrc)CO@ATfkgGxKivE 
zgfU_1bktOy3Bn6HAM_^}^kASFw01V2ieVYI()6o4cS98VVn=JUFVy~!L0F3&AHQN|3FpJ$a>!Z8R;I=@98Y9C=Q6hd5w7GGK@gu*Vo#`i);pMOfQdotWq@Ooqwo_(vfj#WD4cH#HaY);#>$Re8q`?}ER@$v+ zw4xQDAUI=(DD^=d0I-K5=%BGeFpnTKfK2dLHQTS=*gu){S6+Q$Pb2K8T{pdXb&}y1 ztzW+Ak39#jX&XAn9ZIZ!n9doXtrua=?0}|bBrD7u4J|M%I9pP^A11IQRFWtm*|YV1 z;VQ`_$!rOeFTn~4mhf7hQw6&%tfyOLfP93Eu^hpIFtc=(M@SiwjfEYaBjIzv4gWEx ze(6jHCU2aA7Yl2IZzVjZVE?a4-}v^U*AxoNaTCV>$JA#qh+Ld^C)%29lR2@>fjJF! znzF#d>|{-KCSuaiTm}cq#@X8JY`4pek-SsU3x?`hgdaxuI)oP@+$^dEDeR{nnM^M2 zRx%E`sPCwACc-P&%)qF_9F&d#V_3R4@KhPyH@a{(%DK5&N>oW;;}aLm6aH%2m8&k> zIvEyjC%{Q%-h>(9l{5W~6?&bpS2)TPE$m%1b7<4haqiHG4<=pS-DrLC#_Bbl_G|(s z6Z+IO`~`CYY(7L}lSOhSiy@rbe zqk99G^$1-z=z1xvp>QUGM9iDS+U|Z3+l#Q_Q;axg89AC-lEo~#@yDaw%Cl?Nn2-^;$8E8wX(VpG?FEl zW3=SQbMh0hGKaNp!9ef4dzMzHv;4Myn%ri^)OugMw9Jz?b-^}qtog8)s$fkes5Tim zBKwn!td!xfEGZ)z8NNyk|8;^*NWx4K;K;jypP*ZbQtANdwPzqONPk9Q0eKT3!7Z#_ zxRpH!c_9MK=YG&DN>OKqli4yUma*8}i5EbQ(14AoK{^1z@&+aKA4(IaMFlUa7K*N^ zDHgdk*qbBFqmEP7-}L8!YmeWM!cljkE3th&#MoQcyu7o$<>uEeXZ8;+p0ajg&YgEL zzohyC3@(iEG)d@V4i?3R46WZZ4feA1;b;p7Gu3ezfw;`>V^}YHaTkqzli98 zqscgInH@UTS);}Wqz--!P6l|;7_%3`o)yA=A&%qcIGsVyaeAYH%f!(Jck0@DdOV(< z?lyg3<;DghyHvkf ze}kUUYvt;cLXUfO>vUJ?m~vf%u0zN8bl9xJKrd5v<_64U@CQ?XVJl!c1_-haT}3@X za7k<70xZH96?OQo@ezv;`)1|t$R*ZXoGa7ova@AHjKLPgQB>YjzN&m@Ia{tRFR##4 zV6zb$4fGOzLo|0lhBwP_p{!cgEMx34EC*|5V2ZMh*_a1Kpx29bjM8vAfHb1O(8L+) z_FkAsntTMn29js!dlcXV#dafiqc8Ho59UNaSe$UqQp?2*F!Li%c)76WAgfTTl`=wP zN|j2zW#>^!lp$ zrn(C45QG>9KhoATPiw0;o9kNI8)Hf)GWe!4*4W-sXExWjO>3^vGR})~#<8%L3Uo!X zLMF4xh)ga=3Ml~H3`+*0*n?J~oxu130#t2N;gu@POU#gN#PegA0wc>xjZ&f@c0!~C z%Xok+qi_(&I+!_evMx>^ojrI5CXnt#h~8%YfnP7&C%g;42k~>leB6QcSczMOE|Nd= z9{Gm6JoF7w4~fnT?2$*I?^3inY32ZeVtibV!}0@iqLE{10xT9?Z5QN4;m--aiQrzc zjuLyAXbDi5onbI}RH!~nDrLG9;(5&Kr4VPPStbyJ3&MH>A5aP$2mC-sMY1Cka)7>4 z(~CW%152TIb;AE6)ZsQ}4(S}+KJ*$1F&Tdlv@`31KFZL#WXWQ19kd2(N&$}$XabGZ zBU>fgDcd7EAd|>s;3OVbe63*G6nL@%_km+WyPpjoVBtACnLJKjfOrm(OW1BmVvu&z z2|h#J4;_Z!2oSKM$U#5FnI3UOG65bB>l)OPr9*p}*N1kIEytKIF?xMaFY1~B$kSL0 zeUzY_q=7}>@ZdmeL3MKfA?Nu2;O|l5G^jY3iZ21QikI{1B4n+QjsO1Phr)CG`FGAg zN_CbJeBVs2B8}MtgocTx3W`Gv4CE;q@EjAf7aamRjWCBE;q#$WBQbl7BG4a;*dKRN z%b;)YS)P}n|46=P{==RVJ#;!=u{V~84s!c+p2(?F`0qmYYHr#)-$U{TMp7b$nCtT( z8?mG_NTH2M3cRw7g_J5fG>&B$IV6oDak(Lp09?0FL}ac-V2;uTfZ|@zyAmlrv=e{* zweWoXKpkXU9vW;S+wT{ifs9h{;Rc20nAYJGj7)!sW+&j^1Bndu7+j6cRn)_@4(?|l z)8A5mBpk;6#;_7A-BYw$Nu>o_jIDMp$#0dIAu||Q2(WIH z;G<&#C_@n*q+&Z2Pz#^8*rXFvI{jGtCNzQFs(Q2EQR z4_OE}LD@h#H&i*|Q_JaG60`?6v5k-e!zBRUAtT#rT$slg6-B{a%z`G{aH$y8^#7(~KxVbMLUv%rt ziWzkglL_L(CDb!hn#vnH!;!{9BQX?CET27nI=i;%*7tATdE(Y4YhEOKGo%*n^0asY z0dI9t*0;W<)ss9^woWg6?6<%DEsO)BK3KEoAhzT})k(ixLPOOmWY^NDwC8{ahdh%# zD?N;{k?U~lTI5oubk{^? 
zcT6kHo6@<6xqHp!k&Z;}V0*(&@7;3S|J>Fz;TM0n{PyI+M!&-BF@Irl7-6NT904w7 zQoIX@i>73u+$vK;KBZP`57{5L6RjOvNdqHcIMBwBISzS5trDxo*rLKj+~)}~qcb z`_h}Hc^&!>75bLy2Fl?Z1tVJn^FI!?qcnOhc_k+?Nqo>9P*6Sx!`VzWpN)}wv%Cdf zMjplTcvie1&Xl=6vs_%f^pdG-H&(Ua5t7r#ZyzU0=G<`D$s^PvGJ3A z-M*DR#^=+<+_7+siIs6=99!Kn2@)0Lra9Hx#zx%O=C)62#jPd?Ii(Y)(+M%;bWBVp zr6~r-VBp8%K}aDL=OBHI>ONT1cu>@M@GdqRUugJQXm)e9<_ue;e;Z641GVhAI`cyJ8aR)o+!AQ=;g6TK) zLZYM2mu0ePIA8PnDcyH1jR&W!N!4^F^As|7K_uHK6I&=tfG%0$c39VyO1Sq7rEvu*|C#Sl|ZZE3N$*C#I!nL$N zTz@d`Rja-6U_qr5rxeZ;nO$8-GhPirHC&>088*ItFR78-(`3|(7~{` z?{61=!yAX1gb^9UZ$qCM$iX$dz~M00!|ep{-hih;Hi8IMGUhPj)#JD6#i0GjXZf?B zdkVl$C=+QB#sWEz4c}pW7sl6MAi8k{#(vS;#8~NV$eCW)U5G`=4qCNZ>LF-B5orTl zi_@f4X}lRZ2SKOVzd@bIb&AqXp;6|C#vLf#Jx25+7BOGUJn}&KPZ2Vi*dF0{s-?1} z7&gI~e4TYdTW9gw`-3)XUHk0fgHH=@rCOHH8Ql+T*Fdl~&oFs9y+8WiP=NmFvHLIX zRTyg)s!Hb8>mggzWEHn-Hp&iXZ_CbXSG$&Lq2u`Z?TUW3*CTs%Vq0}vXr!C4qI8?Sz0}a1+>enh8RMSdy?iRCFtx?*!;00WB zQLSCTjuh7fD0eSX*(0k9^F;UVCD`$c*o?X|la^%}PB?xpsHT`PHsj{Lips^e92$!q zjTAC^U0E@WP{lXhLRAt4laQm@#N2i%b_9GjVpp{p3=Go>lk4I94c_UcYM9uPTzihj zt$^7rM49RR8GPpZF$j~4fh?9tjH>Li@)(3807rJlCKa2pA=FX{7)(nC5C^bU(K)fg z3~_ZL9EbcNMrDN2our-hWq|J92fwCp4F)lZ!nJ|#58-zWal6|?;o5~g%A`cOO+Ix5 zC_I{T>VYlC^V!4MxB)Urbb#pX293_dnd{Bd&5T(=A^ovPdIZpm=9>Dv)D~NGl$?Yj z4(PkMF62|C!*1cF*)FHNYJOutIR0so##&K7gIx=-jtCzSmch;m%O?-L#&+!uHk43z z3UuW>z~(!kUokX4navRsc;66thngoEFflct8S&$B9FF5+8VRw%d&LcMS4V}Zmi}@|l(d6Zel&ct;AH%rrNN(7> zDAqNtHLoB&XNE7o(x+1u&zdxGX?;%hihH_y4}zz7eSK|AUC><9xTd`jH_yCkW}!rD z)eo9-Y-)|&XEJ$wp&ZY+wuOzY>stfG#qZ}97y6AZPaxY>)4GU`KLq1fbIpJ$=Ntk^ z31bFd{z$S+u1NugWEi{4KH1)FUuoZMKV|0)mIkF-t?gE>RGw0PtsGWzO0&9EuQg~; z3pkVzFQD1Xl!g^~SkZT(BMM@%%*ZZwm=qA3Nz{z((J&WYZ-cji&7 z70xD488v@1J94HEEF<puuZ?9{?xFy z?Fin)dWqSp0;?5)T=;ZIG#(VPfYL_kQNI@CXT*htXUyh&t?h!+!zCG2h)V{7HF<_m z@7>GDiJ`R_%tF*ZJRr&s+_7!Y9~Ih_bnz;)s$A8l+Mqh5dR6s_>N}O}1TeKUfc6*Q zN+{bytDtD;F^(3aP3ZUN7&?Q#M=}*9rxbgXN0bDjaYvG7Xr_enOU)|HPR#+$pERsX z1GEOcf(k3J#tc*zP5Y&2EDdO5l(>Y-KQUbg#z`}E(jp^?!6IRxBr(20SdSCG*V!1J zu|fDWDXeF^hPKBymqa(l$rd^&k(=h%!k8l1^7LOKpG3ZcvP$~RTA3zQLifeHxgY2L zJD17L*iB99zrFwPp7%0dD>N|DjbedDC-i6Z-%~;u-h5a0vF_hGMn^v|1z!I*`Ug4> zww`|APW@gz(chta2&x0J?#O*8m*n2*-3$4|vO6LVMM&gMv={Vw0iR{g@@Fx!0$V|T z0aGB$VhkmvdrFU#l2SThI{3y?ZE18{bZ3-kq9a6bmZ8K;NzaKgMrP0idAr%(U}x-h z;0-`9sACP(?a;4hQ#dTVgF~xj2Avw#PXgh08FM*qThD2AO32I7=uNWq zI0)G0T3urDtipvGH!dpdoSf7PTi7l^7g)Y>zQa({FfmyoC)T0Q%ySmc&$i85y()me z9+5{m$m2{{I|Uj_&gQ@`0@@T(sR*`$7tC>YqkZTZM5muyj3Dy@(+WSRO2|V*j&K9t z!T0gI_y_qz{9pJ_c&UXC@G(ef1yPeRh*emDr`#hs0^Bg2bE*z$TA9kF3aK7fLFG1x zs0F^0s>7sRA+u@?sl|}sA;WQ40#X2JnIaJnD(1ta8~)hG;AsplXBrriODnP==1nVs zq;eCB#jPtMP7!a7uXa4^WwX z8{Q&3_~x6!gQSXhg#);4=!2nG@Dc%vn?UZt*3!M~MEjD(R%eBi$RYLYJ2j4}v6-gG zap;T^|F;t7E8|L%rNlDe)%K{4Kn@9=jzX9YNCsB9SGjk(54iv2W+BpOGJS&ayBP1K z__G{*ag3qbiB?3OP3n>QLk?C2Ad}N&)!CH@#td~kB4;Qw*C!e z8$y1e-zEQq)*5QYbH>6`&WBZ79)d!X9S%;2-fZp#!oe8t|J3$%|)R9fHFO=-Rn} zQ+^jX5btKQq8E6RCAmd;XpCz2@KQ z5!N|F_t0vDufUAa`b8+>^?RcLp8{guIWTP|9S<3T;NpP0Z!u9MrNgf6TWaTy_YnX5 zecSWK6zHh!CuWbL?06Dt2wrTMM_i3zjl?R1)pY-+A_T)Z;3KX)LPnF!&uT;BR`+d2N@-=_s zZAHnw<=_+vjH1aV80{ zT4oE%MRStcY*(R+xboD7lu79mE#qfN6})o#<<`gK$c^0~eoq;#Df*Z+KnauE?cg3u zsJo#~GzsajYRJ16%k4mC({dl0$c?Cw0pm{Lqa-TU0TRg0$7V zKutjxtfd7lPBzU(V6fHrXO5&G_*H^85)5dFKmZ6s>SJ24BWt2rEIBI@1Fx z2*H^Y8c4@a482NG7F7mNh6&m*^HUP;P8%X%dM-q$N0^Yk-$sN7ao43q1f*-Qg!5fY z^uAaQ0sCnHaN%jlH^@ixpHSx?#t>RiKsABkao-ER<3Pa$_XH0Fj|3U#3K7b?LHF?m zE=X=Ap7j{z**5em2b8;^U_!A?y%SagA|8=9)2BmkMovS9+h2mG)7n?lG|&LFWJ#EV zqX;OGCZ0aPK#FPx+>4GO&F_|&h-(g1-Bc{TefE?7=Ej7z!xXKES;x<*%35<}^G&6Z zE9#AJsU}_h+K!vvxNef_(cb{-GAcfj8#J=Ksr@q^+3mA_?QoITVO^ju8f@fv7@Hs6 
zom?W%&B`svWfY%k@Lx3e6)45j%+TDSVN5_Tt?>Nb@x6mAb!>K!N(W|D*r9T$Os=G* z$3iU3eA9LlahsAR!h-*zc5PJL3L@!7)<)xN#`lbjajn^UMuJ}l4h0J9pysLcG+?#U z;;_>hI#$%})W~Iu+au5beke%AnN(fy2Y^)uuna3I0Z$EfVh#32BXqXN-W5-^r5o2L zCQO0|<&*ZpsGU4Gs4bWfvOf7_V&z@j8{wQ!9Nw1h7f%kvrc~ysg~4LU(Eq&|>WA8|S(^`aa> zc#)k4Hph$}PwF)WgA%g7%o@{V({9M@WKGn>SDQ?5U_VEt%dTEa0VVCWSq zz?C;@5uBQ`O}SIKNBJiuuav9LYa|+8e;yJH4-dbe%m$?<0hu*Wy3^n?>^3kOgT{ar zdh>-qwkSK)Rz|d^A<7ANq63ybU79WxgWHhNPpA4~DnFPIP!SQ6*F1BTL7X=F zEo6!8dg0M>8uA`8J$e0;AAA6JPv{Z z#9u;6!{fum7|sooN?ntV*mU_il1~kr444Bk00J(^hsh3jm75CGaWt6F&Pk;$)|R{$ zrsZyrr}8dsVCKr1{WAwCTrQ0ipKR* zi|Z?D6SZr8yj~l&qt=?T=vjzMG+3w_& zzS)Vn5nkcQEzEM4M@noGdu}0Jvvce@=0TnXe%^KX*yb z()d;J_=c9HdzO}$ExvE@)Vu4NRu66|pL_j7M;0wQ^3e5j%f~*yx#zck?eG8VZ+k90 zGT+?3{n}fcju-6Cx+@-Avhb13b%i;5D}^(ce0ImYXms8VxaQao!$rhB86s!N0Mw|` zJcBDKs>~d$qo8Tk5_%^B%sO}?=d*aiX-pbYj5QxQ!c~YYxQRxghK9sEeJ$vQSH@ zLbH>l%;)?Um=I16AdQuOuKGf$P)I{`fZ*NAG;%p)b17NwOPLh%q`u^tuYqd#lAduT z-cd~qr=x(8hj)hHhyyW%G6aHSz!t_J!M@BxJC2?_apEML332|yGg5HyiOQ zS))%Wq!EuWVG?jSk~t~E`;de|%wsHMs({WC(~5nN_z?B5+LJ?G=I}YZjFwLh52KpQ z5g~e>CVxQ+*o7^mf>QD_ltR}hXYdSCOJA1hUxq&?&;TMdt7#h$D&QB|tZEeHd`=6@ROSQ?I@~*9^RDZg>Jl&s4ZS5+5!CDY^yW^nVGmlnwb>r5qSgdPnV`^(U z8clCaxhm*iD_rh)&;~W|(4Uzg7+Ur#pp-PU;7Gz^wL}HkfwFy2$)tKrNsr25dFoU@ z&Kzfk8IDOpY63VZu$5`}jHVWePpIfqU`GcG&nW@y#niDMbC8a33v-w}FmjmAZNeL3 zoiEVTP_g?w%!w0$i)#X96amen)&kKW{SaB)8n??Ga(BB=xxaP~yCs~X-m$>JI5c|z zcc~SB*Z-9`{*FI@xr5+{XvM*E0{eXifzgH<2~RWz4(^ZD3gB%GAfo4oHhoxJikxBni=hd+A?x*P`d@)VsN3Jxi5VUWY2@t%gm8&At3nurUM)Vd?5B40;_Ba(uh znmu4gquP4ybS>l8;(R(_W?}Zm>}#``>`#t6@FK@$j_Y8g4nw~Gfl{{2CC5gB$hgY&pW~yjqIq=!&K-&F=7~>+&yJhq^ZPu z;F;#D#*M#fVpF}=pA4^>QeuHnABi%G-OY6 z%Ca?fyPAe2l{AY0h$vvvY55gIY$#^UgeJkQfY<=dW)j)83*kTyetXgssQQFh{h6TB zyLeK0XEr$R3=K@1PGiq`8adsG*82Yp zX#2@&3*?=j(g%3>BxFy}_=p`%N`@KA##u~8qg1NfoTe|uwIfmxaq@Ha2~-<44HZo&Ho@!9f`=BumqW-n`jWyJ9>7LFG5mZ0h@?< zgUKXqU5<-hAWisrV@++DQfe|AB{lsSuL;EgRUHOYWdK|?>-R{PQ3}8wf0m4bG7y0k zL$^ZKN64e`40s;*yx=+I`P##KD5+}vXtKOpJ^-a?j!!gywS2-2io6}|mk62YBvG_< z3HvkVkSm@XE}FKXseb+RP|?(llj_!XgoWl^`rzFow3=**Xv z(oi?>+mP@JkfAsiL>3(tVMuiA*`;5i$Gc-H7s5zzpv!QaXq z(gx7m&AU(Kn`lP;1-=pV84xP}@LnkO69656$4M%H@o;e}1OCvD4JO)RST+DN$ zbkn8sMD|}ebE37ie$TXp!9IHC#IM)X*ej;S^5M)0oc~j%k)AoxaN*2}s{9YKigJt~ zYSZD&i8gxXMA%h@J1*5FumwWexQWuI!+k=7j8$UgOEY)2Onhb!E7%p@!?<; z!M~qrzX3e}?F~tV_<;_KbF{U%9+zJKC~6(ko;VABwtdgV{=9OreHL_xvFHD6`;Lq4 zm!M|2z9k9U1y1Gyvey#Le1Z-DL}YXf@{!N*=;h{_AQ`~CM;MZwT?XL*?6F+<6r`F( zqd-8w7KI=FQ&4LfP%t+peQiAUvi?C5C)hsjtOLZh! 
ztTCoECd?Y)tVx|t)%~~}Unj@Sa$F_HHmJ0q>6IG26iWIv-I-G&pkN?#dKqOqAj1vH z2PgwYOX+EbR0Jp=IKY#h-z1h((BgU;CI!SN-XJ5-%YQofv=G@Pyg~-pnNVvFMefA- z!)!cfu6SI;NYOo@tX6SOHi9{a0}^0P{GuFOjt615ibL33m}_;50wA^%i-|>f$s3WE zr$u?eO3n}Q^SuXkR*c5T%cbo%Ko!3zFX9JUl!npP7q71vbN!>pHKx7R;QE>NL_*g; z)1M{CmRWy-?I18%p)AOJV$Av%fWAE`eg|+ZROW@hjvd2&myTgPwEyRr_8W%pAJeCv zOUE#L76N;PW7;1bPL64heliBg$0vUm$B^mIlHpotZ#y(Hgu}zb2QL`~6iJL64xt58 zw=g+g*C}1-NqF$gEL;US*VgRL02F0nb+6m5>9#(OU%;m@a~;Mj0T^OjC8p|0shE=N zZnfS*4^5y|Dpa^KKa@X*?8=wY|AZS&v4O~#0h|hYu!PA7feCdjm#=vfoO-GcBa%ogYskQ43(3ElTl z@&jw~3VJ28Ca{ViSN}SgT_7JTS7PC^ILCmz=o+GFVtbM&>aBYZ7M1;fw%-7;^o-u3 zADEaI8f~4~5413P%=M3={bR5H-N8`R=qI56nf}oJXZrIBdS>MMLAX9CtQ>7U=K54_ zGwqk4gV5e~h#@hmwhj*u9n5RUd}c&)F6%zOxzkFvW8I2F4aP~+p^ zJR@6(2Jh(U2;$EO-3rnEm<(HFSOVAq-zmI%2QI=TcLNeH+9ezp-uWeY2N&Hfyo-bM z<=c1SBH>*)U>)YZJ(Q_Rp+zs8Q1V& zTUUC}9m2=z=ZYogdxAndr!7p&&_~M8Yu0yN(~I+-6)Vs^u9ZDk&KEv7d~hxKM-a-- z*FLgt$Zjw~L3+k^h=r}9TvUw6U5zLg*ec5D{)3KO#aKDLwEYJ3RYoqv4|H2yqpd}` zxU~IIbko@D3kO}n(NBQfW!h6Y%JgRmx&_*U{UbO)^&ev`&$J(-|6nBSqv9B<#W8F^ zog;mc!64dYS_!$J!J(;G1TGfeYay{Jl`Q1pNYl>UAU+Erdo2OkYXeXr!RZu|PHAtJ ztw*8j;T7;J0lJFT4U5q??P)Eo@;xmc;y|P9;(4Q)@_Q3aCeIK~a261|JhJ|Un{J?I zTHJrd!S(T>E2nL`Y{}Y14C#9Gs>uQzF7omCO^*l%7hk>%s>*2*6BmS{jR|Pw@2V-Z z*NYSYuoo2d$KZ{I5?s=V?vmgXV7`U|R1mLsV|Sx&M*X&WY^`XBPpgx-jb$wf95;RD zz>djQBh4~|(uTxHlHosJq7q4q7$7*2h8qnY=NXMS`5DIvq6=cYmCbM- zVxDp)WuE;_Ffl0+v_;$FCCyR$KmIoFmTGH#b6ZZ^Om%)!bz*wdoL@V!Hotf7vZccD zrX5GtuYK<76ni2$x49AmOX0e?k#RrMmc7_^ z352~wWLK*L6&Vb*WLjq=`(i(yyx9I_7`oWLjJAiQgR$*FKfxIJN1@oS45X(S-Ja}{ zt0B1xvyfp88^_rgXwXHYs)7`jL4YC^^37s%Z;PfaTODo}N>6afORT+2Tk`h5nIo&r9y{5?(VwKw1ia`jH(DEtv+wFg; z4^Er6xxH?4x_oTq;9r(+eQxDbS6rx<>Uw-8t(F@4)tG8REVO4ztU@W{g2m-65$mO; zh1;*ZSWFcx5=*H_*4RQrd^S^jm61V6wx)%8us#i<-nkqvf}OO?hrk1tJKp z9K{2)aIQ|2*HlJc{~*e1fhezs4u;Cr)*s31rR_JY&Bzt~z_Nmp(bhk6?W6k9wu+LP zpJ_{FFw>7E&{mYGIF%`70sMCdv>z8_aQM%_1HCWy`z3LV6NKwvjHZK_)lnRC-$9vn z6v0AYZUF`R%Q5XY4AbB@>)^b^qjLuh|9 zPy3(g&l0gc#g+z`8R*gQK{n?{2$`TJnEMp(1^+h&$Hh%iIs!AGq#_p?6crE%-BwJ?%4#!HGrYEK>XMtRnVKmbl90^OvirTQUB0plwQgHR z>Gl#hFkw(Ic}8rRW8dhDkaprf+WXB zx}%eVG(VH$(e76@%d$y5%obutT)X6WwA<5IuQMF>1vJZFP0(Gi`e2mXMFXJhgBw};aET0aX0>Zzb ze;?n2^h`cGlf8r8!t!2hBGfg^bgSjqBmLowHhgOMqVUJV`Ebu8+x;2rGjXa1bDRF; z2xcd~qH1RJb36S?`<7HoQgtVMLZ+LSD#pxoC$rGXGcSePXy zX#6`)7&vnti_~;FQ*J^W)&lrO=dz#EjX!l!Fvy_>ImlHr%1a}EA3nXBND>rTEB+qK z{3VvTId*3(Yl^kRvH`Ko#Te!*BhzbSl8kvqHq^*qxhzzKG3Wj%I?y->C`uod9aR#= zM-`LmpY7l$DmWjww^^mvc3YAueah0%n_X=zHJjz$FXWG`c(}z;EHa)){BE zZNd_v(BRp14JGfcpF53tE##2{*0iBn9;!o-S7~v9R2Cd^SwNM9n$aMYo%_0lSWU>7 zvM|=YimW1cx#~vhrOXg{RrgAA8Y_B-!@t-anS4d}RKDf(EXAOj){L~xkZMLlj_kuN z-^hcc#4P0j#}T0XufoQ6w6-=PMT?Cwgf?N+Iqf$~i=;3O5k)ej1@;{JQja8=$rL?n{FPF3@=lrRpb+iHFXn)_ak6D|Uy*ECT&7E^YjPogXm$ zC~y9N&)u{ofrg_=n%lxMfF6$Nofv7 zNkZvzjWk$ikGu%rbI;K*JNcm@^P=uHd`!a^W@2t?k^(WA3#ysfDvD+?{+_? 
z75Y6`qp%$7TK!7kV5=4?8LggodVcD8-c#x6Sq$~rt-LmP5mYy+HY!!)!cl6{bTHCO zIe{)Ij#Z)3OgpR=xnIj1GDd42P{vJjWW>*!SsG=G@_s-WUr;?cuI$=UML|Yp*^Spm z3$YQCGiTp8Gddy`LO|13&UAB~Y&&XX-yYBhUt3%={;o4u%DGym`-?B!ZdF%xs~T|| z%st2K#W!9JmL;caeT+USK3u#qM>$>jl#*AfcJ9-3YuHtq4VoR;6^QC@k+a- zyEaIdHcKDgCEc|ddJjz!_CR+>>pDIUq;_w6&fG6=bAHB$9$};C3Cg()up zBHn*?n2zfl9qCM+|GmR>Sek!q?oH_)LT<6n)8f+t8}FCnH{I_kKi< zdUln5gMNpeqbqZx^_<>Y-l6#^Y{R77xzDTHi(Tcl!E1*XXYnfZVijJ2UeP%Jrvq8l zd+qkal4q(dgc2k%2?Ue5C2xig-9U*~5z+1k<}07LVXh5*IbpybY5im9H)`&k3G<7T=s{#c}K>m?f>Qarh(~XJNS6C1I1Qz zUEuP-?Kr%N*J7{P6lf$b3RJ8?%`Wfsgl|e5KBsMh-;kE+h|ocIe$>X8wr<&NoSj}3{vR| z*R5wKhdXXAnQgez)G@8shHgej=suKdD6JnGdjb_bWo`5cRP?Mq_CD#*?B8*xAov>_rgpb}4yjbaDy z_;%IO>%YAV<%LBg%nh~@s=u+^q;1l|G z{n?PHkhLKnh5Q)0Jha!q8(Iu+gz3X}gE;>cM)U3F-h#~HA4-2d{loO{ zEo&|9mM1K)So$p&C;Lran;~RmXOv~MWUS5Dg3ts#e0S z2kmjvOLisS>G8Vk4003qGFh)kBxb&aG^jLWD%4WS*$A?p?ZLf=J#L0<(Y|fCuIR*8*8U2{4tqr+)G5aDzKcLX_W8X}K`Et?#4C=I zPX03-c7XkC6g-_h0Ln-jyPs@Bnl-pv_yXd=&2V1gAA;)HE~NKk(!mXpX55?k{iK=0 ze$f0<5&-`l><#38_6BDs-zz( z4rbtm#0Sa?7r=)E$_p;sc>(uK@G8a42g-}lKt6s=-zhIfL*fN^rdSF3QMz9NIM{2h z#0&6&^1==9;C=AoKHPT!{h-f`VLnh^j0Tl6yi;C`2IU3iHN7LO4um0sZk4#d2vI3V z*(hgvNiYSKS8fyfdH^4dXO=WzC+czvMm=;hXse-RP_=RhoWvOP=Am0vonz`xTHM|v z9KV=7f!fwX(N1a2Op@>1JEWQFEwEl5O3s64S-zjJ=dDDT4_|TAYudDmLLv}BC{h{` z$2!$YWQjnsuP*h1l3QI&Jooo`i zl1{a4?$wcox2Ak@+hPPMh!O@h6xuC2b;xu(G+=uB;0 z8co}B8z@Z593lhrJU8p-Ac$yL^c$ zHu#0G+uEpX(In8YOmiGh5sS0GYqDTCYQcPCO%aU6phC!1jIGVl*en|B42>vh&4L4) zR$6Po;2M*osfG#~C=}n1=x`xhB3oGLD6KG+&Y9-`S0id98)`v=;&zQ>i1C80smSGyzK;nM zFI;qCi90kUeMLBhC58wTYUwg3v9K0g5yQI&C29$^qgGtB0BaA(XCXC{X{o0&FE~t1 zrW#X=AX>{Dlth#~5|v%VkZ3M*rL@v9`fU`kKqe9aYl(6$Vv6>}VH#56ipA2qo7Rw- zvDVYw)C>akRavLMM6c`n(d2hqpZ{NY_ivz1RYAWRG!zR-}$@!}!5H#}t}WQ}CYO9s;W;$DVGX0jugF=TNs zE1T)u?pT=dvYAyftHA{>>02O>sMu{nBjwNSH7#v*HB`n)5GaB_Mr2G=KpoSRE{0WV zM761@K=d*dQ1hwOe5!1&q~Wy}zbX6Xoo~3*0VewEjIXP{Zu*+9KFc(J#$Xx9(SH9RJ6#SkwcpFf ze(5KR@l(skRi9Wss!ILI`cvIc+@g=){+Ja$PW_nuhzl{k`bvoLr57#6q31)4si&By zkB1no$Lfx)IY#$wI(p1E0AYRE)9vYPKh=fczqJ1GakY;wY}&BWu5riCLDpj16VlmSVP4o6EFbTO-g{wF?y&Dp>n$CTlihpT%U& zV&*%4<-EiHW7XDkqvD&$9U_leRf#g&AYCQ6v_ErC#mpq!pDwk}=$Y3ds6!s6~bMgUsojgHKA|D<2HRFd1 zyWZfadF;XyfPF0n$yUjAFZ3yI#&6U`-I05#0o?saE|9+>yenKxDC{02?=k)(mW+lR zM6Y`bp-OPBi&@Cr!32{Z$Y)F?Gf18#uefc--On&ILubfaw3Exow7P^5@Y@`A1tOct z7f8)3>_dp*Nt##YR^;Ieygdxtt+@Y!OVNW7cYyOezE6HgUW11hun$uQ*&+4V36sU@ znD;O$NMk%u5uIgvkdn{IdGa1k{W?LOU@tkphZg%E$(Q(g(+&#LONH+@==tNo2nJ&S zIu@YM=vK-j0z7kQ$HQFaJefqwT+a&33omm$E8&99QBc1=SmMzG?W+}}l`J8R7$sDY zdbnIk3Se?QOs3&^F=>KXBmS>|3o~#fAHuMotJ}_%!dZ#t=*h50iSNxCOB!BZd|D0+xa>m$-7V z66PymaxIw-4J#WCL9W1mGvcDDaD6**y5v}*`sjb)7-)|JL^1MGU~#YtwD-iw%L`*f zZ%|W!xz~Al*=HLSv}?sLNQ|%hDB+UFbyY=2oi~T`X~}jVxY|uhwWAppp}q> zG$bQksZzdae+-9RK-VQ3+s5T$%|{-|CsVK(*9wjlAmv3!wVf10186##K}wOInaIm* zaC{ClBPzhxDr~J#jalRxNX>lAiY_1vF$q`)>Kuj~uOsWpGmyqF$p%RDCh`aJDA|Wn z*v4?=ZgM~Qj{GOe)=sjKoF<=;e~^bDy+4reQJVLXBiO?3Wzq?LEyh*~ji>>dP|{w* z^x_-jP4X7Dw`)epe22UX1>WmWa_=L5A@7qvgC}2;Z^*r5DY)?~@@sHq8A?PexdA*| z0j}Hxey##1&!LR0##VW^kvquks5SnGTKzX<4Y`X9pfnvPedIr|JnI9LildO;V<=hO zObXFwWUp1|uX0ki1Cz3*>-`^PImbz@l zaW2c%=h#T!kJw1|7WNZw2YZn#_etgsbLV_6V%xFb@&o*L$}pu}W%bDQIN_75KBuF< z3%YZjZ+U*NR;zXN-8WSosk@-AR4>JEjrs}oE9!Gz8`Kxn7rZv$*skN=4)3$xdx5h) z$*$k^K5fIli`sJ6uR{u5jsv0UE@1mT`KR#>9sZ^I=DL1GzR&u8gx}}BpCfl6kaz|1 zMWr2Uy&&ftq&OC(ARa$8=#&H-D#IF-rVNyZEJ&;$B-M@*Py!hZhlI|8{FgyOBO#yF zC<8T+$~dwXyi0~;{1$TmJF*M@+Q}YBIhD2P;Bqfq9|v#eljrcOCoe(X8X$L6@{&lD zvlIX05nnk>oM}ThNNuUQf2J`~Qfs^p} z91uui@Rk6i1CxO)XFJJ8%5reeMT~j4-wX5K0S^E>fStfDpdEM+Zg&HJ03HJNz&-c_ zpK2fm{eT1@9heMcIoBif0fc@Uq4&8$A3*2>2z>yd4VUN{zZ+Nw 
ztOqs#_W(48J#hCb!g>SHB4jGN@t|2BXx0as^?_!6pnf0d(+B$Wfj)hpPao*h2m18E zcRYN@OB8Ph#oMn!@lH_u^uI{)c2K+>EsEA@L5?kwoKxPqsh*FtOf0iEAP-mPOdrxl zL!%P$uW<$&1|ozRpkg_wST6BKH-#p>4?0a{+Xl}A)c`{+-cCU4|J4kx(eKBZC8c+T)+XW#rxgBI$%Ap z0k{X)h`d})^7q2c!@xe^5nw-X0GOyIJc_s`stuop-3j0s;90;;BbxJONK-SUsS~v7 zluG&dI;{vk>_9P40zf|)xgJ2S2OvFtke&hLx(~S?aOHXc(n4uIQO(dPm6wU?#J%wM zFt87J1lSK801g6=0*8RdfWyEM;3(qi#{Eg42RH`w0>^=;VRr&}26z^D6?uIF5GJgR z>~K{KlmO*GC2%!99fbX(z#-r<;4p9m=mwqydVpg~x!Gp3iqHK?=XolN-M|(zcJAyIaFg0=JBtoPk1=-F`WzBGy zMTm(??q1+wU?1=Zupc-8j1FI}VXLL^<+RaQWz*@SOljzHe6$=;`J6}&-RYs>$nC2f zj@&kSNRmb+X(Fk?d41@kM-TTqyit2#zZmqHi%2ALKEBLesUMP$p4xQ8Q4Q1}t@CiN z#htcY6P3jWVK=^iauR-?1Kjmf~)GFpTDPg6IVW_OOFS>G_teo5-8 zoqq$({tTM^4KzFRFQn=WXm(}{%~11V^y}^XzfaXGNC)NS>$uaNC*2uEmTTGz9A6)~ zdwy4|72Wc#8xBbaUQ#J_%K_!O_4e~d>S8jF=5~&Y{5}E`1@qIum%<6!NLfr_vEBDgJ$CNce4FBDb z{gH=}UQ;S^Oh*y4?z~z^dtra2I<*|!FDITz8||@bQ1f|1p6IAzY|nM1%`-cnC8==l z9vjF#*jS7_;-!&?6=95T3#X$@{*w04UFVH%7wZt-|A)4bwr_n>`zE&@?okzsx_6gk z#|=Nd0hefJoUJf#MU8((>Iu*u89VAp0ZOZZNpLHNsl}Tw5b5kjUtO0;!@X#WS%{tu%4A4K~m2pqy&AgcvQ70kkdZ)O7kK>f4t^IIxvc@fsM%z zr~YHN{FF1sbY+3Jw{vs?01Od+Y>gir=k>vJ+dG*2_!?RN^b6+)h9-ZOBK|kh|MlR% z1pGg1_P@Iu5YGfi1&$($->z>B*5!s+qs@Ag&H68^kVeenCYvEROVGvoK5w(s>pMue zni3h|hTwTsIOU zhwQ6s|17=iC;bzi+&toErdMZN;a*_f;PYCxvi`i?clh9sdyWGhulsn>V{I&Z=UkGo z>QcAUY0}m7y{J@;Niwo?w&ZHwti7UtZn)6X-A@eOnu>})xk*f5q7`O+bWF_JE4jmz zRe4X$I_r8P@hQPSNhBud=@TQ4=*uB!iH5JxY=M#4{T52DZ2Xt34!UUlRntW9k{}1sa>0syBmyq-MzUB$@*! zZf*+G7>~*}|%JRL@%IsK(X=wXk|5uzZc$Qf(TwScN z&C)UeAH050bALfm9weHjmVk>}y~u}?T7A^Th#fmWSar?3I8d(bq$pJP4RiLIfFJtn z;^g`xTil(fjneJ6snS5{6o-y>i5Hxe#;^xlP59)RtpWA=N>%53zF82zcsJD7)VF&g zc^+JE!X655u_Z3{<}KDzUCg7kK63hMpK6oA!%o2;eq*nj>6I`sBdqz=sn|fLeX~1p z@mf_MdBoxn+G$Yc0nM|LVt=tT^_)+pKR?l7V!uYk4Y$T3&Q0jHSu?$MZu=^84{oN9 zIc2G`WosTfFU$)N%W??U=(Hycz5R6})@uLIxZHl_kFER5T492FzS=`3XZcNNs*Q&j zKmD>^^>tG@$$=lg8-WkSR6CVPnc?i{PBxh4J&LKcoyweY73Eg$YN~8aijp=MPayR6 z{Hp8hi-olGw(r5Si)UOuP8#b4{YV+_&PW;~J;+q6U4c+OVEJkFZ_xTkp}Z=_1IxSs ziuvyy=33#T?Nd#P_eF6knx;j5<}W_`FS*)CTaS~%Y#`@ASjLI4TX=_w>;0>e-*Cs)#>@Z3=IIU!S;6^V0D=PRYN-#^vhH*hq~ZT69;(97l<>g(5aq4W&Nl zP{#)xYWqG^!|NY1EA1{uRn-0$e-JtQBh|wH8~wZtOl8BY90WdxTJee~JMY8Xv|pQG z2!^SMEktG1>edFngvx$~>_5BDjXM!``vWUdR&rgI+S}a$Y61Mg z?=hoWc&AG<)vuk@-ErO{u-uKnCP~WF<%njH?C*^@v&RIt-eY=IDM=g0Tfa>kT?yK$ zk1wLB>$n}6S4S`MCc5HRMN@TJUMn6rN0J;(b?yj^orUn2VB{k4BQ$o~NwoHb|Ez+#Y@ZxBh`ICh=aLjf{IOc;U2R7I^f?%63}{(8C*YC zpEURGQ$G30N8U@;a!b}pcR%Wwp2hjvFIJs)PY`fFN=R;(Zl%k_Yl+Czo06xG+8?LA zOfnnwPkFqQ*=-|dB~z}+Mt?3c67OoM>pyPyJTrLJYrpDb*mZ>NuAjUgMH%hu#omlF zu6Y^@SJ=wDoC^wZ(b`D$N$wX7D5NH)4^IWN?0v?Oo&E-!*gJ;)k|>Ziie0 zP7o7n{-+kufd8%^$pSS3?f`%RAi>hWev}Rf@Bea;8GtF!$B(qYK!Srnf&v4B?f`(V z1powEKR*8{FMv@{N%%+02na;L@xM?2K@g5W7X$#H?EjvD>Vh@>WCQ?UHy|T@eSKg< zQvx)g_$d9f;9Ot{X+}tZXbZ*}#uqad6RuI)pp&?uh@h09oSh*W=;mF#MSrycj%Y=$NTejc{e@2wNK4%?;C^_o{Z-VOasu5wsV6JKm4bCl@Y}H zbP5;()2`VzT89>5T6puS-z|8i53Q4IYu4*j!l^b|Hcy<-K3$*j5bB#_dvH(9b+)W^ z__mgA!PM6BJzp)4@m7a)fIQ-%JJG8!FhJzPtR$}m$~b)@G2*;WEVB|#Vg z$N&sT@Fuw^a5<=b#siHaofxG(X=!MI6_iF_f*F#eAzDKc+CX5b$Or!nxf_tPl6b

N`?RC`@lwXbC3bfgCS&z9};Se$Q$7 zw^q?6OCK2=B5`zUs~6Gp*io{0vFidU6X6_1lFY5n81E%%msxq&@G8OkkHeacug)3o zDB+m|4e3k;EO+5qojV|uphsBmL#H@_ULP6QU%cTgx;MeumPcof4cM_k1;+8NCp4kz*?(R}K1m z;Wie1)$%qWUpWXv_?COPuRR(ms$1XNScr*hDtI4xyl)o6r%j=Dr1az%&FZg&kMOyX za&gZm@_Fiyv!*HzQpqnF-HeA6sA$+c?g>Wr7eUob(eaQ06ZylSrV9*c z!n`>|XL@{idZe}UXB!r4pq%V=;)Vxf<{o?ybz!#OkL-)m>A-)6f#djVhu!@Bb24+? zkYT1O(#{;u9%7&GtCz)&J&H^sju(k;EYPNe+2){bqukoCDI^m73z4S)u$FR)P906< zjY}i1!G%cL(x9EJHqUm0fDR+5D*ocG2plX{oL-*9N|GtJ{77W?X31(Jb#HBOR${5t zt`T7vBf4iDKbv@4S9Q&kQUgGY;(T)@&752@(w>@N$TF+P;th4EhZQ21Z5u@I)AKD*j1fW^ z-02P`?7phJjWJTaNl(#}c@g4lv|!7sl-{UR<8xBn_1J&YrR|8Z&mVOlyk5ushg<4d zNv`V8kXQd&8tELM%3l==p(M@2b<)gw&^R)L!Ohl?v0ue;%Fq$nbBvSI3|TG1p=K@PhPFlOs%LSbZd~gRGfl%Em-u1u@P^6164crlsy7HQyZI#!JItURZomnCu@Ct3iE||29*c>b z7!%V0G<)$TlFY{!Y33LkfdLwZPlatybk(aG`3nMijPc@|3W#?Oq)MF(Lt^;u-sn_- z0okz^;$ZTgJP$cirYJ9bwoWE|RXbh~(!dFqQ-EyrA%Y@pbpHxP)Fc~eCNm5JQWXIR z$U7`(E>1$E9xCj~RNhRs^u$66D3@dgeM9TxH{%!U=8d@6lp%;`tyK!)faHhLNN~r= z;{=`1-5m~FtkwZ&yN*544#!t`H4eij(67+#vq?33smQ^CMi62S+h?WjqCfksOdK%Oyv5G&$T&Vn-T z8WrFIN!9HANP@(V@DmBdKn|zySV*cGlQ?`D!=`#2GvrMRVX)jgqkRm1Oh2oxJU(}R z=XvK{aeU}=Ol{EIKBJ^gj6yMHEMj72%OOhm=BboJs5}QAnwdjO@w)qzH z9lgJn0~McDZ5FG9irgSSbR|01o-=4%61DNjSf%d_s9i!H;}Wk=EqTAD5va$oUhUKm zMwPbPb#zPe9BnExkw&w&?RTy+M$jw&KfPqlKW5uooZ{eU@*TB=8?H)rtq;j;{Ckz< zaaScWD&)H>3p$^0Wy`S!(UVr-!K??3h~@-1i1s<1uEOUFm-k9z?^ipKMBkZe6Vl|) z{!L)mZ8*Vr#kT64K-(y?TGNn>Mc8rwQ{5rTBZ1H*N%A7kpB;U<3r$6_x-T60 z`4Ej4I;iUh^-94HJ-u70ktbrqF~IVRA01x0KqzkQ-aGf6a9{c8Q>MVW-OF~gK4o5Y zH;He@CzMtP5}))!;z(1#z49>_KDiw6q3dPu)bWx*v_)ijB$m&A-l|| zj41vRprR+qHZ)Bl9c^Vy3IsR#{&|)x_7sGGBi}E^g``31yw6)Ex3-q|wb%OUR3%N8 zcTe8U_T%JT&yek}0Ffo{cYK#w-+xK(5WnEAT0E9tFS%8D>qs-V?ncT~LfP0^tocAI zkJn#_``s?)LsyI8W4*_$yU+WIKVX!*Oelhja^9Jd`)Nf78$GT5*pwHODo9l0Sbq2r z+gdm7oE2o%_o(5$bRP4Q5Qg?hwTe?okp^v9nqUJMVyc3 zD%(nPR{Co451(_N!Q{ zJAgfFy6ds`yT+>c5BqV^ty(BBrZl)97_d|26&vo7I~W3JkOJ2f$zN}ssX-F+nG{hjJNwvPkwJT4;t9=NU+ETyVV+D4>Rn70wb+lj_VI5b@FiwI<8^N zmVSw7FU(8|3V6h*1*wOU9z+yblBt>wl-POPPeR|M>@>1Tz9B#d(I%{wd>__na)R3q zNkvMCoK!R8TA||&)*Ft6f~)DC&!)kqXIniNU77UxP5)W(qFX?lPIPnWbJ_K8CHPUL zl#+0)O~k$OSJSK;MM7E`%(L>UEnOww%PU-!I-ebat08!TfIBA52~YQxU_fs1?lMbAt+JI{pk?=%UFEPG zhuasA(BFRfzj<76CK;iX*BaBBk_%}~hKrYwm_RF4W-&dx7_I?vAkGzGdLKHmGYIW$81-)>u)~HVj3}KtpSG|02ZYToIp4@GC!85 zFb$FY;Hb^?6{gqG+1o)<_iFupY9<`9%_ep7WOjFjrBo|ewwKV`^){WxomLI+EhvXi zyVIy$VQe-Jzn9NTdfdd69ZhE_g%rTmI3LT1z%*8%v9;Z&J4lYXX;J z-0yAI?fctYSu=+Zsd*muzuV4cGGqeOK=fdEs3;hyB9{pgyTvr{@mYRh*2dAVv{3;X z{hI*4Krc`W)8q5J>m|imlXA-PP?%pC>P*TkUW(0RD2u*u%x?vzW`tl{N%r8BQw}+R zRW9t6qm3rwrm0QsGCl_4igJ;gZC}0RS9^L;9w1TdsUQp)M#QStxFv@ zwCc>~pV!rRYwN?c*1K$cIt6QZy&S*0^*A}TLG$uwN^cjZ!jfbq53+BH*N8J6C$Ic& z;p|LS&@x@&`d4v;w0RfXBP?6U_Em{G#Avr<0f+=7)4d?w8mIR9`4sx5z)_%aZ| z{Znbr8VmI8f-NNbDRsN7mpukZ(aQ9h72MBR#9?BKT_$!zenV3gEiJ``-+|>&0iyzS zqcyAAN6l9mYy4Fno}Cug;g$q}vx7s)Qz?7#twS7#4c-`A=UC0Cej{JSvKD#2ap% z2>#4rLItD@p~?}1O8j|nF+#p!u;cR%mq097sT||;g#n$fiKTWmS+NtHQwJh1fC?H6 zA%a@M10;&wP>Ff83ky$w@4Le|O>Kyvm7h&#N>G3F*^>g~MPnb(TCWZ&YE{Jjp1E?W zSX!3*mQZAuW^bZoG%tXRn4VJ#m1I-Pao&`Zf#g7(V6>_nK}nRZUnLJgdxywfP{Wlz zFO4EKj4(Id^IX`tj=V&ILsLG zvh&TD=7B1>>AYEj{HL zE@Vqv*c(zDH<^t~(5V>%p+TFw5v0SF1a%B^r7?8SU3IcED6)c{qDzuhvnsW?#%J9m zFFDoa*V&7}rq<>)Dw0l1>3&Vm)|He^Ii4zANNC2@)?QKIGi{svdHZ)D@^2B6)Ky&r zPQ#W=8)C*{1`*bsp^@9vd|EPjgQtcUikr5W{iyz)86h}L661cMyP4N}F#Fwo*x%5; ze+$v&WoX#aBgvVtOeTiR7YOh*Q&Yccjx6A^ax9j2Y|=W@+F3*jN9PmG!@!@=ap>Ht zcdUWXX(iD0-!CPsW}d9V(XJq&7KQ2sf=d3O6^DyVbPpRNTrDyn6dMrG@j~w#?-$_qxoEx4v!g#XR1Oj&mCRdt#mHAtuTBni!Dkio@Swzs=}bca!-G z%y7d~ij)RwjVuI=mgg{-mu$6PlR$l>E=RmW%otA?3MGzE?yDG)uTh4qRF;)zXfI+B 
zWYB~jL{&rO&kWlfpHIwvbG5uPf{?*W-AUk0GVVghBXG+aZ>4=3B|aeaOLo`K(+OXG z^j`pg_bd?hBH-B%gd`NW$=^KSGayRAzw0PI{{mg&WdO?JeKY|wcl@;?u(SrL38Wa5 zTwhlZGz)}4r3A_V8=5dG4@J``7p>XgVqg#h<|X?k72`M|uJ?t78Z(~({TgqzSHfS< zANg!rDul39_GsRH7o;xLFo&TmpQA_kZo-|z$5T-egD|0uwR@|eDZkD$o-Mzse~K`| zWscYm!{Q=8M&|QHo#+ko6Z7x>+Bf)u2=1SLSE?9js{%&V-&*D;|CXG-FG%AC%a>K? zUq?gUJ`N^~C6mNyJNF}Gao97%D==1*|8X{w9#qRLfGXKEl3sI-11CppUAq>ePiCo} zRZ~R2kx7Kog#M~VrhW;WC=bf68zD4s6+1U zBYIPR!IJKAD5z&r*2Cyiakx9OCL0-SWjTm%scE+Eh3m{6WgWEan9LHwE zRo7hNuV&tkXjDz%Q_#@2+`cbgbj@tF6T5%xZf`OjKY9$`g712EJe{VeE7x^6XTH|E zCsyF?wrt7e)%lIzgcM!K)m=qxFl_ej<9zPrNRW0Ng{G-AZkO5o5Qi&E6(~w-VnZq< z*{G(}sqZ;orB-dHa~FC4?ee+KF2BeLO*5UOO!G<0k=^k&O~29I<#StINMHD^-Au7j zSMsH!8|F%zav9F|0{RV$6bHBYZx&LBuFo3L%^$UVPF1`-p8iPb1H}h>`X2lZZ#@k7 zZ_l|?1`_8gtNE^dW{doRNFW6lIIh0EpMW(ea*U4f@Gf8aWFZKyJcqH+sT)_Etf+-Y zsR2+#bY}_mKl9Bb728uiQJVN@6r0fo7*ohU zJ&p*!1KVa!f#hQeIHYq5hFco?w18sz@ZJSva8G>KfZ#cIAbzQz`ME$WaQ2)6z{<_? z`}opJbKv75?d=`}cjI=i&Gt0fat%WKqZCh$@g14ZDTZ$PPjZNWK~~ z3=R*m`$g~o(Tff0(`6({c;@I>##gUl8vAF!joC}0pEOT@C(l!vJerG%?WmGYX27PR zK|0|6$g99@!L&G>l&bM0nN`elN?kXiouP8{t+ z`|a#RMc%p$)0Q_xsCJ0AKD1aJe40=b_O0E`;F^o37G z*?Z){#_BIrq*CeN>dikDX9xOU@c4u_#Myo5@rYV4#PJUmB$t8|&+Q_4-Q@-Q8=8Zk zpd-H<)p!ehv}HjX0N~1FUGKre!Dhiqz~ZCoo}8p%ihU;-#6}sv%DOI^A^n->{><^* zLCxUwhpjL>%&NZBWSC(wi|*Etwe-}%10vodmr;%E7+a35l2L-+i69ZE>>n^&1NdgW z;_frp4vqkx)0Udw>CvEcjfDJP%(fMqOm4!*@?vAj-qkAkei4e8cW4@}w5ef8 z9y5lQf6@M;p=iVN=MZ}HD+sg$pP}q1`vrWKg7(FV6)Qjdj$XqKp!^f+}@ zfb$Ke>TouuuMF?EX4z5FCeI95^&0!S%6Jc4q9gQoL=L^5~xEt-)IT)?#4{ZF4e3Pp>sGO zf;Wi@A>sLBAZRaE;iZ|0Usimk0uok76u$$pMRJ+)%g%7D1)9sCN;%}cUHnT9HJ>?UkeJS!+63-_(N>4s9MFps{M{Dq3?8_< zE*`k-ZMVnZO9!)foj1Csc)aF+gd4F%nm2J>5-1_h5NaOOqr%2-K<)+aIhh%;l^ggI zoryE$Y&0)2BV!Gt8W+nkk|JssMAM&LLo&ZmQFGw zdVouS@Dw5Pfn2#@DP@Qk#QQt$+w8}@w$D{O2PjH&grY0LJ5{VH9PhYsc}sTECQiq zt*u*b49c?3|(dfq81R=5Lgf zdB-Wft&dHb`%S59lbg71?=_Hb4!0Y8{w%R)c-)Wo*kZW%2-B+!W;=T^4j!Z94bFB` zv|U&H_^~oQDFyO)TQ8BlLqolK)*CO8I?>VXKK?@vgw1JZ0)mYS z*-%hyJ06L0^pm09Q-sI+a(I{?Q6Rq-LLYLLu~EFq4D~hq2)`q5vsFs#|CZ;7`mur# zJLnr__Dr9<>NPvtW8qbFUJZPqohdeNzq~nP649)8c&uK(!Tb8Yjc|TRc-#xin4;M5 z%9|VsDW(-#TJu=ZBE&%G8bB)osftj~p>p_kSx8pMWl&9_OV&bKB8v19+^k7tXWl;@qKpFlk#N9Pv|G-!!lsWRmo%RpuPKCaSis`Zt@SnjZ*zKlvr9>}8h1-4p7LTgKG zLNCc9$ORaG&vAW~hU?*!h4P>G9;mduQAb0o;}A_t8L`ux2VhDqt{UsBk*2u)k2vIO zxz4ic!P^6Pa_f^nhx^d0!=ChBQgS!dwK3L3GNN~oN8YOVO_YSzx_-$^?&ovP&&zQu zz=fWYHWL#mkEp2jZG5D5$({@|~R+Y&X`8 zMBFmA81zTo zPp|mzj_DN1W*;w@dOSkTu(|f^2qSays2Y{PsDC8{8aG5~v!PCG8lie`g%=~_C zaIV$9I#|4}XTzQO0jn(g*YHf%K6`^u68&cSR%hzd3O9R4OLh5vky;f|EF67y?H{S$ zOxHNy1S7U}`{G00S?vgaNXVaMr}AMjn;H0X3QrsifQ-kyCUUYgdk2rX(jPR|lG^Zb z#gq{y_|GSOS=H)n-bp+|v%|u`zwwp@!v!b>Q@3LE;m5LQ%-QGpaM#Ro*VoVQ@9#zF z4h`Yd6gG2ZsA`2}MaqAq1GEA-a6kzm$6J6wmB$I)QiU2C*O})-44?R{jo06)k*=4^;U^M1- zkr8bbj)I^|DGC)ig(m87X(%dEipc_+?Ra=#I8d^LjNz!djS|RJ;<%S4f=ikB3;q*5 zxuUoW(%4(}L4h%McN`j+jAr&5$@>@oocJgH!BS_gXe6UTvGPTGd8udV_uM^zD3V8J z2tuP+wPM0@&kdHMm9r*m9KS*Wh=Tw2f*N}h>vF1$v#d=tYe*b)SJ&` zt`4mvoFr^_m1L8XGs~K_?7UgyGIrQ;Ugm8^lxy|W0!5M>scMTii%VEDzD}(<3H!wSI_$HW=fg z0qRn@KpdK&aiY6uT&O(5MwkJ>&M=s_2RsUK8F}BIJZus$M-g2qf#AnX>Bmr}0Z$jN z?;Wa~5kKCLou7^u&6-~78Hn#181Q?U;uF6Vu_yto#mwz_gqlOjVZRqzDqjr`-gZox zhFk#FRJ=UxN)&N6Y_VFqc%yiunx_IKNs3E?r|L^cwy4}74J(;Kpm$`tSa0WqksXdd zCVLRnC~4&J7L@u;-}=QM2-ND}NM>pzW*CB~6+AZk{#W|VSX|)2V%+*|jL`wJK=rS5 zW312uvl}taEHvW@oVVW`kv#y67vCMnK7PNcWsXd~Ns(Jju*$nb;33d0io<@rEl+t5 zF^=!0SMe2R7RWeBbk8TqL<|NP1s3IBLZ z`5-~;a$sss^3Ws#wPsEsW#}P3C3aG(siI$X5w>(1`4yN*2mqi+XpTDgt{0&<*?9$G z)xX63Hg3_+A%%E)=ot+U89f>|tP$ooKQMBcZG0%+u^Sx8 
zN1S?N8%jv_t&#l0a%RH%s#(7KZmcS*WLV$XjCAbh*n~X$RrSwMbtQ%*s!NUEN!q3my+30LHpQrI{wwYdBz>dB= zEwzH3Kw7Di#XU}ztWM-umHvGoEM=fNUl8AOowrEe#br1Q9C~jt+S)pmnWmB1zwmA$ zYhA9N+cBgdJ2V{GgG^z!dpuD2rciI88jP7*5%()_?C8#!0OTGu#CeW@S&%&{)b6nu z#L(~1S`bCuOwAZq_gwv&A*lQ8N3E$EFOpkX9kGyAt@dD*YvNLeG&kpvr5h48Yg(P8 z9zRy=Tcd57KW>Y>$!#qhQAqEaVbD%pd_(zhe#uabzf-L=`IPw%O|2HjemxZPCwZ;g$w-jasWzgu!d6gU@?V6O_@?<*W~*zy*k;!Xo7A`xtiSk?9bZKu`&1C{ zspcRWpQ_CRbM&59d-+@^mU?Dy&v~1lZAhw-F8_k9$Z%BBs%b6kXF3f*cI9zkp zjos`G#p7z}rXff*mxU*sFyB8LHp$4Ro5r$koQ_FRilE9+#hw$V(2yBwnsrM|sHHiw zQykJK#=Zs$6azBlkyOn!8(|Ws%ZM}S_(*PD?Brw1;VzuO)QgByvuuJr2`UVADjnt9`Ak<}#h2R;AkEvLDb)Qv13}#X*4C*E`PuT8s1~~bXClh0E zcnDN=48Qe3EyEUcwB7&ociFch8UMVP+p8^+_emkK6i`xvpwTq5%tea9=`gh z@^9D3T#vyo)F;cz!7R1k`|j35RqnM<+*|F={kd?Z@kj28R<+xfbQN^}+8az4zf6+d zM8m~2^`o7Ou0Ch`3fbGRs6ec+(lwb+DLq@kAdtY6q)fLt{vL!g{xfqvd)0qG!t{2n zlo2(xHm|%MMJY|{iWljsuB#Zq;%dn~E)q@ZC?VK}tPo{wrq#)NREZkgsfeh8QgqU8&6wO zG3-D3>$u-}wv*K5^c4RTjvSI?DJV+l#{rggY6uJt8tC=K1Q2sXvk4Mx@HSXJGXVLFxs4; zK}~O@ubeG@*jjHDQvy1pzk|WYysEXzr&PJrc$bmXpAiOyXvl+d=jR3zdm71Zo-|pe z;lYXoCnjAk1d3=NXfo)LI3@TgLDxNI&Z+ilLT+;HUl4U84%0)_(eNvZK}{1Zc9!X8 zwNhpWe8U&gRI)0L_eMSC0lAlHjx2hOphMDv3Zbe)Mf z4mb!lA%-~tQ_$*}m_?+r8{pUS2kTDGtT3;M){BMH`5--^fMa>NxHZ1{P^>m-n5V*I+t%L`TL( z?P8G?+Zl*w8Hlwee%f#{Zg;|R`=n*et6Q+zdI1$C3c${2(>&Nng&w8&qcKoA*ANNRa`Vj zD~dbh8!#QP4Q{a4yrcR1N?f_#B`9Vm>tMkF`(Hk8BT6791_*4$;)6_SEhUa$;XV(U zh*j=Jrttu{;{YDd4OpO6Vt6TR6`Z#>=oVYCHXnbWl@Xtmrb+wc#|0-4DBJOZyp9~g z9sPlDeUj8aB99aeyN0_avUO|h+EUB{{^XIdi6|j7wj(gHrrHdzai3dptmbh#in~rc zmCd~58mEtZrq28#i(k!%kiB#7|A==|J3Q7#sI{+qB^-mPLrc+qTX&_*C5c2bz49~} zPHx@I9}!7UuODU^Ss}ID4qgSUy7JgMEe1r!+a) zUcsiMeAm`=uo&OI^}lYtO_@Y}Qyl6IHxrX<+KU|O^B@;&ttx^Sg{tf=$X@*yG6?`TQL25ED=uh=8hX-l|JOEfUUtej?3YZ@z-8t zQAi32N@8uLW$P%g7Lzo#KTFJnDYNC|cm`vTFk>#JdE;N-0ILsGHCSeq=KK5zjDsg* zJ9uzz#1i8RYMA`E>6X*1TgG8TzC zG_LDh*};MFP>6#GR$xRaH6y&fQh6B|8PX;hXtTp>Q6J8fJDts2XVw+8npd?-F>f}H zHP>76Dfw(ybr*uOToQN(Nk(TIDbG=lW)Duzw>e2U!Qcw)5|g5(mMeX>%9_`QOX!Z4 z#bPmMd2@dAe(QhRKIg2)W2m7v7liXj2i!I2{mdS$K9B~FORzDj0>#vL#A(NYo)HO9 z#{t+S(g{DM^AjKkEO?+XEtr?N>$n>`^DM2Puk~d{d7!W<<)Lc6_u#|fk$vU=g1(ys z{@(uC__%l32M_(NDgmvrWla9fe6#I;4}@etV+7-f!u?r{T*O>dts(d?%WM6`|JF{- z&{*$ZZ16aV;xQyM$v;q(ibsf!qF%lqtsF7T{+OrqLvXicIx6pFHC>hs2pMKokTrS7D5Q#M zkBGe{j;!rruos>Ob^3Ib?^J9<&3NBqqTcL~Wj^M=LAVuuz zsMv*r4AXgs9va*WrJHSaRM(hkjpAMGJL4HUJT8fvTf5w)iiCo3iSgIbX!H`VwOf32 zFuE`r#ZiSew9B|zu|_A3RdRm&MS$Q127<&KUyFs#R3jkscjE}c12_Ggh*UuAqXL~2 zYOWD!(c$>nPx(dQML}^Pq%Hw1f*_!PG*F5Qt#E;!ja)loJk>{-u3GeHnZGvNI1mFA znE6$dHo8@)$_0D0bSQPAC=^i!NTKMZMBjt5;&?v^m4=)kQjkr9q9f5a;kg$V(LC-#uVJkBlp~z#5L!GGsDgL%p#vd_yRw49iI+Ls<7=6to-nt= z14qG{vs&#f7uhI-Q>cui8K`=$b-aGeeevEId;ivB`H8S&;};0)s|wNt3<>c2_wyg} zj0ayqe}3zRE0zL&imAzMI>CyIQJilrI=pHEf3$PKBHS-_v}J@QPJHn~K@2zZnkX0x zMpM>6bf{u!`_dmvy{@w)L$IWdIFnX-zOB$oYFS+>yLx+MHzCiHht_p~>OLzu$lA&Z zVa%Eo8vpd`LTQAjE(5`kflE`x!CPzekNPNv>+Bv^rR}vE|4-LgwoFa{`5Lp8>UjjM z(CnYk(=-tLp^Q`JvX$xwT<*N8P64R1U6~tgMiY$B@$)=dKyQ^;uBhdI5*GWr1C4&O z?-rQ4eNujWq^5|Q9^3__EG2FyFLyYZ?wt`yDb5J8dwhSZ-%nHBsMRtAUA|mbBdbe; zk0KB%sk_X-eyiRze%ZA-?lYe|mdElHTSQgrMTsa-D2N7QCLd>NIS*K(tWc56j?^`% z$Pw;rcINL!Nc!MtI+?r94lsq@q(rMBimwjjZHA%+>)R}&n{tVxW3462KV^bPH**nn zemV&lY0p!4;+8&H?s#t>pxP*eBJ@!IM&TzoQMaBjza?a2bjKwGB_>H(s<&SC4XEVh zasNG&d9OF*xlFJ470mw?#T27aLR$nu_PDyE_NzMUJDW)O=DHfaZ>_JlFIR}E5CIwn zYzpFE*v!_={hfWG_2A?llF`T?M3+DWohxg^&sYTn_`94H}OMF+y#}CiPY( zH9@;^cRHY+Cc$Wf{WKy3w|#s~RpzOaz8)@ai&ac^$0BURI{QghcZ=+kZ3%k!7LMQvzIn&Ql^e<83JBM}&5#6hPG6!N)uQpt`0t7Vh=k3$KdK@yndF;MCsd)Q&z4Y0F&V%e)f8 zH)zY$+mVLuMK*lEK&Lytt)yTfDU8;G0<#E1pdey6_RZgMJ&eF=Na|TyQcLo}2zk#1 
zGsq~iu^q6pv*tILMV~`%duL?h*lVTBopsMc%RUS$?<4X8 zHZ>*NYN?N7d8vVpcwPJ_FMXGT)kjOB!~?Q<9l>zgf0a2aSDn$1gRlPm9+D{?<;m|C^dX|;w^(5jiPLeARz7UJ>vf#G?o>`v@@-P(+AlpdxyB9)w+9KQjjp%VgV20V z5L|Cr{G5pyidqE;A=9Z+mhF~ye6M|P?+|0D(R2bx#k;?9ft6(ut+zpa+q1bIjj82M z$j@&<=j3ic<=Y|T?Hjo5g_t!0k^78+)}4Iz5OJ`|b*m8}L&tuldTH;>_&8yF;vpV1 zT7gmtb@%VzBKs?$@gB|H-3}*Gc)T6H_+}<^pSK>5%Rq1QjzXJ009hLq1g(Ya<` zNVaQdpsKh2?7SGTXG-j_9-Hm;I%aK#6H6v3d(RL2cKLq-mn3N0jnkcFwnyWmXs^+3_W5R>qs$Nt&cQwP z{pKC>SA?4qZg-+7T-22CcoI#r9Vrmv7SgzDLyu=ndb3$0*ubW@PP5)mXLfQ{mii`G ztr`W#DFGmZ_hT$KrbV7u2`1__DhpT)NR!d*((Kdh*Zf{1XjHoMdQmT!&YM}|`j44> z8q`Dt$jr)}X0Q2vGtry%W>A}KV~uRtc37@V7*5BHYy)z$_;frQmz&#(ER*bbSDvO2 z8X^+bfR(-cRn(l=3;ek1D@aMV|MqWx`)K6(9<@4ibn_!Bv{ zU6Q9nRG2Yi;Sh<58F8IRL{{S!146pGPg z)Qw)sc&jR!DzL30RDt8MNil4R6~=I_vCW8`#$qEbW=TLvD+h7|q>v%WZu+jUHQdOv zWBaaHfu2EN;lW8{I~f_4$tmtys>jeo1=0@P0Qw) z4pvezIe?;&EcK}^TIW4WoCWZj7jZRc12FD-=f#&+qBlo`OPMuQ%{1WQ`2!% z(Uz$bSGE^NgTL`5%45!waM+&TICF8s!1iF#^0v0Ev&xG@$1MSu!Q?A(I>Vuod|zh9 zlKJT?5`x-YSeoanPEnyXSiri|TvG$^S6$g&ua%0V~$i zkk1zJQ8W>ce)ip}eDGWEL+5Nk=3ybbJ;e4w8{BFOReJIhVY@w?$e+-~?+7IP27^Bl zD2TItyx`LJlMq1z7d{mlXGi*$3(T^ZT^~*$sA|eVu)meV<*>+O_sV8gv?7jhh{<(E4g2#|r!O&zSQ# z!x;lb6uAYf#xW<%wVW^M%tGMOu5Ac%3ye8yZn-9RdEg?0SFGq?nYuQW+S0Ld|H|sh zjmQKkXmelP_=Ez2|0+`&pN# z@#;rcEPZ%;V`;&`TItMX-`%^gs%qh0dOZGRc`y596raUI{44T2gKHS7%-vW=?VuZx zZDfGi$ox>q6ELs0>Tx;f-}l7yv7y-Gbn3yW*cUM&mQm|lgYVm>Df^UInK4>hmE|An z^+i_Ght5Ai(MQ}c`X)CE;Y=bo34sAM=3KONDbG@b2z^|7i?qaVdbQT+vYbep5&K43`Jeakjs}SG~?klwHa4(R%*h`Di_D4 zs|#FveDQG0;s&W1>WRf&UN(lsfb#m;rP##3I&GG1o8z+Q$L6sa~t*qy80r^-i(})C-$%d>LFk! zW)Xt9T^CfHJaOWr^ctKWJE9D}LMQi1KiLW|<&7U+yoU)%BOci@N;rf>g2Lx!3?|jNJA^>C4olK)FjTo%;WF zD7bK5uyBI#E+WWy1n~gR;M`4E3@_)V5plvdUvV12a*Saz;v@#D1J-+VXLOcB$a6?l z?0ifgc#LN&=l|mjwI90uZcM$L^##?vJ6l@^v(?qv!PeHDz17b;N>V;wipKZc-odSJ z+1Z;+_U>$H-I=Yb%I<9S*04`&yuMV#>5Qb<`dx%L1NS*nBRyMjByyk=wV2%~2NR@y zRKxC-!wGCu{Wv*JE)bq%Xlf>i`7-ODF}G$1vhg!y_pQ6=u#x>GchZq=CP(oDqj%G} zReDWc=nE){+wgCwOgwCiYo(zl!S3W`pp^#2d&ut7(^=h7U!U)k?+f1rpUAtL+>71B zt=|s-I`v;nf5Ps+z`H0HXmap~CGazdW^IU2dM;gY3*)y&@#Ei&Y`oYk`?EQfyFcJO4`r0S?ER8$olWDHQDSU)KhAPl) zzeK7FAODP!96QSPR5}bDGT=gx+pQ0rM#y$rnb0S^iD;rPaVqge;zB|!*{9!66KR#9 z$uQeMLIx;i43nk#Tl4qill%{lyJ4C8O7{;ar*3m`=!3F9nLjXSPJQcMD%lh*smNo+ z+znOeSyK+HFn_Yjc;sTysQR+Isf>#Q-#ueqqOHPl_*d=MPMmb@l(r^AC=*+^pfXda zi6^47Qi~hzSlY0nqm(<{G~m{%N|KWbx7`p3{HBOr)EaV|_M5z6YbYKnx-r_|pU{2j zTsb`|$~Xi$vxoxvCsXC988KZ|vD+S5b-W1e)>yQ}0Nv`6>V|Z!ojc)jx6hbSU^P|L z)YZhz2;?5%3a%HqQ7Tg~Ne`^EeQ{cY)7_ju%cIQKyIeZfnABL=6mr(_1-1Yy%Qh_i zjCTjvd?sL`8eiM;=F}N<-KPaA&O|hU_S zw%qu7y)?`X$@2@Up5#V&`4%inb1TnlVFX!`7PJZlB0n-Ao=PAt*Jh{^b1TIoXXM+( zu%~blUii78A0K$2es=Ym6g9KHYdDz@ zJp>-`wr-tM*6P*=!$G|}(zc-Ud6&1gEdP9}r@^mL9^^!Ua|MDa-QD%7f@nPgw1CEw zR(bw?GE=BGc}-DMpXrq83)2OYn7eh;d+T5q>;nQ?QU70v#*TF2b%>7MyeZ2xFn;%@ zU;?*(4upVUwptaPtD)>!+y=j~)HhUW6jqx>tRKvcn^214s-qNFnUNRG`z12wqJoxz zsj4w(Mc5|qedO2shy0KGpY@;ef8iJWtgLEzf10XKHKZac_lNpF*+1lmM{jYJ&4DH+ zB>K3#{=;9=V#i0zW^HL}+B`d2HgoIL#*N)EsXf^+A(@=ekmMH3*^{kcYj5d37Nh%t z#ft|97A=x#pG9#{? 
zKxl83754YKW!+u7y;56uZ})d^-nw^t-QC^YwS+e_=Ws}Ap1+4g;6HO_=KId%OTO>_ zKQE$Cs*=N!#9^~EBVh?$hyVe7ft%qu-ZB-Fe`2UF7E}7lm+0#v?k}V-qGp1f5&*z1Y(>^&f=`%n^_R7MG77^|9AmmM1F=!+VqZ-iA zE?W>F(p()Cv&HJdrQH4eG}D?`lWH_nR-L9bfvje$*P619T4$`HqB~)tx)Ug(%1%s0 zie)Dhg$d=2i)&ebAwfTJTIo>u*plo-Haqa(rd{EkRCeN#f9Y{|Y)mu~*$HSo^TS3e zJ2AKmX?MS{slL(Y0T*_(#})9C_># z8pzv_C-YVHyv7gV7$QF<@bl#m}N4f_J6LIZVX4I1H|s+=4bat&L7;BcgYQ#))T;58tC4uBvM1NUK_x(ux+i z$6^+1Qr3(Re@gZ>6SAQ@QhtGlK{E(u800|Uz|7fro6b045ibM)up7!uyBrSs;phjyr7CdJ@sNCda!U3@8`!Cenoli*#6IRU~(%{j8J&@ zFoUyfO)f6jsYU`>lLoeqm%%B7kp2a5Hu)qkTXKeDd}3zLpyDqtGJgSs&;Zn5ED|8a zLZ$HYXi~N(YcOoEDd^(<^+os-$Ww$j+dL)kSTm;LXHkW0e++KoFrYVt0Ijfpi1O6g zF`DE#6uwhDhx=E~f%4jaUxc4ue5hzlGb`t?_&bESrXu`_#Z(aHkNXF%>_% z*h}ygFUV6MBU;R_m<2`=MIXY9JZM*HwPm7gS8ZP{j8{TeB~(gP69F@r!MNS)Hsz$_ z@Fsj0xLW|`0EPg%ShZdmbyHlGGg@6a%rT56T_xXWyf+hiM}O$?8NY~Q*!-`xHm;^zU&Hen6c z1tZI7R0@9r9V_ntFZoDUiT-C){7LkR(tnulb4{Hk`yW&BvnbDO&fx^vw#X3{^1(q} zvCRRU4+icOe-!D_1*x@9=wOUH$YBTP=VBZua&Wfxh!$6BL)uO)r`5t3DK@|k(J#hW zk-^#M2*Q;pggOzYMZkRz0iqx9>WMIR4qc6oBMza82+iL`ma4Up;tX0yXu+igjTQoM zxbWpe&$s$6rHhM**9HBI3pL}(YVDodd~ z04yi3u+BexP3FlBTc;=o`kZ_E2Ol4uo;u}?z{dx+G6M92;X<-*w1twQ6$X0GzUiB` zL-19`gs%0>&0PE5!na?_@5R3e6B_j1XD(WB+w6oGE!&N8TbW;UD1O%?`>DkI^jN;y zr&ldq0zZ#_Bl`twkdr(rx^(|1(4EEo7xJEPiT=k_{46@KwEqR@=D|sE{mT81asSvc zx|K0aC}Yx+gKW^6HCP=}G5_9-Do`fXB$~i6@c;s}%k6RZxpVIQgd`zY^qPsX?UfZX zddrNcC*K+^ry*vQlPKozo4-#@%XA!QqJ0Wf2EKy^Ju1PQKR>$f%{y8hG zxOUT}m(1?n&Ee^1Zx|^MeUTrpzVw+we$Qp+?Lt6#OhQ=n5IfBMYd4MUtuM)8zmbOh z3G!gD6URhC0L{?TyXS$*M(*oI{5 zP|W>{A8tF)Ee#Bf){TxE8`pRDZH_q`d)M_gZr^&|xrNWJzxAzsdtbXT!+$-sb*Pi% z$nAB(XoIJJc(VK1W7TzkFnc6ZPp>-^YOV97`UZlrY;Sw4H`tfmy8EAp`Mu{qG}|8D zc$sW-#%&X8iOtutm}ku+i0)<)3*F5=ns4ti)F?JDjh{asQ4e7a4n*QAdKv3gu_w@? ziuJ`;m%tiT?8!yB-!_Vk_Lso&Dt1;xa%9(QY2a`;GAyGhya@jx75^?$@a+^&>JEzW zh@*J%S$-dCC*CuJK9HK$89}E9z6Nv{xSDie&=QN4hcckeKqeFMZ^@2i=d!pfT~R(Z z97_h)1#mC`fl>jLLn43$SkW!#=c`ud^jqtWEWn4t;(kg zH#0UBt}Nyn!tZ4M74s=tZ%w&+#6E*cjJX>jWKQW|GzgBMS12dunTjziRWqCV0jfgYgK$INqx!?$}qI* zgNt862)@Spen*KdZ$WhAz+2?lQfodfwmg6k{$LS)esKV?(Xa*+ynaP&d6?k8RD?fK z_*rrPg}mNX-2dYD2>vdSSMjq9Ph(4)MTsr>x)rhIdNSvXSdRiIf#y^1(fdFauhqFB znEtE@Ok0wlOhP1?OJ1FPAj!9+->ra(v5wDnKy6twG-t1_*$0Jlb=M?+u>Dx(ytFTK(;M@Y$k4ycE|M_3d8Y_z1BV zpk-R6=kbm!&ZMG?`bxmGyHNpv)2aMkKbi}>}7uZT;AX0H2jD7`JK!gSc4I7 zKt-3rp0KNZ)3F4iCHpq2_>)$KuV?sEd0$&eJg?%Z@6!l1LS3J>@b*Q_ zoB5Bjx~wpoNhORA)gN zYoEK@1~&4(`2~{Rw4vt!MSe~z&17X>s!&DUhE_Z&`lZ5uqGC(5O&1P38=_TJZ8QVk zR#g>kaISogw}11Sza=y;v$?)A6eM&2i@y{9fPM&L_K`m%=^v7aBWsf9xAO;(8`YyG zQp>vWrvdm%04@$33gG%cG=S{^_*(<~+ygIppvlwi!Id7+nL)tdcfxnI5b#u$`6OS5 zFXQ8VX|!ckZWU;%TvdsxbQNEfHjXvd(6HQ3@)!+Xa#Pa~Dyx-hJ8CnveC@E!tO>b7 ziBLMkhlT~`a@IT?mML8H#-hG{s9(#1G@5`~JfT$lD512t6iLVV?UW9b+})CVc@xpN@z4Hznl{jV6W#HrURdn0W!>aP2b z@br9WEb9Mn`MOml@w|$kjmSG=XlZA3Cs~W)*qPNavtvxk=Vy;RA`dypdp!-PTseZu zlM?O7hhmH;WdNZJqG{chMA+jd8IpK4a=Yy0~MRygiXQJ3*P?9Xs#Zl9))xNo2rt{1QX-6UBrn?$eZ3zr>MpkaNLXQ`v4V zbd{mPrPAt}dVh8$ZAx7OUR{IPZy@;{$11vT;ORl->Z+;yLw>q6|Ih@b`G*8g+dfb7 zSicf$k)1N&5VHS{CNf0LKRIaRfHRfn%IAncba_!itBvO46#a)N)qkJ`4jNi!n_w~! 
zTy6A%_W17TCoB8UPh{C?KhYRFo)h`Nf;4TQpQFr`A6jqBf2>qMmo?v>4GCJaFrQJ z-e$kacUxi33RVvB7xI|5W(eRe6RFCW(_XFpuJ*TDQL9ZG z!FU&Ma^+mug=k$Gt-d5TGBcc7f`+8^vNzL!H7duo@U+Ko_@Ef4~`C9EX~4)k(#sv5u17+BR_@l7T<&b{qik{G`tw&J*JQmx8y znNYf-kZ9F`tZxqjY~@;SU;RyzNtkself!I`-9%F8;sZ@7US z_CLLzj(RB_c3GYOzw7B>I)82-n|}qmBsF1jd)&CunzXL7zHH^KwnL9fc?n0QH4>he zZjug493|)aB~B`rMVen0ctJe$s6Ma9^ZJ|g2lZT3zebM*y+iLOLw*nelJxgl1&(FO z=ol3nMWV~b0>xIf{-p=-b7N4oTH$8}k16$k6;Cs0>G?b(y(-DYzL!e3N=GD4 zCsj%zI)_6I*0^=h`U08igEp}Jrz`&govl^4SJErv&Dt3dVwHH()JY3`sAi)n# zvx)HFvU?PHilX7q-Ov1_0zHws{z3Qi?zi0EbN|jQPy*xCZqDm&VKY=G$0k~8h9kB) z+ts%HHs1VSohl)cgllx5lf}L`55czxZwd4QV>~TC$eT#@feA%E9cFv? zr>9CODQs&R-qX{yC)?bd-P6^xXSfOfYV4As`ud?u#x`Fv?DGv@BG=bX<83TY98NiD zwVc57KvlWQNGiA}4@_2ust~uGR{gf2RIwXEXY@<^r0Hg-2%^%jcv`;mGsnjv; zw{S5+_yE(j^1OKZMX0zH7onn;>%#}wh4&OSE9!R!ydM8Wcn#^0gRoY*V~zm_Hn}{m zh%4v%uIsn1MVCg4;&>2a{5`#rhegzN9V{m;ai#K)NEXw(Vw=GEb06dPk1`P_+K#J=?QLQ_`OJT4V2_udKxT z=w@^`k5EWD zY8JBbUzovp`ag%dnB1m6b${g$1Nx$>S^SM=p_!>_mfT}EeQQoVDnL`8L;|Ez^nbO} zkV=spY#Ven`_WX1#Qi#o-p2n10GqY5000000000000000003A3h5;l2@&cj*UIT^$ zwgcn@8U#WFh6JDl)CBkiJ_T?EyaqZ3f(G&jYzLMIQV7Zk90`O8$_e-iJ_@c2W((R3 zTnx4h^bIf#at+cB5DrETf)3ga{ttQ&=ny;*f)Mx-Y!TuT6cSVtq7#S{<`g&-iWIIC z(iHR+C>1~za24hjN*0(F{1?lSk)+q)lR4I@t#3~djP%5q~0xKvhQY&;T>MQ~*8Z0s_N-Sh7fGqAU0xcRXGA&9i zWG#Fxlr5|+#x5={MlN11b}o)CrY^uQ7%ze^nlG|1$}i+E{4h!|-Z1tt3^5`xI5AK$ zXfhx&HZo2!WHNd(k}|3?!ZO-2^fQ7pnlrL9$TQ$G_%sqUf;AvDI5ki;Xf=X0m^K79 z95y;OdN%eqMmJnHayPCw>NqYqmN@=7emTN9?m8MeIy#O30RR91(*S1x1prk51ONg6 z5CAU#S^xk6%>gC=0{{Vd0e#N{Ze#%%0N`)!jV4^Z7-K{mEo-}t#y0QLgY#F|BMZ^R0Z~x^iBLR z&{g%@L`6+sRn1r+DR{6WhM>LnReWH7NsQ4QXAi@QaC>iL?!?{`zmrzW9H+qR97Zs88pF7Sy6_ys|nFA^e2DQOv5Ie7&|C1n*=HFXV5Eo~iLJ$(a1BV!X& zGjj_|D{C8DJ9`I5CubK|H+K(DFK?fJLGZ8z1poj5a56t;Z`ro(wY{75vR#*K+qU^K z&-vwZGn#0sndVw(sg>5+Xsey}I_RjA&bsKTo9=q(sh8gR=&PUp1{i3N!G;)WnBhhk zX_V2%7;BvUwwY~?3)cE*uB8^)V2|xJYFb#MmRVN$<+ml)neUnpe%ffSzyA1Vs{`)1 z<)EOD<-+cXxb3b-?t9>&PokcB?1@8Sepum!XP%4u;+qAM5>hhKvNp>p$SW$TD61Op zvk4}dXtJrMIBkpRrkP=;uf99$rB~j2?Wki;I_0|KPPk#7i!M1|qjtSYHW3K4>)0Sy zjTB3vTrtu(nyVB`m3*X_$QAzsKs;zm0001Z0Td8JfLlRy-b}ELv7FRye>Xa2IjQwC z`L%7GYBM_3<}p9D%~SSX+csx^W}f|CjW_=%5p=*;*!=~hPZ0bV{-}H;-9GI8fNS0j zc-QofNN=Id8~+>rX2Kht@)RaMf>Yw-(T6rC(|AqjVPOgT`^QTkc<=H1-n(4-Q}bu^ zA2RA*_8#`!lkVZWq?F#iRZ4GM_tO7YN~v@S&YmfyH77@(Ty~P25|@siwAi&;FHaoH zFJ_lApoukm(*-50+m6r ziullBgEIh$A*R>x~XO-@**3FgX-6V)|WC?+HT1ThS3U7Uv) yy5B_CKRXj351DIFRBKbCI@r%x9uuajIAyfuLyr-o(llW?N&#Xznd9G|M%I@id9cF( diff --git a/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-700.woff2 b/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-700.woff2 deleted file mode 100644 index 1e726a7cfce4130243a3de4bb2bd412000658896..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 11480 zcmV;}EGN@5Obdf300bZff@BASXB${fCF7WtxE&xs zRc*p3N+K!A{{LG79WsQXDWLODB*KIh(S&J+COQza7W2$AXQBRic7T|Nb()j#@Ctrj`j*A1nF zCwy1>G#Xh3Oq_5TDB!kVY4^&&dt3M`(6cPBI0A;>YNRE87t=(M$U+9fO29RGNnhdwDTbCnDS7~tuOI842pJbIXK>@ewn$%?og({_-m!TSJY+w zWy>R6%eGD~PVzI^j6NXw3xKnSv#n;0#%xZP6{@NeN>nLlF%@1VlvQAG0|I$a!R?fL`9hJie6K31-a!Pel>1KmkG^TFw4%S(&4d zi6X4Q>>E_Y(@aS<+FlF1dFMkXHRFCEA78EL2(e4{b@^2VHL7p*bL15<7Sb9MA1_1F z*EJYFGAp^B{F;iXivb$E2e127yyd6}WUz<;>1dh(#j` zyE$^A5)C2d#-*NJ2iDtQ+(r{&bGF!O+wAty&1}{R0-|U{VIPY;QH|c1a3)ETB-a{O zGFQjVEXu9LDHz*U?QmV#r;ZztbST!gjG@{@{NHP9dpp|Mu6B3r4tA)+9pQ>(YBg3k zYhxkyiQ{lMlz>Eoh)KkqCPB&6A^@O(`_0=ytcZ6NjoWi}+aMrF!8h(S6d?`39Lb zW7eE`3l{CPWLNgUJ$Mc=IBeO`aO@s;!pU%I&#Zj$emg5fuBsfq;trqUL;WJsi}R|AirSXbV!a|FvObk9P((yTL_|bIL_|bSRbzpr zmblS!QUMYrj6$5k(`P`mw$ZfU5ITyyevLFWA?KvhhTk}?R%^;PVY)MK&UVf!N>iN_ 
zGTt4r9v~ddE^WH%iEOiy4d$YhrlEBqJ1iC!R#tc3Vy<6fG4%ptj+<>W_SWTt3%Hl+ zGX@FqVzJ4BBB_Nc^v+?Ry#7?qBt{u5NTAn`l`hoNZWHVwbshZu_bLw?)==DZQeQ|jx z`h0|=4BEq3-#msG^<=YoWZXA_{f@t?#DAwr$+Yo`kx7LJeV!xC3S1XEM@;Z74ujCR zRPv{V$YYLMre5=`jn&w$DC#(byVKY&eDwN2*lTb)NHo-{g|xWk*}0j$i&GZoHOT-kxu=;XAbphwPeF z`oc$jx8Ya)od)WASMVtu#nQ+VOvOFXq4(UCzl`|JX&UAqkSvaDtRngm^5dGob^)*; z0G3*78!CVSoAVj@4qG~W`Sf;9u;yw2=(Cl_K>KOJUU_N7jPbC&7yhCu89Ae(m(j|&*9vX*tG^LAAG$)F) zr=guMMxGiwF-Ynyg>dnimmeD*>D+0-dF^Vzk{8Ept{qvYK$4q7r8$t+zi6u_Q#C9x z&suHe;(>!g+SsPpHwdJ3ocrk8EwpfEBgbMI10lql=y3we zW}gVS_EQ~^%gT;hvk5HISmQ*?IV)Jh?%?`OzBidq%l9ERS!7B8nKMnZ%?5tPF_-&fvf>%cya0;jDIdyKZ~Konk7^b^;ZQ(d1+4aDE!YU7AZnr*Ch9 z7rteC?Z&59!J-zXk`41s6*7NK&~mdMJ26R8y&CASy!hb#8ir8I`xci!2|oB%-88W1 zXexr*rMGe=E#1COZrnTz7V87b%uWj|s;{?q!2?Ew*aH~_S_X;gr7O5=2=)13rr|g1 zVQbFJHv1acRStu8Xug%oB09o(w5i;}3K;b^i?xV-2#tN^-l7(KSv^ri7lh2gx*$q% z;?{Y(uJXgST3MxS5n$!&JbWPIbnZjofEY@EGpi3uz7__8ayIx0@}FtX3h(se?I%aO zWfaaUjcNSIvMs!p&O%;Y60(;~HO-Oi#)Uekh0`_B^%LB8kW;he>pNb!F2QD8UAP}3 z$v%C4#J=wH+_I++rF~_$25~owcfFz}2eh#F^&5YvMFkakRn4aRO~eMn&EpE#8~5I2 zpSWS%l2p6@8m=I7v(=lsKD^coTNZToVnqCc5WFT7A*X}pUMOqispT5mEvJsD+;v`NkiE>L7a3q>zIJCAF6#%s9NwYx36z@Ss|SBgUDv`%ueqBfB=8BMXDY8JHlCxx&76vT-EK)ZH^JuNO-n!3&7#oedbj?x9ugj z+b=4af!G_Luz&MGFz_yEccr5x?|@~C^X&(qOE}g8PIjdM;}6{W^eI;K)b?1W0LEk{ zR%zBu)Ct?77a~UQx!B4ln_hN+(*1OFzsKD^cf_*T)42irIm#Odl|LjBfE2Gt^dL|| zsDUA-MJ3ex`k5zHQxl=hG00jvWH!`7x;tSM3gcL0b|~WY{HN0Hzxyc>r0iD-A6+#!Vowg zrkCo_foBjXAJQ~i+w2@bd0ikhS0&Z{arRcA1o$}#KB85`$tAF-*Pdw;F8P&#Dvuw% ze;HMO@S(OPFjd!UV_Up8p@WN6-aUsfgcOT6xD_-ngJS4oMA8s?(LnB#>g0qy#a2?H zOWYV?33zNds{J5obFsT)I@BL+vW2c*@*O04K5y+?t1GEMUFd8273`yGdqfV~6p{1; zX?JUJ1GhHV(mvy^N$3``*#{}F$HVS*W7xcMJtJe6Hf|crLl}P;A+)h6vOP9K%ki`8Q{oT7LA0;21|9zYXND1f`jp^GVp~_`{R%j- zNhj{u>aB4(94SLbrXstzaAWjz&^8jNJD=CjauC3G70w?0XmCY0=JnK%1YcY4ycBFC0MC zJ}dob=Iq{a>UF-nD#5T9ys@dybhudH@fb|f-pI}z2PX4D_^^uis#`SaZC_be_{PPQ z&eGM+$-Q8oXb#LuT`}Db>J5Ks{zVX4j7C)x3%o&1a;U%~TvDSuT|?~1>omWMu?JXT zR?3+HNr}XQk$crT1cogl?c(`B3s{4wjOGCwJ$aZB8T!Ho%tJnlXNA}3*&zqR;4ex- zw_h)}h5aKY^-c4^40y(^B!->O-H(fAwOzG@LOQPMf_>ime)3DF=7fY2D!d$urM=T} zCcQqY%)o6;g;nXJNH^-+W7nQkBbt^7__b?^waqJ;5Z&h{%r3AHwwh*f=7o(=UESTA z&&{E3@!%$_IQZahXiGzT%$cETmyxEG%Q)Ks=SWm8(&Nao5A0A&KG9KtZ)C(POJ=bh zEG+s<3t>FPsADPn0hz`cTn^=n{?OL0rE2F^4^mPwqcj%o`OSdt){FiZl9QCMoKYdc>n4S+q;>g;z0mvm*1dA9I*a!0!OJsm6@4|3sY3 z)u0(m@n)q?+O0e1kQ~vhL3;k|w$%Aaceye>$mCR%-6-4Lp0sL3`}VbLK6Nb|Yocyo ztp)<5b{7o%Sd`ZwEF%9op_9B# z5fEzZD{sYI$*EhH^nk7M5B&c#^pkg=ROquHoCoQTPxJXd`ooii!=C!{v47y_Kko+a z7Ys=I3(cO`MhX(jc_A;1uA_MC?4hYzsRl-SZ>q1(9=^Y8wduh$LW5;|T< zih=V%&>Y-AB!twr{FO-zt~=VGNA8qVwkIehEeX;}%uc;NXhB@j8lNg{iW6~_DnpKNeO?h7e6&}=PYLX*PlPYS?#`fyD8`F{kYN%P6$p_NH#J(b1 z5tbiUrrVIzem|j~-mc&B_r@8C%22VIxlL_?O7W@FsfWJVDSI>`Ncwxntw9 zeRhViQIQ$U2HzO#eyGb8j@~Z}t8C<9_%iiBU_gj(^#9C$TJn98>aYIOL)U(-_J6~C zl=lNI?Q4>v(I6S(|;wYO1@`5_5TcV z%^=9)id@*=DX#aCk83tZt&Mn7br&ggNBpVJMHYjLi^Z$OrK_byt3~=%a9RC?s12zn z)^*=~>(%bj25qQt195YIx&H-r2Q5Y(MmNBr1}Od$0HBPn?u&rdoH?1@-Fz&&{!AQn z-@XXf>N6*Dx|@z=H=c=iJ+$vH$=~_9)Ib0Ecjtd};jBb#qw{e<2PHh?7}c6zFHcXCv5t646mt@&DXZ6L(37JP@aBoNTZzA z0;u2N_TsQO)h)yid%ec@-ur3`yp711U`tE);I!*sNx|lkvuVx<~YWtDi z8(Cnfc9MICTKg;EYB!A?wyUqGR=>7n$YZ7_Ca{hi@zTWXtETDiy*o@kp7j%d2C0%27_3}xK zxt4}>r{K6A#JpU!5Dlm3=(xbNLrh|@C)4JKD8Kg3YJYV9?v76%DtBx}`d6C$!TO)wgM*P)GQ$&R^MLtKjZkl|DxH}z!-`GvAE8C2Z4p)!6O9wYt{7|mlR)3-&7QQ3FrwZVFLv2#Su zH~7-DxK3%Jr#eC`;uj2)R(HeVkdn3+Cpn5QkepoXj~CtO$Bf`3%F*7`bOgnzY;dPN z(&q;bBsirdN~Xh(Dstm@mN(4Ma}Nd5a7T9pW_I}J;{HxPYNMt^RCcR{pJ6d|`NNDpT4k=0TyeM_7E zXmpi{v!;A9CJFKM$t?j%4IuPD~iR4usW@=Ej}c|$KL zQ?Wlt7hv)w%`?j}$0By&kAlf_NzMzi3dpW+|1U}v{t}_VY5o=>W}<XOF 
z+P+p`YFL^yQXW32wvIkrhu^Cz?~0TSEtj1?{{a|rY65cpgOES`_n{U7UKM|S(J}sm z)_(l=CkjHtX}^zF`ZPg$t@+dcWG6R-W5L7`qe^C7vvFSNejPlm_ z!=-xker4-0Gn1Y(qA#07M(dFX!Stn>MFGpI@W?8ignchCcn*RYMa#>Oc|sTR=bS|HyGdi;MC zS(@^UxyB@u!RWnTkh=Y!GR+Q&lx=TBSwpdPKj;D!>U#@(=;Vdp3+CtjfbF=zcUfHI zLnhDtp1&~vTmFvod`JS=Ssr8%H8oOYiYBU{31KKNkB3Om+(?znnyI4ZAOrAuViw=t zky}|2%mIa5bT$B5}T*FJsk+oBW+sodS$*bkLe@E+AQ( zbQl~fZY|Ey>Z)Z;$=!sT-_Buo>)ZFZLA8{0DdUf#eX>3!qcl?CVJ(ltgZPmRYRZg@~SjM+qajH9Fm|?F@T{m6BF#4WAd$O-V`xVJ0uoT8k#z@n$3AU2Cv~*7w&hR$ z2m&=Ka|Pe?Tw`lK2qLlM z!EWAzAF594fVsJ+#xU&j#qfdLM%yt+r)~yek7%NP5kQLXc&3Y#GOpk+sPq$BfED1l zJWqg%8Mm|{laf%G&LmnnS}etNDEB{^e{8YOR7_7Mvsi0UG`M;tCFDwY$#?h9U3wK_ zsx>~7;u+#XLNcf^k!YqD%UcuyaI|vi06Tjm>ek?J)a~4n5w<)^>PiiQbD_k#SHKKH z$oDhmIUo==6eS~A;g0(f$_T))olDkm{De-87%>YIp%gF8%!sE!iF3_q^4eb3?eRIz z!?oR~qpPF~C{Cd41=*`MAhK99zc8=aULYF?X2bs@(>)oEJPYtLv~A7sXx(Vo2!$In zxL0H)Ws=Q!emR8hqk0uHY)e)#Fmm^Z&k@{!5ROYRfGavTV#P_A*h~eMdOC-oP6+c$ z&1*Z2TqXZ8{lVDonicv;zu@PQG4^W5=uviA@qS7}E9H3Ix<%iB910^nFg`TZN&-!R z9eP_K@KQMET7Zz0+9$Py+WE0Lp{O9VKO?0ZE>Sgl?PET8O?3>3!}H=8PXP5t)$A0J zV9!EJ!u#_y+avN_#b@za59r2i6+XL#%>i;Qu0oi>-MAtDLDX2;0=iTXRmzG-Dd>sv zpx3M+ONtlFKTZab1?J3^rA7`EsJDmeAP}?h{T{Tx-1%RUAkka4$~cKxsN4b1yNLm2 zPjZx04V{&!QKly3v3o#1VW5^%!;ekI;nJLY6m~uI!;27)HdZ0xF%Ed?I4?7$DmN zqD!jni8jtH_(~k${A}@pLyCOt>8_eg`(0H+Ri`xu`+tGwO#qofU7T5y6vM+|!==AM z4m#FR;**0EP9fzx=Ca1_b<}uaQ0mL(pyBMTJ4D*5Lgmt=L>a6aL7Ybj8=_IxDV^}5 zPHFsX#m}Y9=jWv@%-&H?7v!m?FU9PTxKlIq1+! zi&OiWH|g(aCo0hAb}&70xxvD%4rUhMR%LOxfmC`!fZruu`0Mt32YH`!j%$^fOO;b@ zy@SG@e}kZCV>$hpjw^ZJ<)n8gvQ@{K8R;$QR;)cOs9IM+g)7}&>7cV#Tbm*6vK&hw z#a$KeG0u>5&=HdhPd$=+JULYlZ-_~~npbb=RFKX;Q1QjC(#OmtchjYuJrx4a>38va zf7&@2#GOqoIstL zvf$hPsDxyFy%K{lJ^BQ0Q|3@xXq1M*R9+`pmppD?@KsX+^Ek1NooY^wZtIT-tXk@Ebh}T#OHw zKF-&bHe;OJj>wBW1Cf_?-D8|oo2C8J0uFpl>YAgKy0Q0fHUsy^D6ri*3rzVzURuZ4RKAv*L14bvN;^}TKf3?2>_MMIXA`;Hq z8z)fv!?Of$K}o_syx#tCuiSvpRl0oE`&wJ&KwRdL>7eu#v_n+#m^iC?+ zPR*|mU&_x7UDVZWrRJq*RNsT z+hL~_DJchHvtk2G&zCa88`IJnBUr4a2vyp34U=U*DpgY$i`5vGro!(@rf>D8wJ_M) zP2IkmnRsiMr~wx(-|jIMHWPO4I-Hzfch5igC&5pWgW3sf4lO(!`kU$# zzGR0g>{EgPif|0Yr37|s`|odv&rQgK@nSI&06BlhlZWGm2vCAgQt0Hc?3WiKYH-s% zx2FkXl447Lg)-})Pwb9?^>0niyZ_#a*T4~se~%8{uqhagM{in>#|MY3AK#k=qt}hE|8C+y za~!i`E=|~(sZj+TTaVof`Cv%drB^g`)@@(g%XXs6FFtij zcE+#Nt_LuFFiI!xtY|;UALt7_ul;}Fv&n`7ki{i<-sxfE_dO9O@}V?j zI5O!TEA+66G3DW+PmZqCY;bjr)Tt&1x zv`QMj%8>o6Z) zeAKJiv@4HEE7;69`wSd_LONS$dS*7 zw447O$GJ%GWDX)!uj(I6bjiiDxybN*VTs$h& z;Q|3z07AE$F&Rq?SV_8PzFWXYN`&0;E0jdaawUFOJhvopNrG5`P)puMQVq)O_XJ@x z-$o(iw0I>fBGQA9;{4^2k^VALoDvop9#1AME=amH5-UQZMOf??JURt<%usx3ht``W z2bGh!vVd?@k}nUzfd%-6URZC+N0yU$(tt2zGK{;Y2@oE5u6n+eSDpsXiBZaLHFZJM zV(UZex>vuC7{0y&xQ82_{fwvyX&}~iudWhQ?asf!z9*ltTR#P{uG#G&l;70dfJfkt za<@m&@`Sr@oVT)+M=szOWWAAMs(D-;JU-UvOX3DsOC{$^;_3WjyJ|eYCye!@|~-Rk$4((kkR*d8;4RBHdMpzKJ>#sAsuip#T>i5H%$z52TCkUt>sEMnt zgBbLC#z?=og^bSn#6Ve9( zoh}+P3eQljTF0?Cx;=?>db4!;W7HjgR;7!qy(4g65A^%Pue2Q}6iT&>TC~VIqqjP} zX*&H@XE289nv=7coH;JB^;Sk26f zbW9Nfh|Y6{9De{${iNLERsh^GzC6)pYNg?<7#TwJJ9mXg04g+gkdqZJxIHvL-bEC^ zem-A*0=uXn0?=o5(lWrE&U;pM_^vnLN=`$f)Nb6y1t3Ke^bUkcw>1A3`5`S#Yo|52 z{C;wV5PMw935252x`5;+;Ro{;nlu8C3x#kA;IH8y8P7t-H302+Lt0+T;v}c*Qcbwb z1rSX@#YliC#GCvVocOqAiH+9Fu#fwlIbs0t;)LXXz_RN}I}P>Hhu`hXtszD%!3eTH z1Q1h$)jIymp7F27Fc+kKxN{q;wBK@~!+=#CwYOmn_bpqCADv@>a;b_w_BJ=0t$z|F(5bEn!(u&nh7 zrWM?kB6cl+^@bv|p}L(&7)$2)mLnRfwP8e8@=?y}K~Vzfs)ce#*{zSjcYssSI{>;R zg3K}`F0z5>LzTk9Zk+#go8i?)qt15Tt@<;Iw z-vcD&=@nY$8hXLjvdZtUw-vxa@XJ&HM6&?ho7G3J_7)=L6?3R=`Zg~lS35vx2=gKp zG8Xl5*e7(ljjz!>T6w_|y1R~%s~whY6oh|lA+0307A%38RpjW?CqNH& zhy>f0c*K?4p>k(vl(!?1mstSnN3S7#1YLp}KB{eSbl?!6eSsY6!7hbB>dPzu^k~s- 
z>{S^O=fN*1VCxvLwOPg$LtC}lK3qZ2z>lUz>@5co!J}fVh9k5f$J{q~^_beugs5?; zzC!z_*V-< zSjvg`8#ejf)R#}LP^#39PRsTvho_gfk1xy*?jI0{2tuOJm|$!O4lnav2n`F5 zh>VJkiH(a-Nc@Ta+nYkA(HTq@o5STL@skBYQHoe1mB|&UN>y4q5gD0TOTN*e(+L~A z>@uj&Rtu&R&iak{{AIwnwa(rDw9TU5e*OLbeuZB7b~m+VnD#$?J>8cs-?)11`m3BD zZhp5%?#B^7|M-*c&AT4?c?E^~q8*A$ORV9IP+3t`{ko>Ew!X1p*`(&C7OmdCKkC-) ydw2Fbz~Hd+2OV;u+exP!PxqocJf2Lmd{$+bm1~O2vTaUrWm$P;$!X3jE&~8Zn1K`k diff --git a/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-regular.eot b/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-regular.eot deleted file mode 100644 index ac2698e85aee584b0f47c0cccff8a8b9b16a5c66..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 22008 zcmZ^qRZJY-)3BGtVR3hNTio5FVS{--d2|12^b95ez95C{VWNP+?Y5CBSQe}RBK19E^E{eSWQ zNg)(~{C}1P(rfuY`+o%+AO&y-xB%<`mj7Xz05gCyz~esz6`%pI0@wh&0FM9p-2d}1 z19blLxc?Wm`;T}2k7ft30>J+%Il%ls&h9_*e>vFyQ_lbR|6gVRKtfYi?f+H*06+u2 zRsr})0V+BGmSxPDvzQ6f^)HjQitaA1Zj{;r=FDsv923fz#-h) z{H*jrO?;xb^zvtyVI5;9sUT2`KnE@{nfrUC4hl z()Hv{t$sUkY_@_+FGhbMp-@sjTz4vULZxPNx4O`2R2%gAt(s3Bfyr-Malq$Y&12u& zxezl)Z>RG;s*P!zJ_3GXHDLSvTg_WRQc!k>dyUqtVArJrq5VkN1Tmv+1YR;BS1C;# zQdlynGv~Bh4I4=(=B7^O{>60@Ec0s?fu=Dubk-8GEKYPHzM3+ju_E)ai&Zz$9Ii3egjVY%oC&|%0jFUL0nwgzj<3l4%-UQTqDgTP3XUqDe z$r@sC7^53S;%}yi8=e4{JxvsttSCPv?BUE=W#*{>Maitb_%Evnu22fmdL&K59cRa5 zK~8FZ9Lv^XB8936FlI5tf5Hu>BnUX%EviH%^FlHpR^eodzEOv(F~sY|+s1o?%^~Ir88|dR)kOp`u5u&-RWNlh z^0>g2JG8vJ;?CCXC*byBTx1z2tI%FJKK3Gwt)U!11-+#mkyVix^H0_gZ-8@{q%l?O zta)%k8mD99kO?`{I4aQaJ3cEo&#Wuw5NmyIj)}NCq}{`jOS8CC-)b0b_%%HX+SC#s z-i_^axGmvuO@b^i=-RSKf=`+Bp;QKiP}po8b;R}qR@_K}8(k}qhaeI!w&))-L-sE| zx=oL)rmyuAcwxy1aIB;Qg>|He-g4yc``4f%D@kTdy)xKHbgU6c4r=^+Vu22xeh`Ap za2meY1-vCiefVPiN9nhJ*RjY5l(PY1;(aeAhhfB?wn#h1XEAO9=1I^ep-_ZF%!EVB zWw!XTMa{&gNLn%kl?x7iQtUNBQe2AB_*NtFsOzBkH5>o|mryH*t3hQCu~JF8FIq^i zdpY`cx$;{~oC~2Wo-y|>Y#syiePhQ*=Do>e{qG?Ee{*l2jw#;h#cnk_pX+$GJHk z`f2`;%f(xyOnd;B{lWokgD*^&%*AxE8l#x0@fW_X$ysMlkJ@y@x^gdOHvLDotu8yt z%}ng>zFL^dAzTL48ld4gW)>4`uQLq(1w;pIhBGqiu!MW~Xu zoN2xgX9K26o*mJXb9&oL9fkYoM=-gEC?db`CyMnNCez<1BR#twIv{gWJHpqSDNRr5 zSlMW3(`aa8DlMJcVis9JSCJ3WBJr2z@(<_p-1+b`0r}>CCj^|M94$`MhIVAB9it77 z8&mGqL>ZJd52k*-F+sREy+(tzt&^Bu@$V$;t*_Yv&FH$YgU{dRv12ZfX;XT~M))Jg z;w`dt^OHC7i#K0hMOoLb>0idgIz*Rk0$cwmqsQC&lz-#A@QTYHEmW&BJl39m2+Cil_Wssk@OE z(mSWGA-7#lHO-IBmWo);VwRtvg1%#1+CI@fen_L6bV8$J81qki5b71KPkzwI&)1J> zV}aMJx`-Mw6f}hS5;$l`B&Vn_)r5zWhQnXJl3#RHxv!fk$S38wvZ3$c5{hVm$`O4 za-b>c4=F;!vC@8&$0xYL+#=F};Lt@?hmRTF1Tw>tQH9#Jd|xnM`Gw&(i-DFUOOymB z=Duu}0>4$y|5q>xhFlN81E z$vTGFB@Gk`rx=2MP|U;;l=AW6$%Yho@?k{4{Q2SfUPofPiU$1n5lk>pd2EdvnT})0 zFu@!%%kdh{KtDl44`n{NgXLrwQ|2T4Xo$u~!A0a+$)+^?GufwlbdssKa+*wQ)g!)Vz&;>n zHGj!80DC;^HY)zL<`+)A&I)5!i3!0Y1DxWv^D8=Xn4AwE$6Ylq97%tu7 z*34j zniS+Gl^`jg8rKDV^kuM2h}0HI*&d!QLKIXOJ$h99gz@2L+l_2J4tPn0@_+Mj=zpbD za|UkA*Ja>4U?To2KRx@#pi9F;7;0IufTT@+tEVe1oj6_sB99O^NoAJ+n=Ze8acV{U zR;wy#RuZG)#f(_N=HN_OyZQYh-+snRzFwW;XygZ9P6I#nf%xm*pu~gOpfcu$le0Fv zWwXTz6AMl|!AIP`16xVUerrrv#XCsBRLi_;aOtTeW-&&zi~>Co4tI<(*b*P4p=5__ zNr0i^MI+=)f-fU|;XrKP)zaHrO3wj3Xvb%P6T``GI!}bM51#JV#EQ79r%<3z7P`zJ zpIqm*{#{iyD;8U9@)uu#UR@Wwmr9yXSyWSz(on^)&kqeVGk|SMZ@^-14HO|(prwsN zZii+0RsxlsQE!@;?N32Vh6_Cws^B`-W&1a@wf4<%y;u1!X(J2ikE!z+S&1DbnsMS` z0~R{@e1NB_iq52h94!u7h08aRp4#7$zLPK+r+?{|*V^@Dig8?#-4<*r!bH`X(}g;U zW!Hhpd70Cqd9KP`8Z<%Q6e^X=isjG>F01YgWArzhXn`F>xsU8#jo2clEABU(zp5b) z*f{K_%|Xb&-1NI+!}sWwBoBvAV0iL~G2sq85Jp+6Kn*^2_wGixnl{ke#7$a{b#sOUgEr;u-8 zKP$|09xVZWVjb)j(#$!`>B00HC~Z#Jvo529Z$F47l-(7xvYdZ8-M4lm>k<0Jq2b=A8m9_J1oFwliK=4N^!Cx zgx;l0mQ+gM8y4hJ4rJ#yStZ6^SEp|l{Ie7m@s(dAC0i&sfn5Rpsf`9ssW$|}3{^=g 
[base85-encoded binary patch data omitted]
diff --git a/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-regular.svg b/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-regular.svg
deleted file mode 100644
index d9f2a21..0000000
--- a/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-regular.svg
+++ /dev/null
@@ -1,403 +0,0 @@
[403 removed SVG source lines omitted]
diff --git a/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-regular.ttf b/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-regular.ttf
deleted file mode 100644
index fb8cea662b2434571debae37fa6ba09fc5f24776..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 39069
[base85-encoded TTF payload omitted]
diff --git a/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-regular.woff b/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-regular.woff
deleted file mode 100644
index abf19899f75a0e8ab8e2d41caeecd8d764d3f08c..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 24868
[base85-encoded WOFF payload omitted]
diff --git a/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-regular.woff2 b/scylla/swagger-ui/dist/fonts/droid-sans-v6-latin-regular.woff2
deleted file mode 100644
index 9f93f74c3bede4b96290650f7a21c98eb1e01351..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 11304
[base85-encoded WOFF2 payload omitted]
[remaining base85-encoded binary data omitted]
diff --git a/scylla/swagger-ui/dist/images/favicon-32x32.png b/scylla/swagger-ui/dist/images/favicon-32x32.png
deleted file mode 100755
index 32f319f89bd07e691de2ccaf21632bb9793a9cd9..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1654
[base85-encoded PNG payload omitted]
zvgvUVc7-t}c@jwh@o&E0n$ViS4YT|3W1Q?bbX(*J+;!)d`PKvb6Cw_Y*hdmMVUhyW zZ8T!ns4DxK>OV1ZfPY>3J+HpK1yx1F)_HZC5;g(w-sLEI-iR$|x$s+l^XGNR&0(9# z+U1ShGrx|W{)=pX?Y>FLo_?g2xjCQaWXJ2g+O&meFp#vZ`{G`z$~NHDbpvJAgsz!b z^uFGREr>*Ka-jL?Q4VX$H&eA}1J?(yaQ5=yDWg5td6>SNom7^sV|jV~DBHp21|rcL z7_oCzwI0BD?~T9~fOpTln3C=D^Q!>ZbNFi<93yT#96a8@?!#XNptPWRl&yd8BaWWl z34kD5y!Xa<4JwQYEl^dOPB*4F%u0_Hvw`81zwm@e6sU}FPKxd5=|)r)V{EC{pvuH* zJ6!Jl2VH$<$8i_SQ&l%@0nSHJRgly=clG*tI(ymx5M-qRT?Ww6)jY1Vth8L5k8)$E zd)fkqhWl`Sm>KDF#NbSZjd4f1W4iLB@ygIB2Q(Evj zKI~~rAs@*lV&6Bw-ypSUBrp)UhBaZbGx8@q#<9o<&%&CtWIY&*`2|&ppREQs0#KB< zgo0U%##!zYYNsv0+Hfp4R8wsEISVP7vn=s?#2B$H-n(`Yp>|B|1{jR=0g#zCWqX%I zR@!U;hN9O~Y_;X}i3a5ZBlfs4#&|`42~e~6Npdm^M{Vf1dK`d_D_^Fr>iH?jAFkZS zH}BsU+uL<~lx=2a0X5~D0Z?>@F~%DJ5oz@5yVJwjtghZZYSYovJ2-LvC30t!u()K^ zlmg02Rxx|VJX$Zjl5)4yRc$9DJQJ_JTg3bVMs!5ZMK_7qUje-Q*G)8^+cPEEt)Q}O zJ?p-(7XYvB$&d~2f)O2&h~4AW9TE}NE^p+nc@KSJTfS+;@4SL}@UgYJ}w)Q)V=$7{=rRQ;RAgzi>VS}wfAf#wGK z2d~@`@yyISSY5T9%ChzGtaqnG%mc=liQDoKB63yzUJ+Xt8%%ES&Y7Jwo!-IKzH^fj z=jW7BTfT+b^39}&XT}GU;0+^o{j>&?k41Rn)ol`yZ6ims&fdS%-gS)L{`TapmX$u6 zyqTqx=2ufZ=iU*cneSC~&lw5r`oD)&=Y!n94L=*WXvefB)7Ws@5xC*4w>6xOb0Q(H zL4_bI75%KLUtq)*#v*lBL~MLK=dE~!3#Q}ufBOd*=m{K;N&o-=07*qoM6N<$g6kDD A00000 diff --git a/scylla/swagger-ui/dist/images/favicon.ico b/scylla/swagger-ui/dist/images/favicon.ico deleted file mode 100755 index 8b60bcf06a7685b9ea53983c125e7058906fbcbd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5430 zcmcgwYiv|S6douFYJ&9 zoHH|LSE(UtsLIS#7_-&1VM>ivN@Zu;-zA{00&N^|8r`@w~u}3a1X>=27;pf_a{DPJMTR$JSvjrVZRL?J~Ga>i0GO0Y%B;(7M$}2m+kcrz@>Hae2Pk^@b^f8%H zL>`N;qJuC@Oe+t~7I z<(F|AfU$#EfN{kFKA+53u@A<+4YnEIf**{2=CuCE#{h4Wo9v-X#BN#D=ro%g~u zG`=OQBiQEDZgc6n2RYf*+oRWbKVZW9eJJuGNa8A-F{8(f+y@)q2tylad}QmVTBh{m zkpvl@O*Y<9#KzvA;3if`|g4qjlr75YyFqbGlV2Q--{B?*ERj@$;+A#v6A5% zKJZ-k(ti_bGM?eOj#f{ZHOKc6lK8&sk-j?4Kdjr4z{}bU>r#UnlQAQ!`bOZB;Ct>?+|WpbgrOY2zPu-B!V?-m447=gVK(*?-RQObKSO{281uhZ-RJZ_ax0d9HE}&gAII8uJO7-x&ULT3$-hG# zr#*GDR$v46R``GL&)??M=Z|+Z8{*GeE`I+!{6E}3tWV7S1MCU*9ccYQdsa7AC-|$l z32U8*#-^_c?O%nK{oI ze@n2sr7p*=%uQX9Dk-qo-r2V)7}P0ZWaab6B%)NcuSZfL*ZERjG4kd;J~ udLgV-)@7w`Z&hkIdqAlS_J#Nx!E}|RnRSkVm|Sa24|P&EF^Huxf&C8$As%)B diff --git a/scylla/swagger-ui/dist/images/logo_small.png b/scylla/swagger-ui/dist/images/logo_small.png deleted file mode 100644 index 5496a65579ae903d4008f9d268fac422ef9d3679..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 770 zcmV+d1O5DoP)K11rQipnJ)eVnTSzHNF zN8ab&RhE5cC$$4FI-PZXx$pga@8yN)KS}L2Us~^y$(x-xioWbnFcV+~b9ig=!ft8Q z0RD+rpA8910Smyc0GviVUOPGiY6YM@-r6Nn8S&~cxHl27$l)-R$1(!Xx045RDy;_& zeXkG{;_#i9rz0B6149#Ddj=KM6MV^rTD%ylzGdCBX<^=^@I0X3SCR7OMbn}sUKdeF zKO-flaJa%@kJ27@Rod?J9=+Qx5|=PtG8n> zy~9rIu}+48M}FW5Bbqw3t#po?c?kmG!FX32W(dOjzTb+U@64MzHItoeB!M0Jcd}|E z>ekW`<~FjR_ZVVJkF|_htH&v!({Oad?xax?0K0sLwBY%nr46DpCmIIaa?@|Y&?n0q z@kJlMy`pE2HtEgASNd~xNzt$Kn7w#^Fy5oi`e$bUE*+f>Vk5z7=-2pj68afrqli$_ zvqe##5V?a)QU_-s9+s?mJYT5m`MQDRH4cYs^L1lCW;Dua5Ln9lG0BC@9DJQHA(}y&Z}$apb{kU zbezR}b^|O%6i+$BFsT3zqAe8wg9`vfiRp#{)z2bsJw`vBQL7Bt!IexM3$Hsf0tHK3 z+R=x{lR$K`s;7__?ASPW=3?*xgCpGaiadSEpoi0pw-_V#OXM8Ap{4qlG08x0ig9IY z3Ijqh(t1_=g#jocuqyJO=729e9OSiNDSrhR0Gc5G)(QGH?*IS*07*qoM6N<$f<~fU A82|tP diff --git a/scylla/swagger-ui/dist/images/pet_store_api.png b/scylla/swagger-ui/dist/images/pet_store_api.png deleted file mode 100644 index f9f9cd4aeb35a108c4b2f1dddb59977d56c595d8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 824 zcmV-81IPS{P)n=Rd;8mVwQNY4k4xJQ%YT}s;WA7;r!W@XgqjG_4og} 
z8w>{OB9REiMa8-B85td+y}bji^~2KA`Md4j-u{zw=H%Da@83%_8qEnl9k1WK;pWX- zb-lg)pQYAreK@>)*5Clqni{IZVYGG+NY67Bp-^bn;L{Nbh44I6CIK+n7p8#U?;fCA zYMFcy%UEjup4fgnli%NyzSe*@419QuU9lJ|T$?f9w?HIQ$RwEJGK7^!y7LhxIgVJp z9c!kB{0aydM1epU1NJ=h(}2X?Y{qn70yEN$dwm~favs=VbQ+T?!AvSl{P~PE zS&zsJbTQttne>kdM4$jBhLMFy@I1)3u-4cAzrY*l!o9eK^w%+jqY!oi(Ri8sMauvK zwnCP#%3hEH#FtNqq{iT(?=_JA_8XC>5Y8Y@!wmxKb|A87ZbpHA`+%v~0pt{5Nko1L zLKR^25YExt1lH7L1{t{|P z@n)yHyZf~3>LZ@#&CNw1rA#OlY^|)UJQKUrlKKO&x%wPhH}6&e0000K^a6u zQ3;5MiU^7p6*M3qDk!2=YEcHMQ>nzEYP;R`e2C@r+U+?#XaC*&gKPcB#k$`o&;7mu zYNhYYXe|Uo84#4ZIko#rcU5K8*yFL{qT47O&^5fZH$ zVZ@%(l~vVHjnm;H@KL8@r%yUHoo;rbHI_4lIH(_nsTT>S2`DFOD~uCb9_dF4`#QgI zy7ldMcLs+A_s%|e1pRPrbX-tpeNP!9(IpMFTce`t_5U%lP99z%&i6`1d~ zWeM!Rxc50<+d$e^9LT`?B+aMK~apR zHm?q;p<7{wN2g|I^aGlSws;VP84j(z%aQwvAWv83Z$}p(% zZ^?2;gxg(ey_`V5J7{;!o;o;KslW@z5EP~JGs|U)J7dF&(ff#A=6vU?cGQ$-4+;Jf z-ggJEa!yStn`_EWvl)#yhm6XVs}UUbsi;+agri;mCfjH^Uy;lH+Zw^h)4N?oZgZz4 zJk(fTZ|Bi^;+s_M=~+d#vyoxEPzTlOS=mX@sbl*uRj>=MaMr}cFIY8i?UM61>86uB zV$DlOUCiUJwbzJMP@D$urzK|lL2-PC!p1l47V-ZG<5Ev0Z5h~Kx?`KOp7gkAjV93A z-Gc7MrlxTf?wF;CbNc@tCHJH{TB3c;#{SVu%97}tyAM2n&|9W_?qv}$*Jt*%7Yxb# zV0;d;7|lDEltJYS+U)#aiJO};?_Jyy_4%syQ(uy?-J-Yx-9O5nKRk@@XSS~X<(2u~ zV-LamWm~!iqtH9wkpf8mAXZhOD&L#aA_%)4h2M;1M5jt zIR>Us+%W-GXa_f^opKg=DSrAs)AXeRa;Hp0aC1OgbxQ%Qr_QvTleM1jkR!2mkcX$3 ztsR8~G9iqh(-FJ@F_rQBIYDXV_6s7G9SxaVF^laZqcx$!D97m|7t16j6@Jt6UdDRy49Qyvs|c>RuA|@b%}`*wU}2^7q;&Vtc6@lb zcXl)T!6nYDzmMJ~%n$KNXyNlCG)GkJ4!82;v6@d3>s5r~E+3!O?049JDr14Y^PeMI02R`0lJ^=oJ zYd|*u9|SU(j7hY?+<=(?fP*mtV*zFhOrz6%{VA?ozdm&(Jf^V zMfPZ?>l`mS3{Uq8IM;e!+1YjJy2!mzK$O|wPeU{*QSbs9m+@`f5KxO3PBnQ=%RsZg%go*fJ`*w9TL{-WgZVIA$!YV}3BRcfeXaR$x#b zW)Tpd#8E4)^MyYdkH;4_;ChJuw%n+Be7Ko4;w-nHvyo$d_0e-YiF78Df&)_)(}fcr_r0mPH(4RRYWIu+d@t0&Ss@O^s! zOKyX&13)%N@83r^;QsgN{rl(!0|RF1FA)b1{CRXAy&1ySz@>olPiR4r$aMdq&_=nK zq|cFs8phWJ1@%dZ-gXd{zDbTILD>)qEvH-NU*Rf1b2J1Ri79`rBFl@ z8E^0I)OqEi{pH(a24b9YPG;Kz@t-qZW;3Mpe`MRlmYx{7bH-XZ&`RQ7Rb^%}gc&X| zd}Q-FZf|RWxHU?PR!(C?80zu(^l>*h{#ulSiid(O!J(8P-41bNM3tnX@U6NS5yo0? zdcF)~xFE&+&|gZ$23dV5t~?$$&ymZ;F8j7GGMncGSsDo%>J`26=&l=X#rSKv_64;0 zr;k6no@=gV`P)K!=kaHl>q?!`X>(A;84tg^Md<`zA%qbRLby1Z=fn*ZRdNqs%Tq|3 zOt}lZu0q9oKJhgz&+^7PCt$=UFW=R*w?a1)ePoL*`R$Gxj?TU@12tTHsT$giHQU+sqf;fS0FpT!< z z#UR4L_rT;lfRLVo8|3$7cmuxwjY5rmYs&kR6z_LRhf9-=4QalKQYEWw^4-EBI3j$& zA>$Im_{ZA>0`)E_&m%x6a)BThkx=e|aMkOrK9zb1YzqpQ&WZ^$)2T>CwTCuYRn5y) z3fVXg-@R5&Bf4?WUTyD|hBDe2>xEh|o-y}o5Se~+Ob!5xN>CaAN!<4)F zwNh!Y7B?@AigokFYNJL`0Vz&-ekrY95-n3M<%GR<;SzXRmO7(zd+gf|$Thb%;pby2 zyd{5TJ?|JYUgpSlJ0=LB@k6#d&opuPGq^qJAIumfhigC2qAX0OEnYnT@O;bA?X1O5 zpLe9|%_H+Yki!Rv$7Kvjv8r7Z?$<>G)g*%D*V#s&kz>Z3V1 z3!ZKh9H8Nl9IdhEW_rY#oYdDCLTe+nQ{(d2pBX8%CmxL+1`|b#Vb!?IY!kT7$PDWAP9$FY=e9KSK{DEH|408! 
zl-$lv)U8$EB{~es&j>rYg%{{JRvIl8@NK}L=xDAEVv(o#W@3LUDc*m?yKSPR0O|nY zAh;*QuBdpja8HzP8Uw`ce-r*LrUA47ZvZ)ff3k4^>;dFcof}9eXeeM<0OVj&CKDVK zpUKKIF%hSmry!pwK68UX>zOF@dv}B4Gg)^2GQmN7@A?zG!xO6dT*Cq0+r{eY6}AfU zf`|~y!?^R*nB0!iTcg|CgM}ou^H*s~5)%h;Xh;PYOM!|Yhfk$w;@`1Dx1y!EZrM&^zMat!^Wz# z=Z{;Pa0w21oA1X3*9=`*c7o3ePa^k%Vzu>2C_7DaZJ8FW5GJv|t>`Ym;_S>7g_3XI zdRb!Ppd`ErK`pUDHRsJd9@)bu>}s1)nKsyAR7h21<1u{DX1gd_Vf;^zdUpFPeSHHR z7AMgw^{FlFlK91CGMafKt`$FLhq#^=->@Uok7pqW6&#Zs4*E(i5-jog43A*qC@!(8 z8&F}pofRcMVmcJd=f;fvlfAR!ZqeaTE?#TQ^jQM0ioaJf8m^!Kdv^`f5kEsD0=gX#4={QE1$3A4K~V$ITKEd){XVLx?i6K*D>JF6E=i znqF^X#&UX}rfB|#A9%y|sR5i6B5gyk>8@Q+xHg|^5iz7C2}YkGF)nuP4LX#k2tRBP z=!VnWnXea(K#Wvg2&0f{!mXuuWaPpsoZ)3TSaEp;i|_)CvP=4wjI; zH%7tcLM8dQXsHW*#|}%TG9yiGpyjBltpcpXkpl8zg~x zD{QG)2Z8x$vfjgDc(J6i|OHoLX&!<+m^<$S3DtA8Mf!{ z7;g1}0uqJ0Mxuy%=#BFX5;Xh9JkrA$d}neS9T;$F$kXn}ss zF{Jn}9EDk=>h)sMy$YXfhKIDxr7U@3xl+uI|N5y!>?{aVn703L1Qgb$ql%JT^lsGD%)~)(H?Spj$zNt)h)Raob z@KyVB@&ngE0rtMW4!UTqGX>{&KHJAWqb)oYq9O)e)nmN0jVa;LNbKXx04a+8&O;q) zHBzGejrqt7Dk$Z2VR%%K#`!((pXE*MR{jGtv|q$p5#v9N0f^6B9IB!Q6(y$TmHRLM zsYXm2jn3f{9T)KVVzotDx=Ng8q0Z*VDZOkd5C!p0PRoFt>NyVEc9*%YR&2>Nq~$AI zXOQfjJ&wpGMe~I8y=cC(QR4=W2GWccFK(3`d&gN+)qWtW-`*}mZI%KDRl4@rUv1%d zxFO82lhW$xQyYxJg8tOZyXm1As%kEFNn)eW{R61M>af@wr(YW{R@+eL2 zx?SovK+867$F%T;Dfeajw|kiQ81GcOnS$Y4+hp8g_w1P8_~79d9p$*M1_Ei81$H$Ti6oi?ZW)&tmsJa7RV1LKddm7R*qL54L7j zvCr1Mrb;l!=m^TbJun-C_6$7w81E1eAQC^6s4>rZ4&I5+yyu$kha%Z&d+|S7Ki#{2 zy}%Giz|eR|G?ychX%%=eL`W(aLarb(L4jd>J+wlX;xMV9H8J!l&i?~Mw7)jlIuLD% zyq+AK92j#kC`ycv$SJ|E7!FBParx#v<3_rZ-DLQ@>`#sdl5}immok8&`{YgF|+< z`tB>e%6G{=B4?V-be>`&*}0d*f?$yBX@w+rJht@O+=^zttqB2p=IiA17#YD$4-fih z@$gJ95mGmFhN!d;3Ag4#>3o`>%L{G=9<}qOJ$wDN)%)MN6bVsAPG4oKB3+8r6!Qf9 z3m8?jIpWcEJbt6|f?Y4nMXK(--YZ|GA2_aRS!do%J9S7?Q&4FYL@sPilq}e4tlYa& z?f+we^=FH^Z9|dnXZghblW!IYGIAT{``58&7vZBybh+GuIPP{h*J?&vf7i8rv6qgx zab9~l+K`tvC7pWtlS!5lt(n#Yl}PAR(v01oXjc0F?T0w>+*p#PtE?Tf_hMrEaZ!^V zbv_>=4xibc0TUxg^I>TS?HR4fdiWl`@6{7|WU9G68l7tOz2p>oIe~NNr!>Q&PHm`4 z98R?g(IT*nl#{_|*WO_h0X78;WwMp?A^Zi)W@BX5q==TdOl?~J6HK(0b(xD6?m3e3 z#+zMaSJb(W$h5+d+6vujSjyi_R80c9>7h;0YlUFDvN`iNGu&5HQ5^e>6x?&JSc4V$6_I1jJ4vnCVbkU`Gz=Uy#~OI( zlL-$UAE$pVCsD_rICM#Q!ltzcqDphp5L|ZrqUm>=H%x!RjMrF#*?BN2shvUg=H;)& zy~_xWl*k$~9Hl6PIq({dELPE-r4*YNs7?5{>dlC`EcK~lPKB_8V)G@H)UZFF8$tXT z@^raW#Hq4OJGFL2Aye|HU&_NL%dYans6?ltqEBz`Q|m=@Zh4=-p2r;}q(Nbsk$fUI zP|(Ns2>MDvZi1H7<55frlQn#%?`WY3g`+fRuC#UJx%#d!zxEu3=}zF514S=6f@?~$ zeuSB=6E7r3ya|; z@K7M3VBrls6c{M*M_{AB_fVjgQ|F(FuK(@=1eWeVMSpLglllqV6Rg-L_46;?^IskS z)x6|SR1^gGl6amWjkb1dX}^8DumNXNmhsfxKA#;bBBIZE@0gma5yQY(FX>|N~Y^mgq`xc zdxOf6r{9u#_e0gV3(fdBTdV2Sc4SN5ZmP?cB4?KRdvj&>@zN_HP5m0E=+A=efDBI*IG*Gy%%< zz@yc%2XvGm)QQv5k^ZC6!9MwX8BCmQ{3eAX|GTwn#>(PS6PoB=$Pwn*?wz?%Tx2gwJ4apoy`A15D=>?%}hj`fV*p=6XW=YR(sp))`dxTnqHE&{&; zPdeO}SVkf*6_$c45W3Z}u|Z&a8{r!6ZNY62S>5{jAd)Hkjg@h%@c)c#BvZK2lmGw| z`Vh+%ECkF{t=)XpF3Z1bj=Pe9LpHbnQwjeTU#=4hB76#52DU2P2Ouj~^lRWwRd%eN zBw_z%FL0CUlk!`s2!`>QG&H__i_)I9=AuA=jn40z>;@hRsg)>J(58cx;l;h_zE*-R7Wbz6Ff#1Mss*)zTImU4`2@?a7y;v4 zH=lJ_PM5Rkw*AU`Cmq6aa>chASJ&Z3Ebj`y;w$MM!fa6`13VU7Kc|T5Xl#7ecj?mp zREV-nBJ6C)`?&}QDe_(KM>BrlN|iF{7-90j+J>N0^vY=LK;8!^9Y_m*aRPX{!S6ag zgRw(13pJvt`;{^S-vgUk?8pV_Vh4a4P7~}uHT)ENFMqd71QIOl8Q6+24TM_+158z) z54U-*C{M)S&!2Bfu&`?Ti6;WojY;%6+I;uCof+*T2iUMz!7Eg<{}#DJSx)C$5f zP(oSf>_s1t06cJ-U3?<9poS4O{Go>H>hro^ks;r3mm1Ehfq?m(_YE8UiVUgG%W9ZY z!@O^}KR%JW*0e=66rUYj5BP~=x%$^x92-m_ - - - Swagger UI - - - - - - - - - - - - - - - - - - - - - - - -

diff --git a/scylla/swagger-ui/dist/lib/backbone-min.js b/scylla/swagger-ui/dist/lib/backbone-min.js
deleted file mode 100644
index a3f544b..0000000
--- a/scylla/swagger-ui/dist/lib/backbone-min.js
+++ /dev/null
@@ -1,15 +0,0 @@
-// Backbone.js 1.1.2
-
-[minified source elided: the remaining 13 deleted lines are third-party minified Backbone.js 1.1.2 code, truncated mid-line in this extraction]