Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions be/src/cloud/cloud_meta_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,8 @@ Status retry_rpc(std::string_view op_name, const Request& req, Response* res,
} else if (res->status().code() == MetaServiceCode::INVALID_ARGUMENT) {
return Status::Error<ErrorCode::INVALID_ARGUMENT, false>("failed to {}: {}", op_name,
res->status().msg());
} else if (res->status().code() == MetaServiceCode::MS_RATE_LIMIT) {
error_msg = res->status().msg();
} else if (res->status().code() != MetaServiceCode::KV_TXN_CONFLICT) {
return Status::Error<ErrorCode::INTERNAL_ERROR, false>("failed to {}: {}", op_name,
res->status().msg());
Expand Down
1 change: 1 addition & 0 deletions cloud/src/common/bvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ bvar::Status<int64_t> g_bvar_fdb_incompatible_connections("fdb_incompatible_conn
bvar::Status<int64_t> g_bvar_fdb_latency_probe_transaction_start_ns("fdb_latency_probe_transaction_start_ns", BVAR_FDB_INVALID_VALUE);
bvar::Status<int64_t> g_bvar_fdb_latency_probe_commit_ns("fdb_latency_probe_commit_ns", BVAR_FDB_INVALID_VALUE);
bvar::Status<int64_t> g_bvar_fdb_latency_probe_read_ns("fdb_latency_probe_read_ns", BVAR_FDB_INVALID_VALUE);
bvar::Status<int64_t> g_bvar_fdb_performance_limited_by_name("fdb_performance_limited_by_name", BVAR_FDB_INVALID_VALUE);
bvar::Status<int64_t> g_bvar_fdb_machines_count("fdb_machines_count", BVAR_FDB_INVALID_VALUE);
bvar::Status<int64_t> g_bvar_fdb_process_count("fdb_process_count", BVAR_FDB_INVALID_VALUE);
bvar::Status<int64_t> g_bvar_fdb_qos_worst_data_lag_storage_server_ns("fdb_qos_worst_data_lag_storage_server_ns", BVAR_FDB_INVALID_VALUE);
Expand Down
1 change: 1 addition & 0 deletions cloud/src/common/bvars.h
Original file line number Diff line number Diff line change
Expand Up @@ -750,6 +750,7 @@ extern bvar::Status<int64_t> g_bvar_fdb_incompatible_connections;
extern bvar::Status<int64_t> g_bvar_fdb_latency_probe_transaction_start_ns;
extern bvar::Status<int64_t> g_bvar_fdb_latency_probe_commit_ns;
extern bvar::Status<int64_t> g_bvar_fdb_latency_probe_read_ns;
extern bvar::Status<int64_t> g_bvar_fdb_performance_limited_by_name;
extern bvar::Status<int64_t> g_bvar_fdb_machines_count;
extern bvar::Status<int64_t> g_bvar_fdb_process_count;
extern bvar::Status<int64_t> g_bvar_fdb_qos_worst_data_lag_storage_server_ns;
Expand Down
14 changes: 14 additions & 0 deletions cloud/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,20 @@ CONF_Int64(default_max_qps_limit, "1000000");
CONF_String(specific_max_qps_limit, "get_cluster:5000000;begin_txn:5000000");
CONF_Bool(enable_rate_limit, "true");
CONF_Int64(bvar_qps_update_second, "5");
// Turns on meta service rate limiting. The request-handling macro in
// meta_service_helper.h queries the stress decision when either this flag or
// enable_ms_rate_limit_injection is set.
CONF_Bool(enable_ms_rate_limit, "true");
// Fault injection: randomly return meta service rate limit error for testing.
// ms_rate_limit_injection_probability is the probability (0-100) of injecting a rate limit error.
CONF_mBool(enable_ms_rate_limit_injection, "false");
CONF_mInt32(ms_rate_limit_injection_probability, "5");
// Reject values outside the inclusive range [0, 100].
CONF_Validator(ms_rate_limit_injection_probability,
[](int32_t config) -> bool { return config >= 0 && config <= 100; });
// NOTE(review): the settings below look like the observation window and the
// thresholds (FDB commit/read latency, FDB client thread busyness, CPU and
// memory usage) used to decide whether the meta service is under stress --
// confirm exact semantics against meta_service_rate_limit_helper.cpp.
CONF_mInt64(ms_rate_limit_window_seconds, "60");
CONF_mInt64(ms_rate_limit_fdb_commit_latency_ms, "50");
CONF_mInt64(ms_rate_limit_fdb_read_latency_ms, "5");
CONF_mInt64(ms_rate_limit_fdb_client_thread_busyness_avg_percent, "70");
CONF_mInt64(ms_rate_limit_fdb_client_thread_busyness_instant_percent, "90");
CONF_mInt64(ms_rate_limit_cpu_usage_percent, "95");
CONF_mInt64(ms_rate_limit_memory_usage_percent, "95");

CONF_mInt32(copy_job_max_retention_second, "259200"); //3 * 24 * 3600 seconds
CONF_String(arn_id, "");
Expand Down
16 changes: 16 additions & 0 deletions cloud/src/common/metric.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,16 @@ static void export_fdb_status_details(const std::string& status_str) {
if (node->value.IsArray()) return node->value.Size();
return BVAR_FDB_INVALID_VALUE;
};
// Walks the key path `v` beneath the top-level "cluster" member of the status
// document and returns the string stored at the end of that path.
//
// Returns "invalid" when the path is empty, when "cluster" or any path
// component is missing, when a node that still has to be descended into is not
// a JSON object, or when the final node is not a string.
auto get_string_value = [&](const std::vector<const char*>& v) -> std::string {
    if (v.empty() || !document.IsObject()) return "invalid";
    auto node = document.FindMember("cluster");
    // FindMember yields MemberEnd() for an absent key; dereferencing it is invalid.
    if (node == document.MemberEnd()) return "invalid";
    for (const auto& name : v) {
        // Member lookup is only defined on JSON objects, so check the node type first.
        if (!node->value.IsObject()) return "invalid";
        // One FindMember both tests for presence and yields the member.
        auto next = node->value.FindMember(name);
        if (next == node->value.MemberEnd()) return "invalid";
        node = next;
    }
    if (node->value.IsString()) return node->value.GetString();
    return "invalid";
};
auto get_nanoseconds = [&](const std::vector<const char*>& v) -> int64_t {
constexpr double NANOSECONDS = 1e9;
auto node = document.FindMember("cluster");
Expand Down Expand Up @@ -195,6 +205,12 @@ static void export_fdb_status_details(const std::string& status_str) {

// Backup and DR

// Performance Limited By
// 0 when the limiter name is "workload"; -1 when the name is missing/invalid or is anything else.
int64_t performance_val =
get_string_value({"qos", "performance_limited_by", "name"}) == "workload" ? 0 : -1;
g_bvar_fdb_performance_limited_by_name.set_value(performance_val);

// Client Count
g_bvar_fdb_client_count.set_value(get_value({"clients", "count"}));

Expand Down
1 change: 1 addition & 0 deletions cloud/src/meta-service/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/meta-service")
add_library(MetaService
meta_server.cpp
meta_service.cpp
meta_service_rate_limit_helper.cpp
meta_service_http.cpp
injection_point_http.cpp
meta_service_job.cpp
Expand Down
15 changes: 15 additions & 0 deletions cloud/src/meta-service/meta_service_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "common/stopwatch.h"
#include "common/util.h"
#include "cpp/sync_point.h"
#include "meta-service/meta_service_rate_limit_helper.h"
#include "meta-store/keys.h"
#include "meta-store/txn_kv.h"
#include "meta-store/txn_kv_error.h"
Expand Down Expand Up @@ -311,6 +312,20 @@ inline MetaServiceCode cast_as(TxnErrorCode code) {
[[maybe_unused]] std::string instance_id; \
[[maybe_unused]] bool drop_request = false; \
[[maybe_unused]] KVStats stats; \
[[maybe_unused]] MsStressDecision ms_stress_decision; \
if (config::enable_ms_rate_limit || config::enable_ms_rate_limit_injection) { \
ms_stress_decision = get_ms_stress_decision(); \
} \
if ((config::enable_ms_rate_limit || config::enable_ms_rate_limit_injection) && \
ms_stress_decision.under_greate_stress()) { \
drop_request = true; \
code = MetaServiceCode::MS_RATE_LIMIT; \
msg = ms_stress_decision.debug_string(); \
response->mutable_status()->set_code(code); \
response->mutable_status()->set_msg(msg); \
finish_rpc(#func_name, ctrl, request, response); \
return; \
} \
DORIS_CLOUD_DEFER { \
response->mutable_status()->set_code(code); \
response->mutable_status()->set_msg(msg); \
Expand Down
Loading
Loading