Make metrics of raftstore worker static & thread localize (tikv#7274)
Signed-off-by: Renkai <gaelookair@gmail.com>
Renkai committed Apr 8, 2020
1 parent cc67b7f commit 09c914a
Showing 6 changed files with 85 additions and 44 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

(Generated file; diff not rendered.)

1 change: 1 addition & 0 deletions components/raftstore/Cargo.toml
@@ -75,6 +75,7 @@ tokio-threadpool = "0.1.13"
tokio-timer = "0.2"
txn_types = { path = "../txn_types"}
uuid = { version = "0.8.1", features = ["serde", "v4"] }
+coarsetime = "0.1"

[dependencies.prometheus-static-metric]
git = "https://github.com/tikv/rust-prometheus.git"
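A note on the new `coarsetime` dependency: nothing in this diff calls it directly, so it is presumably required by the code that `auto_flush_from!` (used in metrics.rs below) expands into — the tikv fork of rust-prometheus throttles flushes of thread-local metrics with a coarse clock that is far cheaper to read than a precise one. Treat that rationale as an assumption; the API itself looks like this:

```rust
// Illustrative only, not code from this commit: coarsetime trades precision
// for a very cheap clock read.
use coarsetime::Instant;

fn main() {
    let start = Instant::now();
    // ... some work ...
    let elapsed = start.elapsed(); // coarse-granularity Duration
    println!("elapsed: {} s", elapsed.as_secs());
}
```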
61 changes: 58 additions & 3 deletions components/raftstore/src/store/worker/metrics.rs
@@ -1,6 +1,54 @@
// Copyright 2016 TiKV Project Authors. Licensed under Apache-2.0.

-use prometheus::{exponential_buckets, Gauge, Histogram, HistogramVec, IntCounter, IntCounterVec};
+use prometheus::*;
+use prometheus_static_metric::*;

+make_auto_flush_static_metric! {
+    pub label_enum SnapType {
+        generate,
+        apply,
+    }
+
+    pub label_enum SnapStatus {
+        all,
+        success,
+        abort,
+        fail,
+        delay,
+        ignore,
+    }
+
+    pub struct SnapCounter: LocalIntCounter {
+        "type" => SnapType,
+        "status" => SnapStatus,
+    }
+
+    pub struct CheckSplitCounter: LocalIntCounter {
+        "type" => SnapStatus,
+    }
+
+    pub struct SnapHistogram: LocalHistogram {
+        "type" => SnapType,
+    }
+}
+
+make_static_metric! {
+    pub label_enum RejectReason {
+        store_id_mismatch,
+        peer_id_mismatch,
+        term_mismatch,
+        lease_expire,
+        no_region,
+        no_lease,
+        epoch,
+        appiled_term,
+        channel_full,
+    }
+
+    pub struct ReadRejectCounter: IntCounter {
+        "reason" => RejectReason
+    }
+}
+
lazy_static! {
pub static ref SNAP_COUNTER_VEC: IntCounterVec = register_int_counter_vec!(
@@ -9,19 +57,24 @@ lazy_static! {
&["type", "status"]
)
.unwrap();
+pub static ref SNAP_COUNTER: SnapCounter = auto_flush_from!(SNAP_COUNTER_VEC, SnapCounter);
pub static ref CHECK_SPILT_COUNTER_VEC: IntCounterVec = register_int_counter_vec!(
"tikv_raftstore_check_split_total",
"Total number of raftstore split check.",
&["type"]
)
.unwrap();
-pub static ref SNAP_HISTOGRAM: HistogramVec = register_histogram_vec!(
+pub static ref CHECK_SPILT_COUNTER: CheckSplitCounter =
+    auto_flush_from!(CHECK_SPILT_COUNTER_VEC, CheckSplitCounter);
+pub static ref SNAP_HISTOGRAM_VEC: HistogramVec = register_histogram_vec!(
"tikv_raftstore_snapshot_duration_seconds",
"Bucketed histogram of raftstore snapshot process duration",
&["type"],
exponential_buckets(0.0005, 2.0, 20).unwrap()
)
.unwrap();
+pub static ref SNAP_HISTOGRAM: SnapHistogram =
+    auto_flush_from!(SNAP_HISTOGRAM_VEC, SnapHistogram);
pub static ref CHECK_SPILT_HISTOGRAM: Histogram = register_histogram!(
"tikv_raftstore_check_split_duration_seconds",
"Bucketed histogram of raftstore split check duration",
@@ -44,12 +97,14 @@ lazy_static! {
"Total number of tikv pending delete range of stale peer"
)
.unwrap();
-pub static ref LOCAL_READ_REJECT: IntCounterVec = register_int_counter_vec!(
+pub static ref LOCAL_READ_REJECT_VEC: IntCounterVec = register_int_counter_vec!(
"tikv_raftstore_local_read_reject_total",
"Total number of rejections from the local reader.",
&["reason"]
)
.unwrap();
+pub static ref LOCAL_READ_REJECT: ReadRejectCounter =
+    ReadRejectCounter::from(&LOCAL_READ_REJECT_VEC);
pub static ref LOCAL_READ_EXECUTED_REQUESTS: IntCounter = register_int_counter!(
"tikv_raftstore_local_read_executed_requests",
"Total number of requests directly executed by local reader."
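For context on the pattern introduced above: `make_auto_flush_static_metric!` turns each label combination into a struct field backed by a thread-local `LocalIntCounter` or `LocalHistogram`, and `auto_flush_from!` ties that struct to the registered vec so thread-local values are periodically folded back into it. The win is on the hot path; a sketch of the before/after call shape (both lines mirror call sites later in this diff):

```rust
// Before: every increment hashes &["generate", "all"] to look up the child
// counter in the vec.
SNAP_COUNTER_VEC.with_label_values(&["generate", "all"]).inc();

// After: labels resolve at compile time to struct fields; the increment hits
// a thread-local counter that is later flushed into SNAP_COUNTER_VEC.
SNAP_COUNTER.generate.all.inc();
```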
22 changes: 9 additions & 13 deletions components/raftstore/src/store/worker/read.rs
@@ -489,55 +489,51 @@ impl ReadMetrics {
fn flush(&mut self) {
if self.rejected_by_store_id_mismatch > 0 {
LOCAL_READ_REJECT
.with_label_values(&["store_id_mismatch"])
.store_id_mismatch
.inc_by(self.rejected_by_store_id_mismatch);
self.rejected_by_store_id_mismatch = 0;
}
if self.rejected_by_peer_id_mismatch > 0 {
LOCAL_READ_REJECT
.with_label_values(&["peer_id_mismatch"])
.peer_id_mismatch
.inc_by(self.rejected_by_peer_id_mismatch);
self.rejected_by_peer_id_mismatch = 0;
}
if self.rejected_by_term_mismatch > 0 {
LOCAL_READ_REJECT
.with_label_values(&["term_mismatch"])
.term_mismatch
.inc_by(self.rejected_by_term_mismatch);
self.rejected_by_term_mismatch = 0;
}
if self.rejected_by_lease_expire > 0 {
LOCAL_READ_REJECT
.with_label_values(&["lease_expire"])
.lease_expire
.inc_by(self.rejected_by_lease_expire);
self.rejected_by_lease_expire = 0;
}
if self.rejected_by_no_region > 0 {
LOCAL_READ_REJECT
.with_label_values(&["no_region"])
.no_region
.inc_by(self.rejected_by_no_region);
self.rejected_by_no_region = 0;
}
if self.rejected_by_no_lease > 0 {
-LOCAL_READ_REJECT
-.with_label_values(&["no_lease"])
-.inc_by(self.rejected_by_no_lease);
+LOCAL_READ_REJECT.no_lease.inc_by(self.rejected_by_no_lease);
self.rejected_by_no_lease = 0;
}
if self.rejected_by_epoch > 0 {
-LOCAL_READ_REJECT
-.with_label_values(&["epoch"])
-.inc_by(self.rejected_by_epoch);
+LOCAL_READ_REJECT.epoch.inc_by(self.rejected_by_epoch);
self.rejected_by_epoch = 0;
}
if self.rejected_by_appiled_term > 0 {
LOCAL_READ_REJECT
.with_label_values(&["appiled_term"])
.appiled_term
.inc_by(self.rejected_by_appiled_term);
self.rejected_by_appiled_term = 0;
}
if self.rejected_by_channel_full > 0 {
LOCAL_READ_REJECT
.with_label_values(&["channel_full"])
.channel_full
.inc_by(self.rejected_by_channel_full);
self.rejected_by_channel_full = 0;
}
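`LOCAL_READ_REJECT` deliberately uses the plain `make_static_metric!` variant rather than the auto-flush one: `ReadMetrics` already batches rejections in plain integer fields and flushes them in bulk here, so a once-at-init label lookup is enough. A condensed, hypothetical sketch of that pattern (one field shown; assumes the `LOCAL_READ_REJECT` static from metrics.rs above):

```rust
// Hypothetical simplification of ReadMetrics::flush above: accumulate
// locally, then push the batch through a pre-resolved static counter.
#[derive(Default)]
struct ReadMetrics {
    rejected_by_epoch: i64,
}

impl ReadMetrics {
    fn flush(&mut self) {
        if self.rejected_by_epoch > 0 {
            // One atomic add per flush; no per-call label hashing.
            LOCAL_READ_REJECT.epoch.inc_by(self.rejected_by_epoch);
            self.rejected_by_epoch = 0;
        }
    }
}
```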
36 changes: 13 additions & 23 deletions components/raftstore/src/store/worker/region.rs
@@ -261,11 +261,8 @@ impl<R: CasualRouter<RocksEngine>> SnapContext<R> {
kv_snap: RocksSnapshot,
notifier: SyncSender<RaftSnapshot>,
) {
-SNAP_COUNTER_VEC
-.with_label_values(&["generate", "all"])
-.inc();
-let gen_histogram = SNAP_HISTOGRAM.with_label_values(&["generate"]);
-let timer = gen_histogram.start_coarse_timer();
+SNAP_COUNTER.generate.all.inc();
+let start = tikv_util::time::Instant::now();

if let Err(e) = self.generate_snap(
region_id,
@@ -278,10 +275,8 @@ impl<R: CasualRouter<RocksEngine>> SnapContext<R> {
return;
}

-SNAP_COUNTER_VEC
-.with_label_values(&["generate", "success"])
-.inc();
-timer.observe_duration();
+SNAP_COUNTER.generate.success.inc();
+SNAP_HISTOGRAM.generate.observe(start.elapsed_secs());
}

/// Applies snapshot data of the Region.
@@ -365,35 +360,32 @@ impl<R: CasualRouter<RocksEngine>> SnapContext<R> {
/// Tries to apply the snapshot of the specified Region. It calls `apply_snap` to do the actual work.
fn handle_apply(&mut self, region_id: u64, status: Arc<AtomicUsize>) {
status.compare_and_swap(JOB_STATUS_PENDING, JOB_STATUS_RUNNING, Ordering::SeqCst);
SNAP_COUNTER_VEC.with_label_values(&["apply", "all"]).inc();
let apply_histogram = SNAP_HISTOGRAM.with_label_values(&["apply"]);
let timer = apply_histogram.start_coarse_timer();
SNAP_COUNTER.apply.all.inc();
// let apply_histogram = SNAP_HISTOGRAM.with_label_values(&["apply"]);
// let timer = apply_histogram.start_coarse_timer();
let start = tikv_util::time::Instant::now();

match self.apply_snap(region_id, Arc::clone(&status)) {
Ok(()) => {
status.swap(JOB_STATUS_FINISHED, Ordering::SeqCst);
-SNAP_COUNTER_VEC
-.with_label_values(&["apply", "success"])
-.inc();
+SNAP_COUNTER.apply.success.inc();
}
Err(Error::Abort) => {
warn!("applying snapshot is aborted"; "region_id" => region_id);
assert_eq!(
status.swap(JOB_STATUS_CANCELLED, Ordering::SeqCst),
JOB_STATUS_CANCELLING
);
-SNAP_COUNTER_VEC
-.with_label_values(&["apply", "abort"])
-.inc();
+SNAP_COUNTER.apply.abort.inc();
}
Err(e) => {
error!("failed to apply snap!!!"; "err" => %e);
status.swap(JOB_STATUS_FAILED, Ordering::SeqCst);
SNAP_COUNTER_VEC.with_label_values(&["apply", "fail"]).inc();
SNAP_COUNTER.apply.fail.inc();
}
}

-timer.observe_duration();
+SNAP_HISTOGRAM.apply.observe(start.elapsed_secs());
}

/// Cleans up the data within the range.
@@ -632,9 +624,7 @@ where
self.handle_pending_applies();
if !self.pending_applies.is_empty() {
// delay the apply and retry later
-SNAP_COUNTER_VEC
-.with_label_values(&["apply", "delay"])
-.inc();
+SNAP_COUNTER.apply.delay.inc()
}
}
Task::Destroy {
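The histogram call sites change shape because the generated `SnapHistogram` fields are thread-local `LocalHistogram`s and the commit now times snapshot work explicitly rather than through the vec's coarse-timer guard. The pattern, isolated from the generate/apply paths above (`tikv_util::time::Instant` is TiKV's monotonic clock):

```rust
// Before:
//   let timer = SNAP_HISTOGRAM.with_label_values(&["apply"]).start_coarse_timer();
//   ... work ...
//   timer.observe_duration();

// After, as used in handle_gen and handle_apply above:
let start = tikv_util::time::Instant::now();
// ... work ...
SNAP_HISTOGRAM.apply.observe(start.elapsed_secs());
```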
8 changes: 3 additions & 5 deletions components/raftstore/src/store/worker/split_check.rs
@@ -192,7 +192,7 @@ impl<S: CasualRouter<RocksEngine>> Runner<S> {
"start_key" => log_wrappers::Key(&start_key),
"end_key" => log_wrappers::Key(&end_key),
);
CHECK_SPILT_COUNTER_VEC.with_label_values(&["all"]).inc();
CHECK_SPILT_COUNTER.all.inc();

let mut host = self.coprocessor.new_split_checker_host(
&self.cfg,
@@ -247,16 +247,14 @@ impl<S: CasualRouter<RocksEngine>> Runner<S> {
warn!("failed to send check result"; "region_id" => region_id, "err" => %e);
}

-CHECK_SPILT_COUNTER_VEC
-.with_label_values(&["success"])
-.inc();
+CHECK_SPILT_COUNTER.success.inc();
} else {
debug!(
"no need to send, split key not found";
"region_id" => region_id,
);

CHECK_SPILT_COUNTER_VEC.with_label_values(&["ignore"]).inc();
CHECK_SPILT_COUNTER.ignore.inc();
}
}

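One operational caveat with the auto-flush counters used across these files: increments land in thread-local storage first, so a scrape can lag the true totals by up to the flush interval (about a second in the tikv rust-prometheus fork, if memory serves — treat the exact figure as an assumption). A quick way to confirm the flushed values reach the default registry, using only the stock `prometheus` API:

```rust
// Hedged sketch: dump the raftstore metric families from the default
// registry. Metric names are the ones registered in metrics.rs above.
fn dump_raftstore_metrics() {
    for mf in prometheus::gather()
        .iter()
        .filter(|mf| mf.get_name().starts_with("tikv_raftstore_"))
    {
        println!("{}: {} series", mf.get_name(), mf.get_metric().len());
    }
}
```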
