Skip to content

Commit 993fc85

Browse files
feat: initial draft of custom metric tool and its systemd timer (#1963)
To support performance monitoring on mainnet, add a tool that calculates custom metrics and exports them to Prometheus's `node_exporter` through the `textfile` collector. Currently, the total number of TLB shootdowns across all CPUs is exposed as `sum_tlb_shootdowns`, collected once per minute. A dedicated tool is needed because the latest `node_exporter` release does not allow filtering the data of its built-in `interrupts` collector, which could otherwise do this for us (at least until prometheus/node_exporter#3028 is included in the release branches); enabling that collector unfiltered would add many metrics with high cardinality. NODE-1445
1 parent b29e83a commit 993fc85

File tree

11 files changed

+371
-0
lines changed

11 files changed

+371
-0
lines changed

Cargo.lock

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ members = [
126126
"rs/ic_os/build_tools/diroid",
127127
"rs/ic_os/config",
128128
"rs/ic_os/fstrim_tool",
129+
"rs/ic_os/metrics_tool",
129130
"rs/ic_os/os_tools/guestos_tool",
130131
"rs/ic_os/os_tools/hostos_tool",
131132
"rs/ic_os/build_tools/inject_files",

ic-os/components/guestos.bzl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ component_files = {
8585
Label("monitoring/journald.conf"): "/etc/systemd/journald.conf",
8686
Label("monitoring/nft-exporter/nft-exporter.service"): "/etc/systemd/system/nft-exporter.service",
8787
Label("monitoring/nft-exporter/nft-exporter.timer"): "/etc/systemd/system/nft-exporter.timer",
88+
Label("monitoring/custom-metrics/metrics_tool.service"): "/etc/systemd/system/metrics_tool.service",
89+
Label("monitoring/custom-metrics/metrics_tool.timer"): "/etc/systemd/system/metrics_tool.timer",
8890

8991
# networking
9092
Label("networking/generate-network-config/guestos/generate-network-config.service"): "/etc/systemd/system/generate-network-config.service",
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
[Unit]
2+
Description=Report custom metrics once per minute
3+
4+
[Service]
5+
Type=oneshot
6+
ExecStart=/opt/ic/bin/metrics_tool --metrics /run/node_exporter/collector_textfile/custom_metrics.prom
7+
DeviceAllow=/dev/vda
8+
IPAddressDeny=any
9+
LockPersonality=yes
10+
MemoryDenyWriteExecute=yes
11+
NoNewPrivileges=yes
12+
PrivateDevices=no
13+
PrivateNetwork=yes
14+
PrivateTmp=yes
15+
PrivateUsers=no
16+
ProtectClock=yes
17+
ProtectControlGroups=yes
18+
ProtectHome=yes
19+
ProtectHostname=yes
20+
ProtectKernelModules=yes
21+
ProtectKernelTunables=yes
22+
ProtectSystem=strict
23+
ReadOnlyPaths=/proc/interrupts
24+
ReadWritePaths=/run/node_exporter/collector_textfile
25+
RestrictAddressFamilies=AF_UNIX
26+
RestrictAddressFamilies=~AF_UNIX
27+
RestrictNamespaces=yes
28+
RestrictRealtime=yes
29+
RestrictSUIDSGID=yes
30+
SystemCallArchitectures=native
31+
SystemCallErrorNumber=EPERM
32+
SystemCallFilter=@system-service
33+
UMask=022
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[Unit]
2+
Description=Collect custom metrics every minute
3+
4+
[Timer]
5+
OnBootSec=60s
6+
OnUnitActiveSec=60s
7+
Unit=metrics_tool.service
8+
9+
[Install]
10+
WantedBy=timers.target

ic-os/guestos/defs.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def image_deps(mode, malicious = False):
5050
"//cpp:infogetty": "/opt/ic/bin/infogetty:0755", # Terminal manager that replaces the login shell.
5151
"//cpp:prestorecon": "/opt/ic/bin/prestorecon:0755", # Parallel restorecon replacement for filesystem relabeling.
5252
"//rs/ic_os/release:metrics-proxy": "/opt/ic/bin/metrics-proxy:0755", # Proxies, filters, and serves public node metrics.
53+
"//rs/ic_os/release:metrics_tool": "/opt/ic/bin/metrics_tool:0755", # Collects and reports custom metrics.
5354

5455
# additional libraries to install
5556
"//rs/ic_os/release:nss_icos": "/usr/lib/x86_64-linux-gnu/libnss_icos.so.2:0644", # Allows referring to the guest IPv6 by name guestos from host, and host as hostos from guest.

rs/ic_os/metrics_tool/BUILD.bazel

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
load("@rules_rust//rust:defs.bzl", "rust_binary", "rust_library", "rust_test", "rust_test_suite")
2+
3+
package(default_visibility = ["//rs:ic-os-pkg"])
4+
5+
DEPENDENCIES = [
6+
# Keep sorted.
7+
"//rs/sys",
8+
"@crate_index//:anyhow",
9+
"@crate_index//:clap",
10+
]
11+
12+
DEV_DEPENDENCIES = [
13+
# Keep sorted.
14+
]
15+
16+
MACRO_DEPENDENCIES = []
17+
18+
ALIASES = {}
19+
20+
rust_library(
21+
name = "metrics_tool",
22+
srcs = glob(
23+
["src/**/*.rs"],
24+
exclude = ["src/main.rs"],
25+
),
26+
aliases = ALIASES,
27+
crate_name = "ic_metrics_tool",
28+
proc_macro_deps = MACRO_DEPENDENCIES,
29+
visibility = ["//rs:system-tests-pkg"],
30+
deps = DEPENDENCIES,
31+
)
32+
33+
rust_binary(
34+
name = "metrics_tool_bin",
35+
srcs = ["src/main.rs"],
36+
aliases = ALIASES,
37+
proc_macro_deps = MACRO_DEPENDENCIES,
38+
deps = DEPENDENCIES + [":metrics_tool"],
39+
)
40+
41+
rust_test(
42+
name = "metrics_tool_test",
43+
crate = ":metrics_tool",
44+
deps = DEPENDENCIES + DEV_DEPENDENCIES,
45+
)
46+
47+
rust_test_suite(
48+
name = "metrics_tool_integration",
49+
srcs = glob(["tests/**/*.rs"]),
50+
target_compatible_with = [
51+
"@platforms//os:linux",
52+
],
53+
deps = [":metrics_tool_bin"] + DEPENDENCIES + DEV_DEPENDENCIES,
54+
)

rs/ic_os/metrics_tool/Cargo.toml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[package]
2+
name = "ic-metrics-tool"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
[[bin]]
7+
name = "metrics_tool"
8+
path = "src/main.rs"
9+
10+
[dependencies]
11+
anyhow = { workspace = true }
12+
clap = { workspace = true }

rs/ic_os/metrics_tool/src/lib.rs

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
// TODO: refactor/merge this with fstrim_tool and guestos_tool metrics functionality
2+
use std::fs::File;
3+
use std::io::{self, Write};
4+
use std::path::Path;
5+
6+
// TODO: everything is floating point for now
7+
/// A single sample that can be rendered in the Prometheus text exposition format.
///
/// Values are always stored as `f64`, and every metric is currently rendered
/// with `# TYPE <name> counter`.
#[derive(Debug, Clone, PartialEq)]
pub struct Metric {
    name: String,
    value: f64,
    annotation: String,
    labels: Vec<(String, String)>,
}

impl Metric {
    /// Creates a metric with the default help text `"Custom metric"` and no labels.
    pub fn new(name: &str, value: f64) -> Self {
        Self::with_annotation(name, value, "Custom metric")
    }

    /// Creates a metric with an explicit help text (`annotation`) and no labels.
    pub fn with_annotation(name: &str, value: f64, annotation: &str) -> Self {
        Self {
            name: name.to_string(),
            value,
            annotation: annotation.to_string(),
            labels: Vec::new(),
        }
    }

    /// Replaces the help text and returns the metric (builder style).
    pub fn add_annotation(mut self, annotation: &str) -> Self {
        self.annotation = annotation.to_string();
        self
    }

    /// Appends a `key="value"` label and returns the metric (builder style).
    ///
    /// Labels are rendered in insertion order.
    pub fn add_label(mut self, key: &str, value: &str) -> Self {
        self.labels.push((key.to_string(), value.to_string()));
        self
    }

    // TODO: formatting of floats
    /// Converts the metric to the Prometheus text exposition format.
    ///
    /// The result consists of a `# HELP` line, a `# TYPE` line and the sample
    /// line, separated by `\n` and without a trailing newline. Backslashes and
    /// line feeds in the help text, and additionally double quotes in label
    /// values, are escaped so that arbitrary input cannot break the line
    /// structure of the output.
    pub fn to_prom_string(&self) -> String {
        let labels_str = if self.labels.is_empty() {
            String::new()
        } else {
            let rendered: Vec<String> = self
                .labels
                .iter()
                .map(|(key, value)| format!("{}=\"{}\"", key, Self::escape(value, true)))
                .collect();
            format!("{{{}}}", rendered.join(","))
        };
        format!(
            "# HELP {name} {help}\n# TYPE {name} counter\n{name}{labels} {value}",
            name = self.name,
            help = Self::escape(&self.annotation, false),
            labels = labels_str,
            value = self.value,
        )
    }

    /// Escapes `text` for the exposition format: `\` becomes `\\`, a line feed
    /// becomes `\n`, and, when `escape_quotes` is set (label values), `"`
    /// becomes `\"`.
    fn escape(text: &str, escape_quotes: bool) -> String {
        let mut escaped = String::with_capacity(text.len());
        for ch in text.chars() {
            match ch {
                '\\' => escaped.push_str("\\\\"),
                '\n' => escaped.push_str("\\n"),
                '"' if escape_quotes => escaped.push_str("\\\""),
                other => escaped.push(other),
            }
        }
        escaped
    }
}
63+
64+
pub struct MetricsWriter {
65+
file_path: String,
66+
}
67+
68+
impl MetricsWriter {
69+
pub fn new(file_path: &str) -> Self {
70+
Self {
71+
file_path: file_path.to_string(),
72+
}
73+
}
74+
75+
pub fn write_metrics(&self, metrics: &[Metric]) -> io::Result<()> {
76+
let path = Path::new(&self.file_path);
77+
let mut file = File::create(path)?;
78+
for metric in metrics {
79+
writeln!(file, "{}", metric.to_prom_string())?;
80+
}
81+
Ok(())
82+
}
83+
}
84+
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Returns a per-process scratch path inside the platform temp directory,
    /// so parallel runs (or different users) do not clobber each other's files.
    fn scratch_path(file_name: &str) -> PathBuf {
        std::env::temp_dir().join(format!("{}_{}", std::process::id(), file_name))
    }

    /// Writes `metrics` through a `MetricsWriter`, reads the file back and
    /// removes it again.
    fn write_and_read_back(file_name: &str, metrics: &[Metric]) -> String {
        let path = scratch_path(file_name);
        let writer = MetricsWriter::new(path.to_str().expect("temp path is valid UTF-8"));
        writer.write_metrics(metrics).unwrap();
        let content = std::fs::read_to_string(&path).unwrap();
        // Best-effort cleanup; a leftover file must not fail the test.
        let _ = std::fs::remove_file(&path);
        content
    }

    #[test]
    fn test_metric_to_string() {
        let metric = Metric::new("test_metric", 123.45)
            .add_label("label1", "value1")
            .add_label("label2", "value2");
        assert_eq!(
            metric.to_prom_string(),
            "# HELP test_metric Custom metric\n\
             # TYPE test_metric counter\n\
             test_metric{label1=\"value1\",label2=\"value2\"} 123.45"
        );
    }

    #[test]
    fn test_write_metrics() {
        let metrics = vec![
            Metric::new("metric1", 1.0),
            Metric::new("metric2", 2.0).add_label("label", "value"),
        ];
        let content = write_and_read_back("test_metrics.prom", &metrics);
        assert!(content.contains(
            "# HELP metric1 Custom metric\n\
             # TYPE metric1 counter\n\
             metric1 1"
        ));
        assert!(content.contains(
            "# HELP metric2 Custom metric\n\
             # TYPE metric2 counter\n\
             metric2{label=\"value\"} 2"
        ));
    }

    #[test]
    fn test_metric_large_value() {
        let metric = Metric::new("large_value_metric", 1.0e64);
        assert_eq!(
            metric.to_prom_string(),
            "# HELP large_value_metric Custom metric\n\
             # TYPE large_value_metric counter\n\
             large_value_metric 10000000000000000000000000000000000000000000000000000000000000000"
        );
    }

    #[test]
    fn test_metric_without_labels() {
        let metric = Metric::new("no_label_metric", 42.0);
        assert_eq!(
            metric.to_prom_string(),
            "# HELP no_label_metric Custom metric\n\
             # TYPE no_label_metric counter\n\
             no_label_metric 42"
        );
    }

    #[test]
    fn test_metric_with_annotation() {
        let metric = Metric::with_annotation("annotated_metric", 99.9, "This is a test metric");
        assert_eq!(
            metric.to_prom_string(),
            "# HELP annotated_metric This is a test metric\n\
             # TYPE annotated_metric counter\n\
             annotated_metric 99.9"
        );
    }

    #[test]
    fn test_write_empty_metrics() {
        let metrics: Vec<Metric> = Vec::new();
        let content = write_and_read_back("test_empty_metrics.prom", &metrics);
        assert!(content.is_empty());
    }

    #[test]
    fn test_metric_with_multiple_labels() {
        let metric = Metric::new("multi_label_metric", 10.0)
            .add_label("foo", "bar")
            .add_label("version", "1.0.0");
        assert_eq!(
            metric.to_prom_string(),
            "# HELP multi_label_metric Custom metric\n\
             # TYPE multi_label_metric counter\n\
             multi_label_metric{foo=\"bar\",version=\"1.0.0\"} 10"
        );
    }

    #[test]
    fn test_metric_with_empty_annotation() {
        let metric = Metric::with_annotation("empty_annotation_metric", 5.5, "");
        assert_eq!(
            metric.to_prom_string(),
            "# HELP empty_annotation_metric \n\
             # TYPE empty_annotation_metric counter\n\
             empty_annotation_metric 5.5"
        );
    }
}

0 commit comments

Comments
 (0)