Skip to content

Commit

Permalink
feat(crypto): CRP-2376 CRP-2377: Run fstrim_tool with randomized dela…
Browse files Browse the repository at this point in the history
…y on the guest OS crypto partition
  • Loading branch information
mbjorkqvist committed Mar 20, 2024
1 parent 5da5953 commit 7a3c9dc
Show file tree
Hide file tree
Showing 5 changed files with 141 additions and 17 deletions.
4 changes: 3 additions & 1 deletion ic-os/guestos/rootfs/Dockerfile
Expand Up @@ -113,7 +113,9 @@ RUN systemctl disable \
apt-daily.timer \
apt-daily-upgrade.service \
apt-daily-upgrade.timer \
motd-news.service
motd-news.service \
fstrim.service \
fstrim.timer

# Add user/group entries specified here: /usr/lib/sysusers.d/systemd.conf E.g., systemd-timesync/coredump.
## `systemd-sysusers` does not honor the SOURCE_DATE_EPOCH env var.
Expand Down
33 changes: 33 additions & 0 deletions ic-os/guestos/rootfs/etc/systemd/system/fstrim_tool.service
@@ -0,0 +1,33 @@
# Oneshot service that runs /opt/ic/bin/fstrim_tool against /var/lib/ic/crypto
# and writes its metrics file into the node_exporter textfile collector directory.
[Unit]
Description=Discard unused blocks on /var/lib/ic/crypto filesystem

[Service]
Type=oneshot
ExecStart=/opt/ic/bin/fstrim_tool --target /var/lib/ic/crypto --metrics /run/node_exporter/collector_textfile/fstrim.prom
# Sandboxing directives follow. Device access is explicitly allowed for /dev/vda
# only, and PrivateDevices is left off so that device stays visible to the tool.
DeviceAllow=/dev/vda
# No network access: all IP traffic is denied and the unit gets a private network namespace.
IPAddressDeny=any
LockPersonality=yes
MemoryDenyWriteExecute=yes
NoNewPrivileges=yes
PrivateDevices=no
PrivateNetwork=yes
PrivateTmp=yes
PrivateUsers=no
ProtectClock=yes
ProtectControlGroups=yes
ProtectHome=yes
ProtectHostname=yes
ProtectKernelModules=yes
ProtectKernelTunables=yes
ProtectSystem=strict
# The trim target is marked read-only for this unit; the metrics output
# directory is the only path explicitly made writable here.
ReadOnlyPaths=/var/lib/ic/crypto
ReadWritePaths=/run/node_exporter/collector_textfile
# NOTE(review): AF_UNIX is added to the allow-list and then removed again.
# Presumably this is the idiom for ending up with an empty allow-list (no socket
# address families permitted) - confirm against systemd.exec(5).
RestrictAddressFamilies=AF_UNIX
RestrictAddressFamilies=~AF_UNIX
RestrictNamespaces=yes
RestrictRealtime=yes
RestrictSUIDSGID=yes
SystemCallArchitectures=native
# System calls outside the @system-service set fail with EPERM.
SystemCallErrorNumber=EPERM
SystemCallFilter=@system-service
UMask=022
11 changes: 11 additions & 0 deletions ic-os/guestos/rootfs/etc/systemd/system/fstrim_tool.timer
@@ -0,0 +1,11 @@
# Timer that schedules a weekly activation with a randomized delay.
# NOTE(review): no Unit= is set, so this presumably activates the service with
# the same base name as this timer file - confirm against systemd.timer(5).
[Unit]
Description=Discard unused blocks once a week

[Timer]
# Fire on systemd's `weekly` calendar shorthand.
OnCalendar=weekly
# Accuracy window for the timer elapsing, set to 1h.
AccuracySec=1h
# Persistent timers record their last trigger time on disk so that a run missed
# while the machine was off can be caught up (see systemd.timer(5)).
Persistent=true
# Delay each activation by a random amount of up to 12h.
RandomizedDelaySec=12h

[Install]
WantedBy=timers.target
@@ -0,0 +1,36 @@
# Oneshot service that invokes fstrim_tool with --initialize_metrics_only and is
# ordered before node_exporter.service.
# NOTE(review): presumably the flag makes the tool write only the initial metrics
# file without trimming - confirm against fstrim_tool's CLI.
[Unit]
Description=Initialize fstrim metrics
Before=node_exporter.service

[Service]
Type=oneshot
ExecStart=/opt/ic/bin/fstrim_tool --target /var/lib/ic/crypto --metrics /run/node_exporter/collector_textfile/fstrim.prom --initialize_metrics_only
# Sandboxing directives follow. Device access is explicitly allowed for /dev/vda
# only, and PrivateDevices is left off.
DeviceAllow=/dev/vda
# No network access: all IP traffic is denied and the unit gets a private network namespace.
IPAddressDeny=any
LockPersonality=yes
MemoryDenyWriteExecute=yes
NoNewPrivileges=yes
PrivateDevices=no
PrivateNetwork=yes
PrivateTmp=yes
PrivateUsers=no
ProtectClock=yes
ProtectControlGroups=yes
ProtectHome=yes
ProtectHostname=yes
ProtectKernelModules=yes
ProtectKernelTunables=yes
ProtectSystem=strict
# The metrics output directory is the only path explicitly made writable here.
ReadWritePaths=/run/node_exporter/collector_textfile
# NOTE(review): AF_UNIX is added to the allow-list and then removed again.
# Presumably this is the idiom for ending up with an empty allow-list (no socket
# address families permitted) - confirm against systemd.exec(5).
RestrictAddressFamilies=AF_UNIX
RestrictAddressFamilies=~AF_UNIX
RestrictNamespaces=yes
RestrictRealtime=yes
RestrictSUIDSGID=yes
SystemCallArchitectures=native
# System calls outside the @system-service set fail with EPERM.
SystemCallErrorNumber=EPERM
SystemCallFilter=@system-service
UMask=022

[Install]
WantedBy=multi-user.target
74 changes: 58 additions & 16 deletions rs/tests/src/crypto/ic_crypto_fstrim_tool_test.rs
Expand Up @@ -7,30 +7,39 @@ bazel integration tests run in `linux-sandbox`, and running `/sbin/fstrim` there
Runbook::
. Set up a subnet with a single node
. Wait for the node to start up correctly and be healthy
. Attempt to run the `fstrim_tool` utility with the `--target` flag set to the directory
`/var/lib/ic/crypto` and the `--metrics` flag set to the file `/run/node_exporter/collector_textfile/fstrim.prom`
. Verify that the `fstrim_tool` invocation succeeded, and that the metrics were updated accordingly
. Wait for the node to start up correctly and be healthy.
. Verify that the `fstrim.prom` file exists and contains the initial metrics.
. Attempt to run the systemd service `setup-fstrim-metrics` to initialize the metrics to be served
by the `node_exporter`.
. Verify that the `setup-fstrim-metrics` service invocation succeeded, and that the metrics are
still in the initialized state.
. Attempt to run the systemd service `fstrim_tool` to run `fstrim` and update the metrics.
. Verify that the `fstrim_tool` service invocation succeeded and that the metrics were updated
successfully.
. Perform another invocation of the `fstrim_tool` service and verify that the second update of the
metrics was also successful.
Success:: The `fstrim_tool` utility was successfully executed on the `/var/lib/ic/crypto` partition,
and the metrics were successfully written to a file from where the `node_exporter` can read them.
Coverage::
. The discard operation is supported
. The `fstrim` metrics are written
. The `fstrim_tool` service can successfully execute `fstrim` and write the metrics to a file.
end::catalog[] */

use crate::driver::ic::InternetComputer;
use crate::driver::test_env::TestEnv;
use crate::driver::test_env_api::{
GetFirstHealthyNodeSnapshot, HasTopologySnapshot, IcNodeContainer, IcNodeSnapshot, SshSession,
retry, GetFirstHealthyNodeSnapshot, HasTopologySnapshot, IcNodeContainer, IcNodeSnapshot,
SshSession,
};
use ic_fstrim_tool::FsTrimMetrics;
use ic_registry_subnet_type::SubnetType;
use slog::{info, Logger};
use std::io::{BufRead, BufReader};
use std::time::Duration;

const FSTRIM_METRICS_FILE: &str = "/run/node_exporter/collector_textfile/fstrim.prom";

Expand All @@ -49,18 +58,23 @@ pub fn ic_crypto_fstrim_tool_test(env: TestEnv) {
let logger = env.logger();
let node = env.get_first_healthy_node_snapshot();

run_fstrim_tool(&node, &logger, " --initialize_metrics_only");
wait_for_initial_metrics_existence(&node, &logger);

let initial_metrics = retrieve_fstrim_metrics(&node, &logger);
info!(logger, "initial fstrim metrics: {:?}", initial_metrics);
assert_metrics_are_initialized(&initial_metrics);

run_fstrim_tool(&node, &logger, "");
initialize_fstrim_tool_metrics(&node, &logger);
let reinitialized_metrics = retrieve_fstrim_metrics(&node, &logger);
assert_metrics_are_initialized(&reinitialized_metrics);

run_fstrim_tool(&node, &logger);

let updated_metrics = retrieve_fstrim_metrics(&node, &logger);
info!(logger, "updated fstrim metrics: {:?}", updated_metrics);
assert_successful_run_and_metrics_valid_and_updated(&initial_metrics, &updated_metrics);
assert_successful_run_and_metrics_valid_and_updated(&reinitialized_metrics, &updated_metrics);

run_fstrim_tool(&node, &logger, "");
run_fstrim_tool(&node, &logger);

let twice_updated_metrics = retrieve_fstrim_metrics(&node, &logger);
info!(
Expand All @@ -70,6 +84,16 @@ pub fn ic_crypto_fstrim_tool_test(env: TestEnv) {
assert_successful_run_and_metrics_valid_and_updated(&updated_metrics, &twice_updated_metrics);
}

/// Blocks until the fstrim metrics file is present on `node`.
///
/// Repeatedly runs a `[ -f FSTRIM_METRICS_FILE ]` shell test on the node through
/// `retry`.
///
/// # Panics
///
/// Panics if `retry` ultimately returns an error.
fn wait_for_initial_metrics_existence(node: &IcNodeSnapshot, logger: &Logger) {
    // The command never changes between attempts, so build it once up front.
    let file_exists_cmd = format!("[ -f {} ]", FSTRIM_METRICS_FILE);
    // NOTE(review): presumably the overall timeout and the pause between
    // attempts, in that order - confirm against `retry`'s signature.
    let timeout = Duration::from_secs(500);
    let backoff = Duration::from_secs(5);

    let outcome = retry(logger.clone(), timeout, backoff, || {
        node.block_on_bash_script(file_exists_cmd.as_str())
    });
    if let Err(e) = outcome {
        panic!("Node didn't initialize fstrim metrics in time because {e:?}");
    }
}

fn retrieve_fstrim_metrics(node: &IcNodeSnapshot, logger: &Logger) -> FsTrimMetrics {
let cat_fstrim_metrics_cmd = format!("sudo cat {}", FSTRIM_METRICS_FILE);
info!(
Expand All @@ -85,23 +109,41 @@ fn retrieve_fstrim_metrics(node: &IcNodeSnapshot, logger: &Logger) -> FsTrimMetr
.expect("unable to parse fstrim metrics")
}

fn run_fstrim_tool(node: &IcNodeSnapshot, logger: &Logger, init_only_flag: &str) {
let run_fstrim_tool_cmd = format!(
"sudo /opt/ic/bin/fstrim_tool --target /var/lib/ic/crypto --metrics {}{}",
FSTRIM_METRICS_FILE, init_only_flag
/// Starts `setup-fstrim-metrics.service` on `node` and checks that the command
/// is silent.
///
/// # Panics
///
/// Panics if `block_on_bash_script` returns an error, or if the command's
/// whitespace-trimmed output is not empty.
fn initialize_fstrim_tool_metrics(node: &IcNodeSnapshot, logger: &Logger) {
    let start_unit_cmd = "sudo systemctl start setup-fstrim-metrics.service";
    info!(
        logger,
        "initializing fstrim_tool metrics using command: {}", start_unit_cmd
    );

    let raw_output = node
        .block_on_bash_script(start_unit_cmd)
        .expect("unable to initialize fstrim_tool metrics using SSH");
    // Any non-whitespace output from the command is treated as a failure.
    assert_eq!(raw_output.trim(), "");
}

/// Starts `fstrim_tool.service` on `node` via `sudo systemctl start` and asserts
/// that the command's whitespace-trimmed output is empty.
///
/// # Panics
///
/// Panics if `block_on_bash_script` returns an error, or if the command prints
/// anything other than whitespace.
// NOTE(review): the two lines below that reference `run_fstrim_tool_cmd` appear
// to be carried over from the previous revision of this function, shown next to
// their replacements - confirm against the actual file.
fn run_fstrim_tool(node: &IcNodeSnapshot, logger: &Logger) {
const RUN_FSTRIM_TOOL_CMD: &str = "sudo systemctl start fstrim_tool.service";
info!(
logger,
"running fstrim_tool using command: {}", run_fstrim_tool_cmd
"running fstrim_tool using command: {}", RUN_FSTRIM_TOOL_CMD
);
let fstrim_metrics_output = node
.block_on_bash_script(&run_fstrim_tool_cmd)
.block_on_bash_script(RUN_FSTRIM_TOOL_CMD)
.expect("unable to run fstrim_tool using SSH")
.trim()
.to_string();
// Any non-whitespace output from the command is treated as a failure.
assert_eq!(fstrim_metrics_output, "");
}

fn assert_metrics_are_initialized(metrics: &FsTrimMetrics) {
assert_eq!(metrics.total_runs, 0f64);
assert!(metrics.last_run_success);
assert_eq!(metrics.last_duration_milliseconds, 0f64);
}

fn assert_successful_run_and_metrics_valid_and_updated(
initial_metrics: &FsTrimMetrics,
updated_metrics: &FsTrimMetrics,
Expand Down

0 comments on commit 7a3c9dc

Please sign in to comment.