From 5413ab16d10e309129dd51cb46b6db5f5bb56e38 Mon Sep 17 00:00:00 2001 From: Manuel Amador Briz Date: Fri, 19 Jan 2024 13:19:33 +0000 Subject: [PATCH] feat(telemetry): enable proxying of certain replica metrics from GuestOS to HostOS. --- .gitlab/CODEOWNERS | 2 ++ ic-os/guestos/defs.bzl | 1 + ic-os/guestos/rootfs/Dockerfile | 6 ++++ ic-os/guestos/rootfs/etc/metrics-proxy.yaml | 28 +++++++++++++++++++ .../etc/systemd/system/metrics-proxy.service | 16 +++++++++++ .../rootfs/opt/ic/share/ic.json5.template | 5 ++-- ic-os/hostos/rootfs/etc/metrics-proxy.yaml | 17 ++++++++++- ic-os/hostos/rootfs/etc/nftables.conf | 9 +++++- 8 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 ic-os/guestos/rootfs/etc/metrics-proxy.yaml create mode 100644 ic-os/guestos/rootfs/etc/systemd/system/metrics-proxy.service diff --git a/.gitlab/CODEOWNERS b/.gitlab/CODEOWNERS index 8b401d57167..d763e8b7fb4 100644 --- a/.gitlab/CODEOWNERS +++ b/.gitlab/CODEOWNERS @@ -62,6 +62,8 @@ go_deps.bzl @dfinity-lab/teams/idx # [metrics-proxy] /ic-os/hostos/rootfs/etc/metrics-proxy.yaml @dfinity-lab/teams/DRE /ic-os/hostos/rootfs/etc/systemd/system/metrics-proxy.service @dfinity-lab/teams/DRE +/ic-os/guestos/rootfs/etc/metrics-proxy.yaml @dfinity-lab/teams/DRE +/ic-os/guestos/rootfs/etc/systemd/system/metrics-proxy.service @dfinity-lab/teams/DRE # [nss_icos] /rs/ic_os/nss_icos/ @dfinity-lab/teams/DRE diff --git a/ic-os/guestos/defs.bzl b/ic-os/guestos/defs.bzl index e868c59de71..f2f7760230a 100644 --- a/ic-os/guestos/defs.bzl +++ b/ic-os/guestos/defs.bzl @@ -40,6 +40,7 @@ def image_deps(mode, malicious = False): "//publish/binaries:orchestrator": "/opt/ic/bin/orchestrator:0755", "//publish/binaries:ic-boundary": "/opt/ic/bin/ic-boundary:0755", ("//publish/malicious:replica" if malicious else "//publish/binaries:replica"): "/opt/ic/bin/replica:0755", # Install the malicious replica if set + "//publish/binaries:metrics-proxy": "/opt/ic/bin/metrics-proxy:0755", 
"//publish/binaries:sandbox_launcher": "/opt/ic/bin/sandbox_launcher:0755", "//publish/binaries:state-tool": "/opt/ic/bin/state-tool:0755", "//publish/binaries:vsock_guest": "/opt/ic/bin/vsock_guest:0755", diff --git a/ic-os/guestos/rootfs/Dockerfile b/ic-os/guestos/rootfs/Dockerfile index 9432354573c..e799a1d9811 100644 --- a/ic-os/guestos/rootfs/Dockerfile +++ b/ic-os/guestos/rootfs/Dockerfile @@ -201,6 +201,12 @@ RUN addgroup node_exporter && \ chmod 0644 /etc/default/node_exporter \ /etc/node_exporter/web.yml +# User which will run the metrics proxy service. +# Needs access to the node exporter SSL certificate private key, +# stored in /etc/node_exporter. +RUN adduser --system --disabled-password --home /var/lib/metrics-proxy --group --no-create-home metrics-proxy && \ + usermod -a -G node_exporter metrics-proxy + # Clear all files that may lead to indeterministic build. RUN apt-get clean && \ find /usr/local/share/fonts -name .uuid | xargs rm && \ diff --git a/ic-os/guestos/rootfs/etc/metrics-proxy.yaml b/ic-os/guestos/rootfs/etc/metrics-proxy.yaml new file mode 100644 index 00000000000..66db5844c0d --- /dev/null +++ b/ic-os/guestos/rootfs/etc/metrics-proxy.yaml @@ -0,0 +1,28 @@ +proxies: + # Allow proxying of certain metrics emitted by the replica. + - listen_on: + url: https://[::]:42372/metrics/guestos_replica + # We reuse the SSL cert of node exporter. + certificate_file: /etc/node_exporter/node_exporter.crt + key_file: /etc/node_exporter/node_exporter.key + connect_to: + # Replica listens HTTP on this port. + url: http://[::1]:9090/metrics + # In case of conflict, the last matching rule + # overrides any previous rules. + # Full documentation at: + # https://github.com/dfinity/metrics-proxy + label_filters: + - regex: .* + actions: + - drop + # Consensus metrics. 
+ - regex: artifact_pool_consensus_height_stat + actions: + - keep +metrics: + # Telemetry of metrics-proxy itself (port open for GuestOS in ic.json5.template, see nftables.conf in HostOS for context). + url: https://[::]:19100/metrics + # We reuse the SSL cert of node exporter. + certificate_file: /etc/node_exporter/node_exporter.crt + key_file: /etc/node_exporter/node_exporter.key diff --git a/ic-os/guestos/rootfs/etc/systemd/system/metrics-proxy.service b/ic-os/guestos/rootfs/etc/systemd/system/metrics-proxy.service new file mode 100644 index 00000000000..55d6775375b --- /dev/null +++ b/ic-os/guestos/rootfs/etc/systemd/system/metrics-proxy.service @@ -0,0 +1,16 @@ +[Unit] +Description=Prometheus metrics proxy +After=node_exporter.service + +[Service] +User=metrics-proxy +ExecStart=/opt/ic/bin/metrics-proxy /etc/metrics-proxy.yaml +Restart=on-failure +RestartSec=10 +KillSignal=SIGINT +StartLimitBurst=5 +StartLimitInterval=60 +LimitNOFILE=65536 + +[Install] +WantedBy=multi-user.target diff --git a/ic-os/guestos/rootfs/opt/ic/share/ic.json5.template b/ic-os/guestos/rootfs/opt/ic/share/ic.json5.template index 774f4265868..90648aa40ba 100644 --- a/ic-os/guestos/rootfs/opt/ic/share/ic.json5.template +++ b/ic-os/guestos/rootfs/opt/ic/share/ic.json5.template @@ -230,6 +230,7 @@ table ip6 filter {\n\ icmpv6 type nd-router-advert accept\n\ icmpv6 type nd-neighbor-solicit accept\n\ icmpv6 type nd-neighbor-advert accept\n\ + ip6 saddr { hostos } ct state { new } tcp dport { 42372 } accept # Allow access from HostOS metrics-proxy so GuestOS metrics-proxy can proxy certain metrics to HostOS.\n\ <>\n\ <>\n\ }\n\ @@ -328,14 +329,14 @@ table ip6 filter {\n\ "2a0f:cd00:0002::/56", "fd00:2:1:1::/64", ], - ports: [22, 2497, 4100, 7070, 8080, 9090, 9091, 9100, 19531], + ports: [22, 2497, 4100, 7070, 8080, 9090, 9091, 9100, 19100, 19531], action: 1, comment: "Default rule from template", direction: 1, }], tcp_ports_for_node_whitelist: [2497, 4100, 8080], udp_ports_for_node_whitelist: 
[4100], - ports_for_http_adapter_blacklist: [22, 2497, 4100, 7070, 8080, 9090, 9091, 9100, 19531], + ports_for_http_adapter_blacklist: [22, 2497, 4100, 7070, 8080, 9090, 9091, 9100, 19100, 19531], max_simultaneous_connections_per_ip_address: 100, }, diff --git a/ic-os/hostos/rootfs/etc/metrics-proxy.yaml b/ic-os/hostos/rootfs/etc/metrics-proxy.yaml index 3019d34faf1..43b8ec9704c 100644 --- a/ic-os/hostos/rootfs/etc/metrics-proxy.yaml +++ b/ic-os/hostos/rootfs/etc/metrics-proxy.yaml @@ -72,8 +72,23 @@ proxies: # The expectation is that clients will scrape at a # maximum rate of 1 per 10 seconds (6 / min). cache_duration: 8s + - listen_on: + url: https://[::]:42372/metrics/guestos_replica + # We reuse the SSL cert of node exporter. + certificate_file: /etc/node_exporter/node_exporter.crt + key_file: /etc/node_exporter/node_exporter.key + connect_to: + url: https://guestos:42372/metrics/guestos_replica + tolerate_bad_tls: true + # The GuestOS metrics proxy already does filtering. + # See ic-os/guestos/rootfs/etc/metrics-proxy.yaml + # for the filters. + label_filters: [] + # The expectation is that clients will scrape at a + # maximum rate of 1 per 10 seconds (6 / min). + cache_duration: 8s metrics: - # Metrics clients only (see nftables.conf in HostOS). + # Telemetry of metrics-proxy itself (see nftables.conf in HostOS). url: https://[::]:19100/metrics # We reuse the SSL cert of node exporter. 
certificate_file: /etc/node_exporter/node_exporter.crt diff --git a/ic-os/hostos/rootfs/etc/nftables.conf b/ic-os/hostos/rootfs/etc/nftables.conf index 2243caeb935..30ef77d6cb4 100644 --- a/ic-os/hostos/rootfs/etc/nftables.conf +++ b/ic-os/hostos/rootfs/etc/nftables.conf @@ -190,7 +190,7 @@ table ip6 filter { icmpv6 type nd-router-advert accept icmpv6 type nd-neighbor-solicit accept icmpv6 type nd-neighbor-advert accept - ip6 saddr @dfinity_dcs ct state { new } tcp dport { 22, 9100, 19531 } accept + ip6 saddr @dfinity_dcs ct state { new } tcp dport { 22, 9100, 19531, 19100 } accept ip6 saddr @telemetry_clients ct state { new } tcp dport { 9100, 19531, 19100 } accept ip6 saddr @node_providers ct state { new } tcp dport { 22, 9100, 19531 } accept tcp dport { 42372 } goto metrics_proxy @@ -218,5 +218,12 @@ table ip6 filter { ip6 daddr { ::/0 } ct state { new } udp dport { 53 } accept ip6 daddr { ::/0 } ct state { new } udp dport { 123 } accept ip6 daddr { ::/0 } ct state { new } tcp dport { 80, 8080, 443 } accept + # We would ordinarily add a host name in the line following this comment, + # but nftables starts before the network is up, so there is no DNS name + # resolution available at the time this file is parsed by nftables.service. + # The counterpart in ic.json5.template (GuestOS) *does* have a host name, + # because the replica service which applies the firewall rules does start + # well after the network is up. + ip6 daddr { ::/0 } ct state { new } tcp dport { 42372 } accept comment "Permit outbound connections to metrics-proxy instances so local metrics-proxy can fetch data from GuestOS metrics-proxy." } }