Skip to content

Commit

Permalink
feat(telemetry): enable proxying of certain replica metrics from Gues…
Browse files Browse the repository at this point in the history
…tOS to HostOS.
  • Loading branch information
DFINITYManu committed Jan 19, 2024
1 parent e379220 commit 5413ab1
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 4 deletions.
2 changes: 2 additions & 0 deletions .gitlab/CODEOWNERS
Validating CODEOWNERS rules …
Expand Up @@ -62,6 +62,8 @@ go_deps.bzl @dfinity-lab/teams/idx
# [metrics-proxy]
/ic-os/hostos/rootfs/etc/metrics-proxy.yaml @dfinity-lab/teams/DRE
/ic-os/hostos/rootfs/etc/systemd/system/metrics-proxy.service @dfinity-lab/teams/DRE
/ic-os/guestos/rootfs/etc/metrics-proxy.yaml @dfinity-lab/teams/DRE
/ic-os/guestos/rootfs/etc/systemd/system/metrics-proxy.service @dfinity-lab/teams/DRE

# [nss_icos]
/rs/ic_os/nss_icos/ @dfinity-lab/teams/DRE
Expand Down
1 change: 1 addition & 0 deletions ic-os/guestos/defs.bzl
Expand Up @@ -40,6 +40,7 @@ def image_deps(mode, malicious = False):
"//publish/binaries:orchestrator": "/opt/ic/bin/orchestrator:0755",
"//publish/binaries:ic-boundary": "/opt/ic/bin/ic-boundary:0755",
("//publish/malicious:replica" if malicious else "//publish/binaries:replica"): "/opt/ic/bin/replica:0755", # Install the malicious replica if set
"//publish/binaries:metrics-proxy": "/opt/ic/bin/metrics-proxy:0755",
"//publish/binaries:sandbox_launcher": "/opt/ic/bin/sandbox_launcher:0755",
"//publish/binaries:state-tool": "/opt/ic/bin/state-tool:0755",
"//publish/binaries:vsock_guest": "/opt/ic/bin/vsock_guest:0755",
Expand Down
6 changes: 6 additions & 0 deletions ic-os/guestos/rootfs/Dockerfile
Expand Up @@ -201,6 +201,12 @@ RUN addgroup node_exporter && \
chmod 0644 /etc/default/node_exporter \
/etc/node_exporter/web.yml

# User which will run the metrics proxy service.
# Needs access to the node exporter SSL certificate private key,
# stored in /etc/node_exporter.
RUN adduser --system --disabled-password --home /var/lib/metrics-proxy --group --no-create-home metrics-proxy && \
usermod -a -G node_exporter metrics-proxy

# Clear all files that may lead to a non-deterministic build.
RUN apt-get clean && \
find /usr/local/share/fonts -name .uuid | xargs rm && \
Expand Down
28 changes: 28 additions & 0 deletions ic-os/guestos/rootfs/etc/metrics-proxy.yaml
@@ -0,0 +1,28 @@
proxies:
# Allow proxying of certain metrics emitted by the replica.
- listen_on:
url: https://[::]:42372/metrics/guestos_replica
# We reuse the SSL cert of node exporter.
certificate_file: /etc/node_exporter/node_exporter.crt
key_file: /etc/node_exporter/node_exporter.key
connect_to:
# Replica listens HTTP on this port.
url: http://[::1]:9090/metrics
# In case of conflict, the last matching rule
# overrides any previous rules.
# Full documentation at:
# https://github.com/dfinity/metrics-proxy
label_filters:
- regex: .*
actions:
- drop
# Consensus metrics.
- regex: artifact_pool_consensus_height_stat
actions:
- keep
metrics:
# Telemetry of metrics-proxy itself (port open for GuestOS in ic.json5.template, see nftables.conf in HostOS for context).
url: https://[::]:19100/metrics
# We reuse the SSL cert of node exporter.
certificate_file: /etc/node_exporter/node_exporter.crt
key_file: /etc/node_exporter/node_exporter.key
16 changes: 16 additions & 0 deletions ic-os/guestos/rootfs/etc/systemd/system/metrics-proxy.service
@@ -0,0 +1,16 @@
# systemd unit that runs the Prometheus metrics proxy using the
# configuration in /etc/metrics-proxy.yaml.
[Unit]
Description=Prometheus metrics proxy
After=node_exporter.service
# Start rate limiting: allow at most 5 starts within any 60 second window.
# These settings belong in [Unit] (systemd >= 230); the [Service] placement
# and the StartLimitInterval= spelling are only kept by systemd for
# backward compatibility.
StartLimitIntervalSec=60
StartLimitBurst=5

[Service]
# Dedicated unprivileged account for the proxy.
User=metrics-proxy
ExecStart=/opt/ic/bin/metrics-proxy /etc/metrics-proxy.yaml
# Restart only on unclean exits, waiting 10 seconds between attempts.
Restart=on-failure
RestartSec=10
# Stop the proxy with SIGINT instead of the default SIGTERM.
KillSignal=SIGINT
# Raise the open file descriptor limit for the proxy process.
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
5 changes: 3 additions & 2 deletions ic-os/guestos/rootfs/opt/ic/share/ic.json5.template
Expand Up @@ -230,6 +230,7 @@ table ip6 filter {\n\
icmpv6 type nd-router-advert accept\n\
icmpv6 type nd-neighbor-solicit accept\n\
icmpv6 type nd-neighbor-advert accept\n\
ip6 saddr { hostos } ct state { new } tcp dport { 42372 } accept # Allow access from HostOS metrics-proxy so GuestOS metrics-proxy can proxy certain metrics to HostOS.\n\
<<IPv6_TCP_RULES>>\n\
<<IPv6_UDP_RULES>>\n\
}\n\
Expand Down Expand Up @@ -328,14 +329,14 @@ table ip6 filter {\n\
"2a0f:cd00:0002::/56",
"fd00:2:1:1::/64",
],
ports: [22, 2497, 4100, 7070, 8080, 9090, 9091, 9100, 19531],
ports: [22, 2497, 4100, 7070, 8080, 9090, 9091, 9100, 19100, 19531],
action: 1,
comment: "Default rule from template",
direction: 1,
}],
tcp_ports_for_node_whitelist: [2497, 4100, 8080],
udp_ports_for_node_whitelist: [4100],
ports_for_http_adapter_blacklist: [22, 2497, 4100, 7070, 8080, 9090, 9091, 9100, 19531],
ports_for_http_adapter_blacklist: [22, 2497, 4100, 7070, 8080, 9090, 9091, 9100, 19100, 19531],
max_simultaneous_connections_per_ip_address: 100,
},

Expand Down
17 changes: 16 additions & 1 deletion ic-os/hostos/rootfs/etc/metrics-proxy.yaml
Expand Up @@ -72,8 +72,23 @@ proxies:
# The expectation is that clients will scrape at a
# maximum rate of 1 per 10 seconds (6 / min).
cache_duration: 8s
- listen_on:
url: https://[::]:42372/metrics/guestos_replica
# We reuse the SSL cert of node exporter.
certificate_file: /etc/node_exporter/node_exporter.crt
key_file: /etc/node_exporter/node_exporter.key
connect_to:
url: https://guestos:42372/metrics/guestos_replica
tolerate_bad_tls: true
# The GuestOS metrics proxy already does filtering.
# See ic-os/guestos/rootfs/etc/metrics-proxy.yaml
# for the filters.
label_filters: []
# The expectation is that clients will scrape at a
# maximum rate of 1 per 10 seconds (6 / min).
cache_duration: 8s
metrics:
# Metrics clients only (see nftables.conf in HostOS).
# Telemetry of metrics-proxy itself (see nftables.conf in HostOS).
url: https://[::]:19100/metrics
# We reuse the SSL cert of node exporter.
certificate_file: /etc/node_exporter/node_exporter.crt
Expand Down
9 changes: 8 additions & 1 deletion ic-os/hostos/rootfs/etc/nftables.conf
Expand Up @@ -190,7 +190,7 @@ table ip6 filter {
icmpv6 type nd-router-advert accept
icmpv6 type nd-neighbor-solicit accept
icmpv6 type nd-neighbor-advert accept
ip6 saddr @dfinity_dcs ct state { new } tcp dport { 22, 9100, 19531 } accept
ip6 saddr @dfinity_dcs ct state { new } tcp dport { 22, 9100, 19531, 19100 } accept
ip6 saddr @telemetry_clients ct state { new } tcp dport { 9100, 19531, 19100 } accept
ip6 saddr @node_providers ct state { new } tcp dport { 22, 9100, 19531 } accept
tcp dport { 42372 } goto metrics_proxy
Expand Down Expand Up @@ -218,5 +218,12 @@ table ip6 filter {
ip6 daddr { ::/0 } ct state { new } udp dport { 53 } accept
ip6 daddr { ::/0 } ct state { new } udp dport { 123 } accept
ip6 daddr { ::/0 } ct state { new } tcp dport { 80, 8080, 443 } accept
# We would ordinarily add a host name in the line following this comment,
# but nftables starts before the network is up, so there is no DNS name
# resolution available at the time this file is parsed by nftables.service.
# The counterpart in ic.json5.template (GuestOS) *does* have a host name,
# because the replica service which applies the firewall rules does start
# well after the network is up.
ip6 daddr { ::/0 } ct state { new } tcp dport { 42372 } accept comment "Permit outbound connections to metrics-proxy instances so local metrics-proxy can fetch data from GuestOS metrics-proxy."
}
}

0 comments on commit 5413ab1

Please sign in to comment.