Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ use_repo(
"com_github_golang_mock",
"com_github_google_go_cmp",
"com_github_googlecloudrobotics_ilog",
"com_github_jaypipes_ghw",
"com_github_jaypipes_pcidb",
"com_github_motemen_go_loghttp",
"com_github_onsi_gomega",
"com_github_pkg_errors",
Expand Down Expand Up @@ -128,4 +130,5 @@ use_repo(
"distroless_cc",
"distroless_cc_linux_amd64",
"iptables_base",
"iptables_base_linux_amd64",
)
5 changes: 4 additions & 1 deletion src/app_charts/prometheus/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ app_chart(
files = [
":prometheus-operator-chart.robot",
],
images = {"http-relay-client": "//src/go/cmd/http-relay-client:http-relay-client-image"},
images = {
"http-relay-client": "//src/go/cmd/http-relay-client:http-relay-client-image",
"hw-exporter": "//src/go/cmd/hw-exporter:hw-exporter-image",
},
)

app(
Expand Down
81 changes: 81 additions & 0 deletions src/app_charts/prometheus/robot/hw-exporter.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: hw-exporter
spec:
selector:
matchLabels:
app: hw-exporter
template:
metadata:
labels:
app: hw-exporter
spec:
containers:
- name: hw-exporter
image: {{ .Values.registry }}{{ .Values.images.hw_exporter }}
args:
- --metrics-port=9100
- --chroot=/host
volumeMounts:
- mountPath: /host/proc
name: proc
readOnly: true
- mountPath: /host/sys
name: sys
readOnly: true
- mountPath: /host/usr/share
name: usr-share
readOnly: true
securityContext:
fsGroup: 65534
runAsGroup: 65534
runAsNonRoot: true
runAsUser: 65534
tolerations:
- operator: Exists
effect: NoSchedule
volumes:
- hostPath:
path: /proc
type: ""
name: proc
- hostPath:
path: /sys
type: ""
name: sys
# Mount pcidb from host, which could be in /usr/share/misc or /usr/share/hwdata.
- hostPath:
path: /usr/share
type: ""
name: usr-share
---
apiVersion: v1
kind: Service
metadata:
name: hw-exporter
labels:
app: hw-exporter
spec:
clusterIP: None
ports:
- port: 9100
name: http-metrics
selector:
app: hw-exporter
type: ClusterIP
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: hw-exporter
labels:
prometheus: kube-prometheus
spec:
endpoints:
- port: http-metrics
path: /metrics
interval: 60s
selector:
matchLabels:
app: hw-exporter
6 changes: 6 additions & 0 deletions src/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ require (
github.com/golang/glog v1.2.4
github.com/google/go-cmp v0.6.0
github.com/googlecloudrobotics/ilog v0.0.0-20240112131211-2efd642f756e
github.com/jaypipes/ghw v0.17.0
k8s.io/klog/v2 v2.110.1
)

Expand All @@ -75,6 +76,7 @@ require (
cloud.google.com/go/monitoring v1.16.3 // indirect
cloud.google.com/go/trace v1.10.4 // indirect
github.com/BurntSushi/toml v1.2.1 // indirect
github.com/StackExchange/wmi v1.2.1 // indirect
github.com/alessio/shellescape v1.4.1 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect
Expand All @@ -88,6 +90,7 @@ require (
github.com/go-logfmt/logfmt v0.6.0 // indirect
github.com/go-logr/logr v1.3.0 // indirect
github.com/go-logr/zapr v1.2.4 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/go-openapi/jsonpointer v0.20.0 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/swag v0.22.4 // indirect
Expand All @@ -106,13 +109,15 @@ require (
github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542 // indirect
github.com/imdario/mergo v0.3.16 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jaypipes/pcidb v1.0.1
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect
Expand Down Expand Up @@ -140,6 +145,7 @@ require (
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
howett.net/plist v1.0.0 // indirect
k8s.io/component-base v0.28.4 // indirect
k8s.io/kube-openapi v0.0.0-20231129212854-f0671cc7e66a // indirect
k8s.io/utils v0.0.0-20231127182322-b307cd553661 // indirect
Expand Down
15 changes: 15 additions & 0 deletions src/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3Q
github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y=
github.com/Masterminds/sprig v2.22.0+incompatible h1:z4yfnGrZ7netVz+0EDJ0Wi+5VZCSYp4Z0m2dk6cEM60=
github.com/Masterminds/sprig v2.22.0+incompatible/go.mod h1:y6hNFY5UBTIWBxnzTeuNhlNS5hqE0NB0E6fgfo2Br3o=
github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
Expand Down Expand Up @@ -142,6 +144,9 @@ github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY=
github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/zapr v1.2.4 h1:QHVo+6stLbfJmYGkQ7uGHUCu5hnAFAj6mDe6Ea0SeOo=
github.com/go-logr/zapr v1.2.4/go.mod h1:FyHWQIzQORZ0QVE1BtVHv3cKtNLuXsbNLtpuhNapBOA=
github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
github.com/go-openapi/jsonpointer v0.20.0 h1:ESKJdU9ASRfaPNOPRx12IUyA1vn3R9GiE3KYD14BXdQ=
github.com/go-openapi/jsonpointer v0.20.0/go.mod h1:6PGzBjjIIumbLYysB73Klnms1mwnU4G3YHOECG3CedA=
Expand Down Expand Up @@ -264,6 +269,10 @@ github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+h
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jaypipes/ghw v0.17.0 h1:EVLJeNcy5z6GK/Lqby0EhBpynZo+ayl8iJWY0kbEUJA=
github.com/jaypipes/ghw v0.17.0/go.mod h1:In8SsaDqlb1oTyrbmTC14uy+fbBMvp+xdqX51MidlD8=
github.com/jaypipes/pcidb v1.0.1 h1:WB2zh27T3nwg8AE8ei81sNRb9yWBii3JGNJtT7K9Oic=
github.com/jaypipes/pcidb v1.0.1/go.mod h1:6xYUz/yYEyOkIkUt2t2J2folIuZ4Yg6uByCGFXMCeE4=
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
Expand Down Expand Up @@ -306,6 +315,8 @@ github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 h1:jWpvCLoY8Z/e3VKvls
github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQthUoa9bqDv0ER0wrtXnBruoNd7aNjkbP+k=
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
Expand Down Expand Up @@ -561,6 +572,7 @@ golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand Down Expand Up @@ -784,6 +796,7 @@ gopkg.in/h2non/gock.v1 v1.1.2 h1:jBbHXgGBK/AoPVfJh5x4r/WxIrElvbLel8TCZkkZJoY=
gopkg.in/h2non/gock.v1 v1.1.2/go.mod h1:n7UGz/ckNChHiK05rDoiC4MYSunEC/lyaUm2WWaDva0=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
Expand All @@ -802,6 +815,8 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
k8s.io/api v0.28.4 h1:8ZBrLjwosLl/NYgv1P7EQLqoO8MGQApnbgH8tu3BMzY=
k8s.io/api v0.28.4/go.mod h1:axWTGrY88s/5YE+JSt4uUi6NMM+gur1en2REMR7IRj0=
k8s.io/apiextensions-apiserver v0.28.4 h1:AZpKY/7wQ8n+ZYDtNHbAJBb+N4AXXJvyZx6ww6yAJvU=
Expand Down
52 changes: 52 additions & 0 deletions src/go/cmd/hw-exporter/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library", "go_test")
load("@rules_oci//oci:defs.bzl", "oci_image")
load("@rules_pkg//pkg:tar.bzl", "pkg_tar")

go_library(
name = "go_default_library",
srcs = ["main.go"],
importpath = "github.com/googlecloudrobotics/core/src/go/cmd/hw-exporter",
visibility = ["//visibility:private"],
deps = [
"@com_github_googlecloudrobotics_ilog//:go_default_library",
"@com_github_jaypipes_ghw//:go_default_library",
"@com_github_jaypipes_ghw//pkg/option:go_default_library",
"@com_github_jaypipes_ghw//pkg/util:go_default_library",
"@com_github_prometheus_client_golang//prometheus:go_default_library",
"@com_github_prometheus_client_golang//prometheus/promhttp:go_default_library",
],
)

go_binary(
name = "hw-exporter",
embed = [":go_default_library"],
visibility = ["//visibility:public"],
)

pkg_tar(
name = "hw-exporter-image-layer",
srcs = [":hw-exporter"],
extension = "tar.gz",
)

oci_image(
name = "hw-exporter-image",
base = "@distroless_base",
entrypoint = ["/hw-exporter"],
tars = [":hw-exporter-image-layer"],
visibility = ["//visibility:public"],
)

go_test(
name = "go_default_test",
srcs = ["main_test.go"],
embed = [":go_default_library"],
deps = [
"@com_github_jaypipes_ghw//:go_default_library",
"@com_github_jaypipes_ghw//pkg/option:go_default_library",
"@com_github_jaypipes_ghw//pkg/pci:go_default_library",
"@com_github_jaypipes_ghw//pkg/util:go_default_library",
"@com_github_jaypipes_pcidb//:go_default_library",
"@com_github_prometheus_client_golang//prometheus/testutil:go_default_library",
],
)
122 changes: 122 additions & 0 deletions src/go/cmd/hw-exporter/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// hw-exporter exposes a Prometheus metric pci_device_count that indicates the
// number of each PCI device type (vendor/product/class/driver) installed on
// this node.
package main

import (
"context"
"flag"
"fmt"
"log/slog"
"net/http"
"os"
"os/signal"
"syscall"

"github.com/googlecloudrobotics/ilog"
"github.com/jaypipes/ghw"
"github.com/jaypipes/ghw/pkg/option"
"github.com/jaypipes/ghw/pkg/util"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)

// Command-line flags. They are read only after flag.Parse() has run in main.
var (
// metricsPort is the TCP port the metrics HTTP server listens on.
metricsPort = flag.Int("metrics-port", 9999, "Port to expose Prometheus metrics on.")
// logLevel is a numeric slog.Level; it defaults to slog.LevelInfo.
logLevel = flag.Int("log-level", int(slog.LevelInfo), "the log message level required to be logged")
// chroot is handed to ghw as the Chroot option, so hardware info is read
// relative to this directory instead of "/".
chroot = flag.String("chroot", "/", "Path to chroot into before collecting hardware info.")
)

// pciCollector is a prometheus.Collector that reports the number of PCI
// devices found on the node.
type pciCollector struct {
// pciDeviceCount is the descriptor sent by Describe and used to build the
// metrics emitted by Collect.
pciDeviceCount *prometheus.Desc
}

// newPciCollector returns a pciCollector whose pci_device_count descriptor is
// labelled by vendor, product, class and driver.
func newPciCollector() *pciCollector {
	labelNames := []string{"vendor", "product", "class", "driver"}
	desc := prometheus.NewDesc(
		"pci_device_count",
		"Number of PCI devices by vendor, product, class, and driver.",
		labelNames,
		nil, // no constant labels
	)
	return &pciCollector{pciDeviceCount: desc}
}

// Describe implements the prometheus.Collector interface by sending the
// collector's only descriptor, pciDeviceCount, on ch.
func (c *pciCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- c.pciDeviceCount
}

// getNameOrID picks the label value for one component of a PCI device: the
// human-readable name when it is known, otherwise the raw ID prefixed with
// "0x". A name equal to util.UNKNOWN is treated as not known.
func getNameOrID(name, id string) string {
	if name != util.UNKNOWN {
		return name
	}
	return "0x" + id
}

// Collect implements the prometheus.Collector interface, counting the number of
// devices by vendor/product/class/driver.
func (c *pciCollector) Collect(ch chan<- prometheus.Metric) {
pciInfo, err := ghw.PCI(&option.Option{Chroot: chroot})
if err != nil {
slog.Error("Failed to get PCI info", ilog.Err(err))
return
}

deviceCounts := make(map[[4]string]int)
for _, device := range pciInfo.Devices {
vendor := getNameOrID(device.Vendor.Name, device.Vendor.ID)
product := getNameOrID(device.Product.Name, device.Product.ID)
class := getNameOrID(device.Class.Name, device.Class.ID)
labels := [4]string{vendor, product, class, device.Driver}
deviceCounts[labels]++
}

for labels, count := range deviceCounts {
ch <- prometheus.MustNewConstMetric(c.pciDeviceCount, prometheus.GaugeValue, float64(count), labels[0], labels[1], labels[2], labels[3])
}
Copy link
Copy Markdown
Contributor

@ensonic ensonic Jul 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do you need the map? Are there duplicates?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This counts up if we have two devices of the same type (eg a card with multiple NICs).

}

func main() {
flag.Parse()
logHandler := ilog.NewLogHandler(slog.Level(*logLevel), os.Stderr)
slog.SetDefault(slog.New(logHandler))

// Run once on startup to test container setup, this is useful during development.
_, err := ghw.PCI(&option.Option{Chroot: chroot})
if err != nil {
slog.Error("Failed to get PCI info", ilog.Err(err))
os.Exit(1)
}

// Construct and run the metrics server until stopped by k8s (or Ctrl+C).
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT)
defer cancel()

registry := prometheus.NewRegistry()
registry.MustRegister(newPciCollector())

mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{}))

server := &http.Server{
Addr: fmt.Sprintf(":%d", *metricsPort),
Handler: mux,
}

go func() {
Comment thread
drigz marked this conversation as resolved.
slog.Info("Starting metrics server", slog.Int("port", *metricsPort))
if err := server.ListenAndServe(); err != http.ErrServerClosed {
slog.Error("Metrics server failed", ilog.Err(err))
os.Exit(1)
}
}()

// Call Shutdown() in the main goroutine because ListenAndServe() returns
// immediately but if the main goroutine ends then, the process will stop
// before finishing any ongoing requests.
<-ctx.Done()
slog.Info("Shutting down metrics server...")
server.Shutdown(context.Background())
}
Loading
Loading