From bf67722d4e7e02d44dd29c4436e9a8d2ef960fa5 Mon Sep 17 00:00:00 2001 From: Micah Nagel Date: Thu, 25 Apr 2024 20:07:51 -0600 Subject: [PATCH] feat: add `monitor` to operator, fix monitoring setup (#256) ## Description This adds a new section to the `Package` spec, `monitor`. This functionality is briefly documented [here](https://github.com/defenseunicorns/uds-core/blob/operator-monitor-magic/docs/MONITOR.md). Also included is a mutation for all service monitors to handle them in the expected Istio mTLS way. This PR also fixes some missing dashboards and enables all default dashboards from the upstream kube-prometheus-stack chart (this may be excessive?). Currently monitored: - Prometheus stack (operator, self, alertmanager) - Loki - kube-system things (kubelet, coredns, apiserver) - Promtail - Metrics-server - Velero - Keycloak - Grafana - All Istio Envoy proxies (podmonitor) Not added here: - NeuVector: Currently has limited config options with regard to auth; keeping this disabled in anticipation of SSO on NeuVector, with a desire to contribute upstream to enable our use case of SSO-only auth. In addition, this PR switches single package testing to use/add Istio. This appears to add around 1 minute to each pipeline run, but: - allows us to test the Istio VS endpoints in single package checks (some quirks with this in the current state due to SSO redirects, but it will allow us to do e2e testing in those pipelines in the future) - allows us to always assume Istio and not build bespoke Pepr code for the specific no-Istio scenario (ex: Prometheus can assume certs) This would still allow someone to run locally without Istio for some packages (un-inject, mess around with mTLS, etc.), which may still be useful to identify where Istio is causing problems. This just switches our CI posture so that we always assume Istio.
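For example (mirroring the Grafana chart change included in this PR), a package opts into metrics scraping with an entry like:

```yaml
spec:
  monitor:
    - selector:
        app.kubernetes.io/name: grafana
      targetPort: 3000
      portName: service
      description: Metrics
```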
## Related Issue Fixes https://github.com/defenseunicorns/uds-core/issues/17 ## Type of change - [ ] Bug fix (non-breaking change which fixes an issue) - [x] New feature (non-breaking change which adds functionality) - [ ] Other (security config, docs update, etc) ## Checklist before merging - [x] Test, docs, adr added or updated as needed - [x] [Contributor Guide Steps](https://github.com/defenseunicorns/uds-template-capability/blob/main/CONTRIBUTING.md#submitting-a-pull-request) followed --------- Co-authored-by: Tristan Holaday <40547442+TristanHoladay@users.noreply.github.com> Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- .yamllint | 1 + docs/MONITOR.md | 50 ++ pepr.ts | 4 + src/grafana/chart/templates/uds-package.yaml | 7 + src/grafana/tasks.yaml | 22 +- src/grafana/values/values.yaml | 4 + src/keycloak/chart/templates/istio-admin.yaml | 13 +- src/keycloak/chart/templates/uds-package.yaml | 10 + src/keycloak/tasks.yaml | 28 +- .../chart/templates/service-monitor.yaml | 20 + src/metrics-server/values/values.yaml | 2 + .../chart/templates/uds-exemption.yaml | 7 - .../chart/templates/uds-package.yaml | 20 +- src/neuvector/tasks.yaml | 13 +- src/neuvector/values/monitor-values.yaml | 7 +- .../values/registry1-monitor-values.yaml | 7 +- .../monitoring/service-monitor.spec.ts | 40 + .../controllers/monitoring/service-monitor.ts | 96 +++ .../operator/controllers/network/policies.ts | 22 + .../crd/generated/package-v1alpha1.ts | 34 + .../generated/prometheus/servicemonitor-v1.ts | 809 ++++++++++++++++++ src/pepr/operator/crd/index.ts | 1 + .../operator/crd/sources/package/v1alpha1.ts | 60 ++ .../reconcilers/package-reconciler.ts | 20 +- src/pepr/prometheus/index.ts | 58 ++ .../chart/templates/istio-monitor.yaml | 44 + .../templates/prometheus-pod-monitor.yaml | 16 + .../chart/templates/uds-package.yaml | 5 + src/prometheus-stack/values/values.yaml | 55 +- src/promtail/chart/templates/service.yaml | 18 + src/promtail/chart/templates/uds-package.yaml | 7 + src/promtail/values/values.yaml | 23 - src/test/tasks.yaml | 4 +- src/velero/chart/templates/uds-package.yaml | 4 +- src/velero/values/values.yaml | 3 + tasks/create.yaml | 8 +- tasks/deploy.yaml | 13 +- tasks/setup.yaml | 4 +- 38 files changed, 1420 insertions(+), 139 deletions(-) create mode 100644 docs/MONITOR.md create mode 100644 src/metrics-server/chart/templates/service-monitor.yaml create mode 100644 src/pepr/operator/controllers/monitoring/service-monitor.spec.ts create mode 100644 src/pepr/operator/controllers/monitoring/service-monitor.ts create mode 100644 src/pepr/operator/crd/generated/prometheus/servicemonitor-v1.ts create mode 100644 src/pepr/prometheus/index.ts create mode 100644 src/prometheus-stack/chart/templates/istio-monitor.yaml create mode 100644 src/prometheus-stack/chart/templates/prometheus-pod-monitor.yaml create mode 100644 src/promtail/chart/templates/service.yaml diff --git a/.yamllint b/.yamllint index 70a6ac478..b3782d109 100644 --- a/.yamllint +++ b/.yamllint @@ -8,6 +8,7 @@ ignore: - '**/.terraform/**' - '**/chart/templates**' - 'node_modules/**' + - 'dist/**' rules: anchors: enable diff --git a/docs/MONITOR.md b/docs/MONITOR.md new file mode 100644 index 000000000..0dcf9396c --- /dev/null +++ b/docs/MONITOR.md @@ -0,0 +1,50 @@ +# Monitoring / Metrics Scraping in UDS Core + +UDS Core leverages Pepr to handle setup of Prometheus scraping of metrics endpoints, with the particular
configuration necessary to work in a STRICT mTLS (Istio) environment. We handle this with both mutations of existing service monitors and generation of service monitors via the `Package` CR. + +## Mutations + +All service monitors are mutated to set the scrape scheme to HTTPS and set the TLS Config to what is required for Istio mTLS scraping (see [this doc](https://istio.io/latest/docs/ops/integrations/prometheus/#tls-settings) for details). Beyond this, no other fields are mutated. Supporting existing service monitors is useful since some charts include service monitors by default with more advanced configurations, and it is in our best interest to enable those and use them where possible. + +Assumptions are made about STRICT mTLS here for simplicity, based on the `istio-injection` namespace label. Without making these assumptions we would need to query `PeerAuthentication` resources or another resource to determine the exact workload mTLS posture. + +Note: This mutation is the default behavior for all service monitors but can be skipped using the annotation key `uds/skip-sm-mutate` (with any value). Skipping this mutation should only be done if your service exposes metrics on a PERMISSIVE mTLS port. + +## Package CR `monitor` field + +UDS Core also supports generating service monitors from the `monitor` list in the `Package` spec. Charts do not always support service monitors, so generating them can be useful. This also provides a simplified way for other users to create service monitors, similar to the way we handle `VirtualServices` today. A full example of this can be seen below: + +```yaml +... +spec: + monitor: + - selector: # Selector for the service to monitor + app: foobar + portName: metrics # Name of the port to monitor + targetPort: 1234 # Corresponding target port on the pod/container (for network policy) + # Optional properties depending on your application + description: "Metrics" # Add to customize the service monitor name + podSelector: # Add if pod labels are different than `selector` (for network policy) + app: barfoo + path: "/mymetrics" # Add if metrics are exposed on a different path than "/metrics" +``` + +This config is used to generate service monitors and corresponding network policies to set up scraping for your applications. The `ServiceMonitor`s will go through the mutation process to add `tlsConfig` and `scheme` to work in an Istio environment. + +This spec intentionally does not support all options available with a `ServiceMonitor`. While we may add additional fields in the future, we do not want to simply rebuild the `ServiceMonitor` spec since mutations are already available to handle Istio specifics. The current subset of spec options is based on the bare minimum necessary to craft resources. + +NOTE: While this is a rather verbose spec, each of the above fields is strictly required to craft the necessary service monitor and network policy resources. + +## Notes on Alternative Approaches + +In coming up with this feature a few alternative approaches were considered but not chosen due to issues with each one. The current spec provides the best balance of a simplified interface compared to the `ServiceMonitor` spec, and a faster/easier reconciliation loop. + +### Generation based on service lookup + +An alternative spec option would use the service name instead of selectors/port name. The service name could then be used to look up the corresponding service and get the necessary selectors/port name (based on numerical port).
There are, however, two issues with this route: +1. There is a timing issue if the `Package` CR is applied to the cluster before the app chart itself (which is the norm with our UDS Packages). The service would not exist at the time the `Package` is reconciled. We could lean into eventual consistency here if we implemented a retry mechanism for the `Package`, which would mitigate this issue. +2. We would need an "alert" mechanism (watch) to notify us when the service(s) are updated, to roll the corresponding updates to network policies and service monitors. While this is doable, it feels like unnecessary complexity compared to other options. + +### Generation of service + monitor + +Another alternative approach would be to use a pod selector and port only. We would then generate both a service and servicemonitor, giving us full control of the port names and selectors. This seems like a viable path, but does add an extra resource for us to generate and manage. There could be unknown side effects of generating services that could clash with other services (particularly with Istio endpoints). This would otherwise be a relatively straightforward approach and is worth evaluating again if we want to simplify the spec later on. diff --git a/pepr.ts b/pepr.ts index 2d740d573..3e9e3ca31 100644 --- a/pepr.ts +++ b/pepr.ts @@ -5,6 +5,7 @@ import cfg from "./package.json"; import { istio } from "./src/pepr/istio"; import { operator } from "./src/pepr/operator"; import { policies } from "./src/pepr/policies"; +import { prometheus } from "./src/pepr/prometheus"; new PeprModule(cfg, [ // UDS Core Operator @@ -15,4 +16,7 @@ new PeprModule(cfg, [ // Istio service mesh istio, + + // Prometheus monitoring stack + prometheus, ]); diff --git a/src/grafana/chart/templates/uds-package.yaml b/src/grafana/chart/templates/uds-package.yaml index ecefc50e4..713a103a0 100644 --- a/src/grafana/chart/templates/uds-package.yaml +++ b/src/grafana/chart/templates/uds-package.yaml @@ -10,6 +10,13 @@ spec: redirectUris: - "https://grafana.admin.{{ .Values.domain }}/login/generic_oauth" + monitor: + - selector: + app.kubernetes.io/name: grafana + targetPort: 3000 + portName: service + description: Metrics + network: expose: - service: grafana diff --git a/src/grafana/tasks.yaml b/src/grafana/tasks.yaml index c657198f0..5547b227f 100644 --- a/src/grafana/tasks.yaml +++ b/src/grafana/tasks.yaml @@ -8,10 +8,18 @@ tasks: name: "app.kubernetes.io/instance=grafana" namespace: grafana condition: Ready - # todo: Fix single package validation checks in CI where Istio isn't installed - # - description: Validate grafana interface - # wait: - # network: - # protocol: https - # address: grafana.admin.uds.dev - # code: 200 + - description: Validate grafana virtual service + cmd: | + if [ "$(curl -isS https://grafana.admin.uds.dev --output /dev/null -w '%{http_code}')" = "302" ]; then + echo "Grafana is up and running." + else + echo "ERROR: Grafana returned a $(curl -isS https://grafana.admin.uds.dev --output /dev/null -w '%{http_code}') code." + exit 1 + fi + + if curl -L -isS https://grafana.admin.uds.dev --output /dev/null -w '%{url_effective}' | grep "sso.uds.dev" 2>&1 1>/dev/null; then + echo "Grafana is redirecting to SSO as expected." + else + echo "ERROR: Grafana is redirecting to $(curl -L -isS https://grafana.admin.uds.dev --output /dev/null -w '%{url_effective}')."
+ exit 1 + fi diff --git a/src/grafana/values/values.yaml b/src/grafana/values/values.yaml index 86cf36d70..0951b1b15 100644 --- a/src/grafana/values/values.yaml +++ b/src/grafana/values/values.yaml @@ -2,6 +2,7 @@ sidecar: dashboards: enabled: true label: grafana_dashboard + searchNamespace: ALL datasources: enabled: true label: grafana_datasource @@ -37,3 +38,6 @@ grafana.ini: role_attribute_strict: true # Automatically redirect to the SSO login page auto_login: true + +service: + appProtocol: "http" diff --git a/src/keycloak/chart/templates/istio-admin.yaml b/src/keycloak/chart/templates/istio-admin.yaml index d878fade0..8055ac0cb 100644 --- a/src/keycloak/chart/templates/istio-admin.yaml +++ b/src/keycloak/chart/templates/istio-admin.yaml @@ -15,10 +15,19 @@ spec: paths: - "/admin*" - "/realms/master*" - - "/metrics*" from: - source: - notNamespaces: ["istio-admin-gateway"] + notNamespaces: + - istio-admin-gateway + - to: + - operation: + paths: + - /metrics* + from: + - source: + notNamespaces: + - istio-admin-gateway + - monitoring - to: - operation: paths: diff --git a/src/keycloak/chart/templates/uds-package.yaml b/src/keycloak/chart/templates/uds-package.yaml index 4c1697b47..5474d885f 100644 --- a/src/keycloak/chart/templates/uds-package.yaml +++ b/src/keycloak/chart/templates/uds-package.yaml @@ -4,6 +4,16 @@ metadata: name: keycloak namespace: {{ .Release.Namespace }} spec: + monitor: + - selector: + app.kubernetes.io/name: keycloak + app.kubernetes.io/component: http + podSelector: + app.kubernetes.io/name: keycloak + targetPort: 8080 + portName: http + description: Metrics + network: allow: - description: "UDS Operator" diff --git a/src/keycloak/tasks.yaml b/src/keycloak/tasks.yaml index a95fd8353..11a752d3b 100644 --- a/src/keycloak/tasks.yaml +++ b/src/keycloak/tasks.yaml @@ -2,24 +2,20 @@ includes: - config: https://raw.githubusercontent.com/defenseunicorns/uds-identity-config/v0.3.6/tasks.yaml tasks: - # These tests break single capability test checks - name: validate actions: - - description: replace me - cmd: echo "hello" - # actions: - # - description: Validate admin interface - # wait: - # network: - # protocol: https - # address: keycloak.admin.uds.dev - # code: 200 - # - description: Validate public interface - # wait: - # network: - # protocol: https - # address: sso.uds.dev - # code: 200 + - description: Validate admin interface + wait: + network: + protocol: https + address: keycloak.admin.uds.dev + code: 200 + - description: Validate public interface + wait: + network: + protocol: https + address: sso.uds.dev + code: 200 - name: dev-theme actions: diff --git a/src/metrics-server/chart/templates/service-monitor.yaml b/src/metrics-server/chart/templates/service-monitor.yaml new file mode 100644 index 000000000..d7c603693 --- /dev/null +++ b/src/metrics-server/chart/templates/service-monitor.yaml @@ -0,0 +1,20 @@ +{{- if .Capabilities.APIVersions.Has "monitoring.coreos.com/v1" }} +# The serviceMonitor for metrics-server is unique due to permissive mTLS on its port, so it is created outside of the Package spec +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + annotations: + uds/skip-sm-mutate: "true" + name: metrics-server-metrics + namespace: metrics-server +spec: + endpoints: + - path: /metrics + port: https + scheme: https + tlsConfig: + insecureSkipVerify: true + selector: + matchLabels: + app.kubernetes.io/name: metrics-server +{{- end }} diff --git a/src/metrics-server/values/values.yaml b/src/metrics-server/values/values.yaml index
cd6d08ab5..28c8b42d5 100644 --- a/src/metrics-server/values/values.yaml +++ b/src/metrics-server/values/values.yaml @@ -6,3 +6,5 @@ readinessProbe: initialDelaySeconds: 10 periodSeconds: 5 failureThreshold: 5 +metrics: + enabled: true diff --git a/src/neuvector/chart/templates/uds-exemption.yaml b/src/neuvector/chart/templates/uds-exemption.yaml index 803a97927..09e61c13e 100644 --- a/src/neuvector/chart/templates/uds-exemption.yaml +++ b/src/neuvector/chart/templates/uds-exemption.yaml @@ -41,10 +41,3 @@ spec: `/proc`: monitoring of processes for malicious activity `/sys/fs/cgroup`: important files the controller wants to monitor for malicious content https://github.com/neuvector/neuvector-helm/blob/master/charts/core/templates/enforcer-daemonset.yaml#L108" - - - policies: - - DropAllCapabilities - matcher: - namespace: neuvector - name: "^neuvector-prometheus-exporter-pod.*" - title: "neuvector-prometheus-exporter-pod" diff --git a/src/neuvector/chart/templates/uds-package.yaml b/src/neuvector/chart/templates/uds-package.yaml index 9e99bdbf3..f9c4bd08e 100644 --- a/src/neuvector/chart/templates/uds-package.yaml +++ b/src/neuvector/chart/templates/uds-package.yaml @@ -4,6 +4,16 @@ metadata: name: neuvector namespace: {{ .Release.Namespace }} spec: + # This is disabled pending further discussion/upstream changes to handle metrics with SSO setup + # monitor: + # - selector: + # app: neuvector-prometheus-exporter + # podSelector: + # app: neuvector-prometheus-exporter-pod + # portName: metrics + # targetPort: 8068 + # description: "Metrics" + sso: - name: Neuvector clientId: uds-core-admin-neuvector @@ -23,6 +33,7 @@ spec: global_role: admin - group: /UDS Core/Auditor global_role: reader + network: expose: - service: neuvector-service-webui @@ -69,15 +80,6 @@ spec: port: 30443 description: "Webhook" - - direction: Ingress - remoteNamespace: monitoring - remoteSelector: - app: prometheus - selector: - app: neuvector-prometheus-exporter-pod - port: 8068 - description: "Prometheus Metrics" - - direction: Egress remoteNamespace: tempo remoteSelector: diff --git a/src/neuvector/tasks.yaml b/src/neuvector/tasks.yaml index 910a1b5e7..a9cd5a7dd 100644 --- a/src/neuvector/tasks.yaml +++ b/src/neuvector/tasks.yaml @@ -29,10 +29,9 @@ tasks: name: app=neuvector-scanner-pod condition: Ready namespace: neuvector - # todo: Fix single package validation checks in CI where Istio isn't installed - # - description: Validate Neuvector Interface - # wait: - # network: - # protocol: https - # address: neuvector.admin.uds.dev - # code: 200 + - description: Validate Neuvector Interface + wait: + network: + protocol: https + address: neuvector.admin.uds.dev + code: 200 diff --git a/src/neuvector/values/monitor-values.yaml b/src/neuvector/values/monitor-values.yaml index e41f7352f..9dd149d0b 100644 --- a/src/neuvector/values/monitor-values.yaml +++ b/src/neuvector/values/monitor-values.yaml @@ -1,12 +1,13 @@ leastPrivilege: false exporter: - # Temporarily disabled until we can get the monitor chart working - # via https://github.com/neuvector/neuvector-helm/pull/355 + # This is disabled pending further discussion/upstream changes to handle metrics with SSO setup enabled: false + # Disable the upstream serviceMonitor to support standalone testing; scraping is handled via the UDS Package serviceMonitor: enabled: false apiSvc: neuvector-svc-controller-api:10443 svc: - enabled: true + # Temporarily disable the service to allow adding appProtocol via a custom service in the config chart + enabled: false type: ClusterIP diff --git
a/src/neuvector/values/registry1-monitor-values.yaml b/src/neuvector/values/registry1-monitor-values.yaml index 164799db6..f3f01b662 100644 --- a/src/neuvector/values/registry1-monitor-values.yaml +++ b/src/neuvector/values/registry1-monitor-values.yaml @@ -5,8 +5,5 @@ exporter: tag: 5.3.2 containerSecurityContext: - runAsUser: 1002 - runAsGroup: 1002 - capabilities: - drop: - - ALL + runAsUser: 1001 + runAsGroup: 1001 diff --git a/src/pepr/operator/controllers/monitoring/service-monitor.spec.ts b/src/pepr/operator/controllers/monitoring/service-monitor.spec.ts new file mode 100644 index 000000000..e4cb5cfc7 --- /dev/null +++ b/src/pepr/operator/controllers/monitoring/service-monitor.spec.ts @@ -0,0 +1,40 @@ +import { describe, expect, it } from "@jest/globals"; +import { generateServiceMonitor } from "./service-monitor"; + +describe("test generate service monitor", () => { + it("should return a valid Service Monitor object", () => { + const pkg = { + apiVersion: "uds.dev/v1alpha1", + kind: "Package", + metadata: { + name: "test", + uid: "f50120aa-2713-4502-9496-566b102b1174", + }, + }; + const portName = "http-metrics"; + const metricsPath = "/test"; + const selectorApp = "test"; + const monitor = { + portName: portName, + path: metricsPath, + targetPort: 1234, + selector: { + app: selectorApp, + }, + }; + const namespace = "test"; + const pkgName = "test"; + const generation = "1"; + const payload = generateServiceMonitor(pkg, monitor, namespace, pkgName, generation); + + expect(payload).toBeDefined(); + expect(payload.metadata?.name).toEqual(`${pkgName}-${selectorApp}-${portName}`); + expect(payload.metadata?.namespace).toEqual(namespace); + expect(payload.spec?.endpoints).toBeDefined(); + if (payload.spec?.endpoints) { + expect(payload.spec.endpoints[0].port).toEqual(portName); + expect(payload.spec.endpoints[0].path).toEqual(metricsPath); + } + expect(payload.spec?.selector.matchLabels).toHaveProperty("app", "test"); + }); +}); diff --git a/src/pepr/operator/controllers/monitoring/service-monitor.ts b/src/pepr/operator/controllers/monitoring/service-monitor.ts new file mode 100644 index 000000000..cd907a542 --- /dev/null +++ b/src/pepr/operator/controllers/monitoring/service-monitor.ts @@ -0,0 +1,96 @@ +import { K8s, Log } from "pepr"; + +import { Prometheus, UDSPackage } from "../../crd"; +import { Monitor } from "../../crd/generated/package-v1alpha1"; +import { getOwnerRef, sanitizeResourceName } from "../utils"; + +/** + * Generate service monitors for each monitored service in the package + * + * @param pkg UDS Package + * @param namespace The namespace of the package + */ +export async function serviceMonitor(pkg: UDSPackage, namespace: string) { + const pkgName = pkg.metadata!.name!; + const generation = (pkg.metadata?.generation ?? 0).toString(); + + // Get the list of monitored services + const monitorList = pkg.spec?.monitor ??
[]; + + // Create a list of generated ServiceMonitors + const payloads: Prometheus.ServiceMonitor[] = []; + + for (const monitor of monitorList) { + const payload = generateServiceMonitor(pkg, monitor, namespace, pkgName, generation); + + // Apply the ServiceMonitor and force overwrite any existing one + await K8s(Prometheus.ServiceMonitor).Apply(payload, { force: true }); + + payloads.push(payload); + } + + // Get all related ServiceMonitors in the namespace + const serviceMonitors = await K8s(Prometheus.ServiceMonitor) + .InNamespace(namespace) + .WithLabel("uds/package", pkgName) + .Get(); + + // Find any orphaned ServiceMonitors (not matching the current generation) + const orphanedSM = serviceMonitors.items.filter( + sm => sm.metadata?.labels?.["uds/generation"] !== generation, + ); + + // Delete any orphaned ServiceMonitors + for (const sm of orphanedSM) { + Log.debug(sm, `Deleting orphaned ServiceMonitor ${sm.metadata!.name}`); + await K8s(Prometheus.ServiceMonitor).Delete(sm); + } + + // Return the list of monitor names + return [...payloads.map(sm => sm.metadata!.name!)]; +} + +export function generateSMName(pkg: UDSPackage, monitor: Monitor) { + const { selector, portName, description } = monitor; + + // Ensure the resource name is valid + const nameSuffix = description || `${Object.values(selector)}-${portName}`; + const name = sanitizeResourceName(`${pkg.metadata!.name}-${nameSuffix}`); + + return name; +} + +export function generateServiceMonitor( + pkg: UDSPackage, + monitor: Monitor, + namespace: string, + pkgName: string, + generation: string, +) { + const { selector, portName } = monitor; + const name = generateSMName(pkg, monitor); + const payload: Prometheus.ServiceMonitor = { + metadata: { + name, + namespace, + labels: { + "uds/package": pkgName, + "uds/generation": generation, + }, + ownerReferences: getOwnerRef(pkg), + }, + spec: { + endpoints: [ + { + port: portName, + path: monitor.path || "/metrics", + }, + ], + selector: { + matchLabels: selector, + }, + }, + }; + + return payload; +} diff --git a/src/pepr/operator/controllers/network/policies.ts b/src/pepr/operator/controllers/network/policies.ts index b88c79afc..df2dbbedc 100644 --- a/src/pepr/operator/controllers/network/policies.ts +++ b/src/pepr/operator/controllers/network/policies.ts @@ -59,6 +59,28 @@ export async function networkPolicies(pkg: UDSPackage, namespace: string) { policies.push(generatedPolicy); } + // Generate NetworkPolicies for any ServiceMonitors that are generated + const monitorList = pkg.spec?.monitor ?? []; + // Iterate over each ServiceMonitor + for (const monitor of monitorList) { + const { selector, targetPort, podSelector } = monitor; + + // Create the NetworkPolicy for the ServiceMonitor + const policy: Allow = { + direction: Direction.Ingress, + selector: podSelector ??
selector, + remoteNamespace: "monitoring", + remoteSelector: { + app: "prometheus", + }, + port: targetPort, + description: `${Object.values(selector)} Metrics`, + }; + // Generate the policy + const generatedPolicy = generate(namespace, policy); + policies.push(generatedPolicy); + } + // Iterate over each policy and apply it for (const [idx, policy] of policies.entries()) { // Add the package name and generation to the labels diff --git a/src/pepr/operator/crd/generated/package-v1alpha1.ts b/src/pepr/operator/crd/generated/package-v1alpha1.ts index 5546a1132..d477803d2 100644 --- a/src/pepr/operator/crd/generated/package-v1alpha1.ts +++ b/src/pepr/operator/crd/generated/package-v1alpha1.ts @@ -8,6 +8,10 @@ export class Package extends GenericKind { } export interface Spec { + /** + * Create Service Monitor configurations + */ + monitor?: Monitor[]; /** * Network configuration for the package */ @@ -18,6 +22,35 @@ export interface Spec { sso?: Sso[]; } +export interface Monitor { + /** + * A description of this monitor entry, this will become part of the ServiceMonitor name + */ + description?: string; + /** + * HTTP path from which to scrape for metrics, defaults to `/metrics` + */ + path?: string; + /** + * Labels to match pods in the namespace to apply the policy to. Leave empty to apply to all + * pods in the namespace + */ + podSelector?: { [key: string]: string }; + /** + * The port name for the serviceMonitor + */ + portName: string; + /** + * Labels to match pods in the namespace to apply the policy to. Leave empty to apply to all + * pods in the namespace + */ + selector: { [key: string]: string }; + /** + * The service targetPort. This is required so the NetworkPolicy can be generated correctly. + */ + targetPort: number; +} + /** * Network configuration for the package */ @@ -503,6 +536,7 @@ export enum Protocol { export interface Status { endpoints?: string[]; + monitors?: string[]; networkPolicyCount?: number; observedGeneration?: number; phase?: Phase; diff --git a/src/pepr/operator/crd/generated/prometheus/servicemonitor-v1.ts b/src/pepr/operator/crd/generated/prometheus/servicemonitor-v1.ts new file mode 100644 index 000000000..4d776ccd4 --- /dev/null +++ b/src/pepr/operator/crd/generated/prometheus/servicemonitor-v1.ts @@ -0,0 +1,809 @@ +// This file is auto-generated by kubernetes-fluent-client, do not edit manually + +import { GenericKind, RegisterKind } from "kubernetes-fluent-client"; + +/** + * ServiceMonitor defines monitoring for a set of services. + */ +export class ServiceMonitor extends GenericKind { + /** + * Specification of desired Service selection for target discovery by Prometheus. + */ + spec?: Spec; +} + +/** + * Specification of desired Service selection for target discovery by Prometheus. + */ +export interface Spec { + /** + * `attachMetadata` defines additional metadata which is added to the discovered targets. + * It requires Prometheus >= v2.37.0. + */ + attachMetadata?: AttachMetadata; + /** + * List of endpoints part of this ServiceMonitor. + */ + endpoints?: Endpoint[]; + /** + * `jobLabel` selects the label from the associated Kubernetes `Service` object which will + * be used as the `job` label for all metrics. + * For example if `jobLabel` is set to `foo` and the Kubernetes `Service` object is labeled + * with `foo: bar`, then Prometheus adds the `job="bar"` label to all ingested metrics. 
+ * If the value of this field is empty or if the label doesn't exist for the given Service, + * the `job` label of the metrics defaults to the name of the associated Kubernetes + * `Service`. + */ + jobLabel?: string; + /** + * Per-scrape limit on the number of targets dropped by relabeling that will be kept in + * memory. 0 means no limit. + * It requires Prometheus >= v2.47.0. + */ + keepDroppedTargets?: number; + /** + * Per-scrape limit on number of labels that will be accepted for a sample. + * It requires Prometheus >= v2.27.0. + */ + labelLimit?: number; + /** + * Per-scrape limit on length of labels name that will be accepted for a sample. + * It requires Prometheus >= v2.27.0. + */ + labelNameLengthLimit?: number; + /** + * Per-scrape limit on length of labels value that will be accepted for a sample. + * It requires Prometheus >= v2.27.0. + */ + labelValueLengthLimit?: number; + /** + * Selector to select which namespaces the Kubernetes `Endpoints` objects are discovered + * from. + */ + namespaceSelector?: NamespaceSelector; + /** + * `podTargetLabels` defines the labels which are transferred from the associated Kubernetes + * `Pod` object onto the ingested metrics. + */ + podTargetLabels?: string[]; + /** + * `sampleLimit` defines a per-scrape limit on the number of scraped samples that will be + * accepted. + */ + sampleLimit?: number; + /** + * Label selector to select the Kubernetes `Endpoints` objects. + */ + selector: Selector; + /** + * `targetLabels` defines the labels which are transferred from the associated Kubernetes + * `Service` object onto the ingested metrics. + */ + targetLabels?: string[]; + /** + * `targetLimit` defines a limit on the number of scraped targets that will be accepted. + */ + targetLimit?: number; +} + +/** + * `attachMetadata` defines additional metadata which is added to the discovered targets. + * It requires Prometheus >= v2.37.0. + */ +export interface AttachMetadata { + /** + * When set to true, Prometheus must have the `get` permission on the `Nodes` objects. + */ + node?: boolean; +} + +/** + * Endpoint defines an endpoint serving Prometheus metrics to be scraped by Prometheus. + */ +export interface Endpoint { + /** + * `authorization` configures the Authorization header credentials to use when scraping the + * target. + * Cannot be set at the same time as `basicAuth`, or `oauth2`. + */ + authorization?: Authorization; + /** + * `basicAuth` configures the Basic Authentication credentials to use when scraping the + * target. + * Cannot be set at the same time as `authorization`, or `oauth2`. + */ + basicAuth?: BasicAuth; + /** + * File to read bearer token for scraping the target. + * Deprecated: use `authorization` instead. + */ + bearerTokenFile?: string; + /** + * `bearerTokenSecret` specifies a key of a Secret containing the bearer token for scraping + * targets. The secret needs to be in the same namespace as the ServiceMonitor object and + * readable by the Prometheus Operator. + * Deprecated: use `authorization` instead. + */ + bearerTokenSecret?: BearerTokenSecret; + /** + * `enableHttp2` can be used to disable HTTP2 when scraping the target. + */ + enableHttp2?: boolean; + /** + * When true, the pods which are not running (e.g. either in Failed or Succeeded state) are + * dropped during the target discovery. + * If unset, the filtering is enabled. 
+ * More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase + */ + filterRunning?: boolean; + /** + * `followRedirects` defines whether the scrape requests should follow HTTP 3xx redirects. + */ + followRedirects?: boolean; + /** + * When true, `honorLabels` preserves the metric's labels when they collide with the + * target's labels. + */ + honorLabels?: boolean; + /** + * `honorTimestamps` controls whether Prometheus preserves the timestamps when exposed by + * the target. + */ + honorTimestamps?: boolean; + /** + * Interval at which Prometheus scrapes the metrics from the target. + * If empty, Prometheus uses the global scrape interval. + */ + interval?: string; + /** + * `metricRelabelings` configures the relabeling rules to apply to the samples before + * ingestion. + */ + metricRelabelings?: MetricRelabeling[]; + /** + * `oauth2` configures the OAuth2 settings to use when scraping the target. + * It requires Prometheus >= 2.27.0. + * Cannot be set at the same time as `authorization`, or `basicAuth`. + */ + oauth2?: Oauth2; + /** + * params define optional HTTP URL parameters. + */ + params?: { [key: string]: string[] }; + /** + * HTTP path from which to scrape for metrics. + * If empty, Prometheus uses the default value (e.g. `/metrics`). + */ + path?: string; + /** + * Name of the Service port which this endpoint refers to. + * It takes precedence over `targetPort`. + */ + port?: string; + /** + * `proxyURL` configures the HTTP Proxy URL (e.g. "http://proxyserver:2195") to go through + * when scraping the target. + */ + proxyUrl?: string; + /** + * `relabelings` configures the relabeling rules to apply the target's metadata labels. + * The Operator automatically adds relabelings for a few standard Kubernetes fields. + * The original scrape job's name is available via the `__tmp_prometheus_job_name` label. + * More info: + * https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config + */ + relabelings?: Relabeling[]; + /** + * HTTP scheme to use for scraping. + * `http` and `https` are the expected values unless you rewrite the `__scheme__` label via + * relabeling. + * If empty, Prometheus uses the default value `http`. + */ + scheme?: Scheme; + /** + * Timeout after which Prometheus considers the scrape to be failed. + * If empty, Prometheus uses the global scrape timeout unless it is less than the target's + * scrape interval value in which the latter is used. + */ + scrapeTimeout?: string; + /** + * Name or number of the target port of the `Pod` object behind the Service, the port must + * be specified with container port property. + * Deprecated: use `port` instead. + */ + targetPort?: number | string; + /** + * TLS configuration to use when scraping the target. + */ + tlsConfig?: TLSConfig; + /** + * `trackTimestampsStaleness` defines whether Prometheus tracks staleness of the metrics + * that have an explicit timestamp present in scraped data. Has no effect if + * `honorTimestamps` is false. + * It requires Prometheus >= v2.48.0. + */ + trackTimestampsStaleness?: boolean; +} + +/** + * `authorization` configures the Authorization header credentials to use when scraping the + * target. + * Cannot be set at the same time as `basicAuth`, or `oauth2`. + */ +export interface Authorization { + /** + * Selects a key of a Secret in the namespace that contains the credentials for + * authentication. + */ + credentials?: Credentials; + /** + * Defines the authentication type. The value is case-insensitive. 
+ * "Basic" is not a supported value. + * Default: "Bearer" + */ + type?: string; +} + +/** + * Selects a key of a Secret in the namespace that contains the credentials for + * authentication. + */ +export interface Credentials { + /** + * The key of the secret to select from. Must be a valid secret key. + */ + key: string; + /** + * Name of the referent. More info: + * https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add + * other useful fields. apiVersion, kind, uid? + */ + name?: string; + /** + * Specify whether the Secret or its key must be defined + */ + optional?: boolean; +} + +/** + * `basicAuth` configures the Basic Authentication credentials to use when scraping the + * target. + * Cannot be set at the same time as `authorization`, or `oauth2`. + */ +export interface BasicAuth { + /** + * `password` specifies a key of a Secret containing the password for authentication. + */ + password?: Password; + /** + * `username` specifies a key of a Secret containing the username for authentication. + */ + username?: Username; +} + +/** + * `password` specifies a key of a Secret containing the password for authentication. + */ +export interface Password { + /** + * The key of the secret to select from. Must be a valid secret key. + */ + key: string; + /** + * Name of the referent. More info: + * https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add + * other useful fields. apiVersion, kind, uid? + */ + name?: string; + /** + * Specify whether the Secret or its key must be defined + */ + optional?: boolean; +} + +/** + * `username` specifies a key of a Secret containing the username for authentication. + */ +export interface Username { + /** + * The key of the secret to select from. Must be a valid secret key. + */ + key: string; + /** + * Name of the referent. More info: + * https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add + * other useful fields. apiVersion, kind, uid? + */ + name?: string; + /** + * Specify whether the Secret or its key must be defined + */ + optional?: boolean; +} + +/** + * `bearerTokenSecret` specifies a key of a Secret containing the bearer token for scraping + * targets. The secret needs to be in the same namespace as the ServiceMonitor object and + * readable by the Prometheus Operator. + * Deprecated: use `authorization` instead. + */ +export interface BearerTokenSecret { + /** + * The key of the secret to select from. Must be a valid secret key. + */ + key: string; + /** + * Name of the referent. More info: + * https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add + * other useful fields. apiVersion, kind, uid? + */ + name?: string; + /** + * Specify whether the Secret or its key must be defined + */ + optional?: boolean; +} + +/** + * RelabelConfig allows dynamic rewriting of the label set for targets, alerts, scraped + * samples and remote write samples. + * More info: + * https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config + */ +export interface MetricRelabeling { + /** + * Action to perform based on the regex matching. + * `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0. `DropEqual` and + * `KeepEqual` actions require Prometheus >= v2.41.0. + * Default: "Replace" + */ + action?: Action; + /** + * Modulus to take of the hash of the source label values. + * Only applicable when the action is `HashMod`. 
+ */ + modulus?: number; + /** + * Regular expression against which the extracted value is matched. + */ + regex?: string; + /** + * Replacement value against which a Replace action is performed if the regular expression + * matches. + * Regex capture groups are available. + */ + replacement?: string; + /** + * Separator is the string between concatenated SourceLabels. + */ + separator?: string; + /** + * The source labels select values from existing labels. Their content is concatenated using + * the configured Separator and matched against the configured regular expression. + */ + sourceLabels?: string[]; + /** + * Label to which the resulting string is written in a replacement. + * It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`, `KeepEqual` and + * `DropEqual` actions. + * Regex capture groups are available. + */ + targetLabel?: string; +} + +/** + * Action to perform based on the regex matching. + * `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0. `DropEqual` and + * `KeepEqual` actions require Prometheus >= v2.41.0. + * Default: "Replace" + */ +export enum Action { + ActionDrop = "Drop", + ActionKeep = "Keep", + ActionLowercase = "Lowercase", + ActionReplace = "Replace", + ActionUppercase = "Uppercase", + Drop = "drop", + DropEqual = "DropEqual", + Dropequal = "dropequal", + HashMod = "HashMod", + Hashmod = "hashmod", + Keep = "keep", + KeepEqual = "KeepEqual", + Keepequal = "keepequal", + LabelDrop = "LabelDrop", + LabelKeep = "LabelKeep", + LabelMap = "LabelMap", + Labeldrop = "labeldrop", + Labelkeep = "labelkeep", + Labelmap = "labelmap", + Lowercase = "lowercase", + Replace = "replace", + Uppercase = "uppercase", +} + +/** + * `oauth2` configures the OAuth2 settings to use when scraping the target. + * It requires Prometheus >= 2.27.0. + * Cannot be set at the same time as `authorization`, or `basicAuth`. + */ +export interface Oauth2 { + /** + * `clientId` specifies a key of a Secret or ConfigMap containing the OAuth2 client's ID. + */ + clientId: ClientID; + /** + * `clientSecret` specifies a key of a Secret containing the OAuth2 client's secret. + */ + clientSecret: ClientSecret; + /** + * `endpointParams` configures the HTTP parameters to append to the token URL. + */ + endpointParams?: { [key: string]: string }; + /** + * `scopes` defines the OAuth2 scopes used for the token request. + */ + scopes?: string[]; + /** + * `tokenURL` configures the URL to fetch the token from. + */ + tokenUrl: string; +} + +/** + * `clientId` specifies a key of a Secret or ConfigMap containing the OAuth2 client's ID. + */ +export interface ClientID { + /** + * ConfigMap containing data to use for the targets. + */ + configMap?: ClientIDConfigMap; + /** + * Secret containing data to use for the targets. + */ + secret?: ClientIDSecret; +} + +/** + * ConfigMap containing data to use for the targets. + */ +export interface ClientIDConfigMap { + /** + * The key to select. + */ + key: string; + /** + * Name of the referent. More info: + * https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add + * other useful fields. apiVersion, kind, uid? + */ + name?: string; + /** + * Specify whether the ConfigMap or its key must be defined + */ + optional?: boolean; +} + +/** + * Secret containing data to use for the targets. + */ +export interface ClientIDSecret { + /** + * The key of the secret to select from. Must be a valid secret key. + */ + key: string; + /** + * Name of the referent. 
More info: + * https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add + * other useful fields. apiVersion, kind, uid? + */ + name?: string; + /** + * Specify whether the Secret or its key must be defined + */ + optional?: boolean; +} + +/** + * `clientSecret` specifies a key of a Secret containing the OAuth2 client's secret. + */ +export interface ClientSecret { + /** + * The key of the secret to select from. Must be a valid secret key. + */ + key: string; + /** + * Name of the referent. More info: + * https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add + * other useful fields. apiVersion, kind, uid? + */ + name?: string; + /** + * Specify whether the Secret or its key must be defined + */ + optional?: boolean; +} + +/** + * RelabelConfig allows dynamic rewriting of the label set for targets, alerts, scraped + * samples and remote write samples. + * More info: + * https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config + */ +export interface Relabeling { + /** + * Action to perform based on the regex matching. + * `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0. `DropEqual` and + * `KeepEqual` actions require Prometheus >= v2.41.0. + * Default: "Replace" + */ + action?: Action; + /** + * Modulus to take of the hash of the source label values. + * Only applicable when the action is `HashMod`. + */ + modulus?: number; + /** + * Regular expression against which the extracted value is matched. + */ + regex?: string; + /** + * Replacement value against which a Replace action is performed if the regular expression + * matches. + * Regex capture groups are available. + */ + replacement?: string; + /** + * Separator is the string between concatenated SourceLabels. + */ + separator?: string; + /** + * The source labels select values from existing labels. Their content is concatenated using + * the configured Separator and matched against the configured regular expression. + */ + sourceLabels?: string[]; + /** + * Label to which the resulting string is written in a replacement. + * It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`, `KeepEqual` and + * `DropEqual` actions. + * Regex capture groups are available. + */ + targetLabel?: string; +} + +/** + * HTTP scheme to use for scraping. + * `http` and `https` are the expected values unless you rewrite the `__scheme__` label via + * relabeling. + * If empty, Prometheus uses the default value `http`. + */ +export enum Scheme { + HTTP = "http", + HTTPS = "https", +} + +/** + * TLS configuration to use when scraping the target. + */ +export interface TLSConfig { + /** + * Certificate authority used when verifying server certificates. + */ + ca?: CA; + /** + * Path to the CA cert in the Prometheus container to use for the targets. + */ + caFile?: string; + /** + * Client certificate to present when doing client-authentication. + */ + cert?: CERT; + /** + * Path to the client cert file in the Prometheus container for the targets. + */ + certFile?: string; + /** + * Disable target certificate validation. + */ + insecureSkipVerify?: boolean; + /** + * Path to the client key file in the Prometheus container for the targets. + */ + keyFile?: string; + /** + * Secret containing the client key file for the targets. + */ + keySecret?: KeySecret; + /** + * Used to verify the hostname for the targets. + */ + serverName?: string; +} + +/** + * Certificate authority used when verifying server certificates. 
+ */ +export interface CA { + /** + * ConfigMap containing data to use for the targets. + */ + configMap?: CAConfigMap; + /** + * Secret containing data to use for the targets. + */ + secret?: CASecret; +} + +/** + * ConfigMap containing data to use for the targets. + */ +export interface CAConfigMap { + /** + * The key to select. + */ + key: string; + /** + * Name of the referent. More info: + * https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add + * other useful fields. apiVersion, kind, uid? + */ + name?: string; + /** + * Specify whether the ConfigMap or its key must be defined + */ + optional?: boolean; +} + +/** + * Secret containing data to use for the targets. + */ +export interface CASecret { + /** + * The key of the secret to select from. Must be a valid secret key. + */ + key: string; + /** + * Name of the referent. More info: + * https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add + * other useful fields. apiVersion, kind, uid? + */ + name?: string; + /** + * Specify whether the Secret or its key must be defined + */ + optional?: boolean; +} + +/** + * Client certificate to present when doing client-authentication. + */ +export interface CERT { + /** + * ConfigMap containing data to use for the targets. + */ + configMap?: CERTConfigMap; + /** + * Secret containing data to use for the targets. + */ + secret?: CERTSecret; +} + +/** + * ConfigMap containing data to use for the targets. + */ +export interface CERTConfigMap { + /** + * The key to select. + */ + key: string; + /** + * Name of the referent. More info: + * https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add + * other useful fields. apiVersion, kind, uid? + */ + name?: string; + /** + * Specify whether the ConfigMap or its key must be defined + */ + optional?: boolean; +} + +/** + * Secret containing data to use for the targets. + */ +export interface CERTSecret { + /** + * The key of the secret to select from. Must be a valid secret key. + */ + key: string; + /** + * Name of the referent. More info: + * https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add + * other useful fields. apiVersion, kind, uid? + */ + name?: string; + /** + * Specify whether the Secret or its key must be defined + */ + optional?: boolean; +} + +/** + * Secret containing the client key file for the targets. + */ +export interface KeySecret { + /** + * The key of the secret to select from. Must be a valid secret key. + */ + key: string; + /** + * Name of the referent. More info: + * https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add + * other useful fields. apiVersion, kind, uid? + */ + name?: string; + /** + * Specify whether the Secret or its key must be defined + */ + optional?: boolean; +} + +/** + * Selector to select which namespaces the Kubernetes `Endpoints` objects are discovered + * from. + */ +export interface NamespaceSelector { + /** + * Boolean describing whether all namespaces are selected in contrast to a list restricting + * them. + */ + any?: boolean; + /** + * List of namespace names to select from. + */ + matchNames?: string[]; +} + +/** + * Label selector to select the Kubernetes `Endpoints` objects. + */ +export interface Selector { + /** + * matchExpressions is a list of label selector requirements. The requirements are ANDed. + */ + matchExpressions?: MatchExpression[]; + /** + * matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels map is + * equivalent to an element of matchExpressions, whose key field is "key", the operator is + * "In", and the values array contains only "value". The requirements are ANDed. + */ + matchLabels?: { [key: string]: string }; +} + +/** + * A label selector requirement is a selector that contains values, a key, and an operator + * that relates the key and values. + */ +export interface MatchExpression { + /** + * key is the label key that the selector applies to. + */ + key: string; + /** + * operator represents a key's relationship to a set of values. Valid operators are In, + * NotIn, Exists and DoesNotExist. + */ + operator: string; + /** + * values is an array of string values. If the operator is In or NotIn, the values array + * must be non-empty. If the operator is Exists or DoesNotExist, the values array must be + * empty. This array is replaced during a strategic merge patch. + */ + values?: string[]; +} + +RegisterKind(ServiceMonitor, { + group: "monitoring.coreos.com", + version: "v1", + kind: "ServiceMonitor", +}); diff --git a/src/pepr/operator/crd/index.ts b/src/pepr/operator/crd/index.ts index eae36d256..11982ead1 100644 --- a/src/pepr/operator/crd/index.ts +++ b/src/pepr/operator/crd/index.ts @@ -21,3 +21,4 @@ export { } from "./generated/exemption-v1alpha1"; export * as Istio from "./generated/istio/virtualservice-v1beta1"; +export * as Prometheus from "./generated/prometheus/servicemonitor-v1"; diff --git a/src/pepr/operator/crd/sources/package/v1alpha1.ts b/src/pepr/operator/crd/sources/package/v1alpha1.ts index 1317103e9..34f2d4d60 100644 --- a/src/pepr/operator/crd/sources/package/v1alpha1.ts +++ b/src/pepr/operator/crd/sources/package/v1alpha1.ts @@ -159,6 +159,53 @@ const expose = { } as V1JSONSchemaProps, } as V1JSONSchemaProps; +const monitor = { + description: "Create Service Monitor configurations", + type: "array", + items: { + type: "object", + required: ["portName", "selector", "targetPort"], + properties: { + description: { + type: "string", + description: + "A description of this monitor entry, this will become part of the ServiceMonitor name", + }, + portName: { + description: "The port name for the serviceMonitor", + type: "string", + }, + targetPort: { + description: + "The service targetPort. This is required so the NetworkPolicy can be generated correctly.", + minimum: 1, + maximum: 65535, + type: "number", + }, + selector: { + description: + "Labels to match pods in the namespace to apply the policy to. Leave empty to apply to all pods in the namespace", + type: "object", + additionalProperties: { + type: "string", + }, + }, + podSelector: { + description: + "Labels to match pods in the namespace to apply the policy to. 
Leave empty to apply to all pods in the namespace", + type: "object", + additionalProperties: { + type: "string", + }, + }, + path: { + description: "HTTP path from which to scrape for metrics, defaults to `/metrics`", + type: "string", + }, + }, + }, +}; + const sso = { description: "Create SSO client configurations", type: "array", @@ -283,6 +330,12 @@ export const v1alpha1: V1CustomResourceDefinitionVersion = { description: "Service endpoints exposed by the package", jsonPath: ".status.endpoints", }, + { + name: "Monitors", + type: "string", + description: "Service monitors for the package", + jsonPath: ".status.monitors", + }, { name: "Network Policies", type: "integer", @@ -325,6 +378,12 @@ export const v1alpha1: V1CustomResourceDefinitionVersion = { type: "string", }, }, + monitors: { + type: "array", + items: { + type: "string", + }, + }, networkPolicyCount: { type: "integer", }, @@ -341,6 +400,7 @@ export const v1alpha1: V1CustomResourceDefinitionVersion = { allow, }, }, + monitor, sso, }, } as V1JSONSchemaProps, diff --git a/src/pepr/operator/reconcilers/package-reconciler.ts b/src/pepr/operator/reconcilers/package-reconciler.ts index ef8a1eace..2c500301d 100644 --- a/src/pepr/operator/reconcilers/package-reconciler.ts +++ b/src/pepr/operator/reconcilers/package-reconciler.ts @@ -5,6 +5,7 @@ import { UDSConfig } from "../../config"; import { enableInjection } from "../controllers/istio/injection"; import { virtualService } from "../controllers/istio/virtual-service"; import { keycloak } from "../controllers/keycloak/client-sync"; +import { serviceMonitor } from "../controllers/monitoring/service-monitor"; import { networkPolicies } from "../controllers/network/policies"; import { Phase, UDSPackage } from "../crd"; import { migrate } from "../crd/migrate"; @@ -35,16 +36,20 @@ export async function packageReconciler(pkg: UDSPackage) { const netPol = await networkPolicies(pkg, namespace!); - // Only configure the VirtualService if not running in single test mode let endpoints: string[] = []; - if (!UDSConfig.isSingleTest) { - // Update the namespace to ensure the istio-injection label is set - await enableInjection(pkg); + // Update the namespace to ensure the istio-injection label is set + await enableInjection(pkg); + + // Create the VirtualService for each exposed service + endpoints = await virtualService(pkg, namespace!); - // Create the VirtualService for each exposed service - endpoints = await virtualService(pkg, namespace!); + // Only configure the ServiceMonitors if not running in single test mode + let monitors: string[] = []; + if (!UDSConfig.isSingleTest) { + // Create the ServiceMonitor for each monitored service + monitors = await serviceMonitor(pkg, namespace!); } else { - Log.warn(`Running in single test mode, skipping ${name} VirtualService.`); + Log.warn(`Running in single test mode, skipping ${name} ServiceMonitors.`); } // Configure SSO @@ -54,6 +59,7 @@ export async function packageReconciler(pkg: UDSPackage) { phase: Phase.Ready, ssoClients, endpoints, + monitors, networkPolicyCount: netPol.length, observedGeneration: metadata.generation, }); diff --git a/src/pepr/prometheus/index.ts b/src/pepr/prometheus/index.ts new file mode 100644 index 000000000..bc471cac4 --- /dev/null +++ b/src/pepr/prometheus/index.ts @@ -0,0 +1,58 @@ +import { Capability, K8s, kind, Log } from "pepr"; +import { Prometheus } from "../operator/crd"; + +export const prometheus = new Capability({ + name: "prometheus", + description: "UDS Core Capability for the Prometheus stack.", +}); + 
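+// Illustrative example: for a ServiceMonitor in an istio-injected namespace, an endpoint such as +// { port: "http-metrics", path: "/metrics" } is rewritten by the Mutate below to +// { port: "http-metrics", path: "/metrics", scheme: "https", tlsConfig: { caFile: "/etc/prom-certs/root-cert.pem", +// certFile: "/etc/prom-certs/cert-chain.pem", keyFile: "/etc/prom-certs/key.pem", insecureSkipVerify: true } } +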
+const { When } = prometheus; + +/** + * Mutate a service monitor to enable mTLS metrics + */ +When(Prometheus.ServiceMonitor) + .IsCreatedOrUpdated() + .Mutate(async sm => { + // Provide an opt-out of mutation to handle complicated scenarios + if (sm.Raw.metadata?.annotations?.["uds/skip-sm-mutate"]) { + return; + } + + // This assumes istio-injection == strict mTLS due to complexity around mTLS lookup + if (await isIstioInjected(sm)) { + if (sm.Raw.spec?.endpoints === undefined) { + return; + } + + Log.info(`Patching service monitor ${sm.Raw.metadata?.name} for mTLS metrics`); + const tlsConfig = { + caFile: "/etc/prom-certs/root-cert.pem", + certFile: "/etc/prom-certs/cert-chain.pem", + keyFile: "/etc/prom-certs/key.pem", + insecureSkipVerify: true, + }; + const endpoints: Prometheus.Endpoint[] = sm.Raw.spec.endpoints; + endpoints.forEach(endpoint => { + endpoint.scheme = Prometheus.Scheme.HTTPS; + endpoint.tlsConfig = tlsConfig; + }); + sm.Raw.spec.endpoints = endpoints; + } else { + Log.info(`No mutations needed for service monitor ${sm.Raw.metadata?.name}`); + } + }); + +async function isIstioInjected(sm: Prometheus.ServiceMonitor) { + const namespaces = sm.Raw.spec?.namespaceSelector?.matchNames || [ + sm.Raw.metadata?.namespace ?? "default", + ]; + + for (const ns of namespaces) { + const namespace = await K8s(kind.Namespace).Get(ns); + if (namespace.metadata?.labels && namespace.metadata.labels["istio-injection"] === "enabled") { + return true; + } + } + return false; +} diff --git a/src/prometheus-stack/chart/templates/istio-monitor.yaml b/src/prometheus-stack/chart/templates/istio-monitor.yaml new file mode 100644 index 000000000..e82a0d23e --- /dev/null +++ b/src/prometheus-stack/chart/templates/istio-monitor.yaml @@ -0,0 +1,44 @@ +# This podmonitor will pick up envoy stats for all Istio sidecars across the cluster +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: envoy-stats-monitor + namespace: istio-system +spec: + selector: + matchExpressions: + - {key: istio-prometheus-ignore, operator: DoesNotExist} + namespaceSelector: + any: true + jobLabel: envoy-stats + podMetricsEndpoints: + - path: /stats/prometheus + interval: 15s + relabelings: + - action: keep + sourceLabels: [__meta_kubernetes_pod_container_name] + regex: "istio-proxy" + - action: keep + sourceLabels: [__meta_kubernetes_pod_annotationpresent_prometheus_io_scrape] + - action: replace + regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4}) + replacement: '[$2]:$1' + sourceLabels: + - __meta_kubernetes_pod_annotation_prometheus_io_port + - __meta_kubernetes_pod_ip + targetLabel: __address__ + - action: replace + regex: (\d+);((([0-9]+?)(\.|$)){4}) + replacement: $2:$1 + sourceLabels: + - __meta_kubernetes_pod_annotation_prometheus_io_port + - __meta_kubernetes_pod_ip + targetLabel: __address__ + - action: labeldrop + regex: "__meta_kubernetes_pod_label_(.+)" + - sourceLabels: [__meta_kubernetes_namespace] + action: replace + targetLabel: namespace + - sourceLabels: [__meta_kubernetes_pod_name] + action: replace + targetLabel: pod_name diff --git a/src/prometheus-stack/chart/templates/prometheus-pod-monitor.yaml b/src/prometheus-stack/chart/templates/prometheus-pod-monitor.yaml new file mode 100644 index 000000000..51e17961d --- /dev/null +++ b/src/prometheus-stack/chart/templates/prometheus-pod-monitor.yaml @@ -0,0 +1,16 @@ +# This pod monitor is used instead of a service monitor to handle mTLS with self-monitoring +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: +
diff --git a/src/prometheus-stack/chart/templates/istio-monitor.yaml b/src/prometheus-stack/chart/templates/istio-monitor.yaml
new file mode 100644
index 000000000..e82a0d23e
--- /dev/null
+++ b/src/prometheus-stack/chart/templates/istio-monitor.yaml
@@ -0,0 +1,44 @@
+# This podmonitor will pick up envoy stats for all Istio sidecars across the cluster
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: envoy-stats-monitor
+  namespace: istio-system
+spec:
+  selector:
+    matchExpressions:
+      - {key: istio-prometheus-ignore, operator: DoesNotExist}
+  namespaceSelector:
+    any: true
+  jobLabel: envoy-stats
+  podMetricsEndpoints:
+    - path: /stats/prometheus
+      interval: 15s
+      relabelings:
+        - action: keep
+          sourceLabels: [__meta_kubernetes_pod_container_name]
+          regex: "istio-proxy"
+        - action: keep
+          sourceLabels: [__meta_kubernetes_pod_annotationpresent_prometheus_io_scrape]
+        - action: replace
+          regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
+          replacement: '[$2]:$1'
+          sourceLabels:
+            - __meta_kubernetes_pod_annotation_prometheus_io_port
+            - __meta_kubernetes_pod_ip
+          targetLabel: __address__
+        - action: replace
+          regex: (\d+);((([0-9]+?)(\.|$)){4})
+          replacement: $2:$1
+          sourceLabels:
+            - __meta_kubernetes_pod_annotation_prometheus_io_port
+            - __meta_kubernetes_pod_ip
+          targetLabel: __address__
+        - action: labeldrop
+          regex: "__meta_kubernetes_pod_label_(.+)"
+        - sourceLabels: [__meta_kubernetes_namespace]
+          action: replace
+          targetLabel: namespace
+        - sourceLabels: [__meta_kubernetes_pod_name]
+          action: replace
+          targetLabel: pod_name
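As a worked example of the relabelings above (all pod values are hypothetical): a sidecar with container name `istio-proxy`, the `prometheus.io/scrape` annotation present, `prometheus.io/port: "15020"`, and pod IP `10.42.0.17` survives both `keep` rules, and the IPv4 `replace` rule joins the port and IP into the scrape address:

```yaml
# Discovered meta labels (input):
#   __meta_kubernetes_pod_container_name: istio-proxy
#   __meta_kubernetes_pod_annotation_prometheus_io_port: "15020"
#   __meta_kubernetes_pod_ip: 10.42.0.17
# Resulting target labels (output):
__address__: 10.42.0.17:15020
namespace: my-namespace # copied from __meta_kubernetes_namespace
pod_name: my-app-5d9c8b7f4-abcde # copied from __meta_kubernetes_pod_name
```

IPv6 pod IPs hit the first `replace` rule instead and are bracketed (e.g. `[fd00::1]:15020`), and all `__meta_kubernetes_pod_label_*` labels are dropped.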
diff --git a/src/prometheus-stack/chart/templates/prometheus-pod-monitor.yaml b/src/prometheus-stack/chart/templates/prometheus-pod-monitor.yaml
new file mode 100644
index 000000000..51e17961d
--- /dev/null
+++ b/src/prometheus-stack/chart/templates/prometheus-pod-monitor.yaml
@@ -0,0 +1,16 @@
+# This PodMonitor is used instead of a ServiceMonitor to handle mTLS for Prometheus self-monitoring
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: prometheus-pod-monitor
+  namespace: monitoring
+spec:
+  selector:
+    matchLabels:
+      app: prometheus
+  podMetricsEndpoints:
+    - port: http-web
+    - port: reloader-web
+  namespaceSelector:
+    matchNames:
+      - monitoring
diff --git a/src/prometheus-stack/chart/templates/uds-package.yaml b/src/prometheus-stack/chart/templates/uds-package.yaml
index 11e6d2954..746a08692 100644
--- a/src/prometheus-stack/chart/templates/uds-package.yaml
+++ b/src/prometheus-stack/chart/templates/uds-package.yaml
@@ -18,6 +18,11 @@ spec:
         selector:
           app: kube-prometheus-stack-operator
 
+      - direction: Egress
+        remoteGenerated: KubeAPI
+        selector:
+          app: prometheus
+
       - direction: Egress
         remoteGenerated: KubeAPI
         selector:
diff --git a/src/prometheus-stack/values/values.yaml b/src/prometheus-stack/values/values.yaml
index c0dc155d6..30d2b6559 100644
--- a/src/prometheus-stack/values/values.yaml
+++ b/src/prometheus-stack/values/values.yaml
@@ -1,24 +1,8 @@
-alertmanager:
-  alertmanagerSpec:
-    resources:
-      limits:
-        cpu: 500m
-        memory: 256Mi
-      requests:
-        cpu: 100m
-        memory: 256Mi
 crds:
   enabled: false
 grafana:
   enabled: false
-kube-state-metrics:
-  resources:
-    limits:
-      cpu: 500m
-      memory: 128Mi
-    requests:
-      cpu: 10m
-      memory: 128Mi
+  forceDeployDashboards: true
 kubeStateMetrics:
   serviceMonitor:
     interval: ""
@@ -35,6 +19,8 @@ nodeExporter:
   serviceMonitor:
     interval: ""
 prometheus:
+  serviceMonitor:
+    selfMonitor: false
   prometheusSpec:
     enableFeatures:
       - remote-write-receiver
@@ -55,7 +41,7 @@ prometheus:
         memory: 2Gi
       requests:
         cpu: 100m
-        memory: 2Gi
+        memory: 512Mi
     ruleSelectorNilUsesHelmValues: false
     serviceMonitorSelectorNilUsesHelmValues: false
     storageSpec:
@@ -76,37 +62,14 @@ prometheus:
           name: istio-certs
 prometheus-node-exporter:
   containerSecurityContext:
-    capabilities:
-      drop:
-        - ALL
     readOnlyRootFilesystem: true
   hostNetwork: false
   hostPID: false
   podAnnotations:
     cluster-autoscaler.kubernetes.io/safe-to-evict: "true"
-  prometheus:
-    resources:
-      limits:
-        cpu: 500m
-        memory: 128Mi
-      requests:
-        cpu: 100m
-        memory: 128Mi
-  serviceAccount:
-    name: null
-  serviceMonitor:
-    attachMetadata: {}
 prometheusOperator:
   admissionWebhooks:
-    containerSecurityContext:
-      capabilities:
-        drop:
-          - ALL
     patch:
-      containerSecurityContext:
-        capabilities:
-          drop:
-            - ALL
       resources:
         limits:
           cpu: 100m
@@ -114,7 +77,6 @@ prometheusOperator:
       requests:
         cpu: 50m
         memory: 128Mi
-
   prometheusConfigReloader:
     enableProbe: false
 
@@ -132,3 +94,12 @@ prometheusOperator:
      requests:
        cpu: 100m
        memory: 512Mi
+
+alertmanager:
+  alertmanagerSpec:
+    scheme: "https"
+    tlsConfig:
+      caFile: /etc/prom-certs/root-cert.pem
+      certFile: /etc/prom-certs/cert-chain.pem
+      insecureSkipVerify: true
+      keyFile: /etc/prom-certs/key.pem
diff --git a/src/promtail/chart/templates/service.yaml b/src/promtail/chart/templates/service.yaml
new file mode 100644
index 000000000..23c6a4429
--- /dev/null
+++ b/src/promtail/chart/templates/service.yaml
@@ -0,0 +1,18 @@
+# The upstream chart can create this service, but it is conditionally tied to the serviceMonitor, which causes errors in single package testing
+# This will be resolved by https://github.com/grafana/helm-charts/pull/3083 once it is merged and released
+apiVersion: v1
+kind: Service
+metadata:
+  name: promtail-metrics
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/name: promtail
+spec:
+  clusterIP: None
+  ports:
+    - name: http-metrics
+      port: 3101
+      targetPort: http-metrics
+      protocol: TCP
+  selector:
+    app.kubernetes.io/name: promtail
diff --git a/src/promtail/chart/templates/uds-package.yaml b/src/promtail/chart/templates/uds-package.yaml
index 65669e985..4875d82ec 100644
--- a/src/promtail/chart/templates/uds-package.yaml
+++ b/src/promtail/chart/templates/uds-package.yaml
@@ -4,6 +4,13 @@ metadata:
   name: promtail
   namespace: {{ .Release.Namespace }}
 spec:
+  monitor:
+    - selector:
+        app.kubernetes.io/name: promtail
+      targetPort: 3101
+      portName: http-metrics
+      description: Metrics
+
   network:
     allow:
       - direction: Ingress
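For illustration only, the `monitor` entry above could yield a ServiceMonitor roughly like the following before the Pepr mutation adds the mTLS settings. This is a sketch of the expected mapping, not the verbatim output of `src/pepr/operator/controllers/monitoring/service-monitor.ts`; the generated name, namespace value, and any extra labels are assumptions:

```yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: promtail # generated name is an assumption
  namespace: promtail # release namespace is an assumption
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: promtail # from monitor[].selector
  endpoints:
    - port: http-metrics # from monitor[].portName
      path: /metrics # CRD default, since monitor[].path is omitted
```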
diff --git a/src/promtail/values/values.yaml b/src/promtail/values/values.yaml
index da8124aad..54698b2ae 100644
--- a/src/promtail/values/values.yaml
+++ b/src/promtail/values/values.yaml
@@ -65,26 +65,3 @@ resources:
   requests:
     cpu: 100m
     memory: 256Mi
-
-# Below fails individual test when enabled since the crd is not there
-serviceMonitor:
-  annotations: {}
-  enabled: false
-  interval: null
-  labels: {}
-  metricRelabelings: []
-  namespace: null
-  namespaceSelector: {}
-  prometheusRule:
-    additionalLabels: {}
-    enabled: false
-    rules: []
-  relabelings: []
-  scheme: https
-  scrapeTimeout: null
-  targetLabels: []
-  tlsConfig:
-    caFile: /etc/prom-certs/root-cert.pem
-    certFile: /etc/prom-certs/cert-chain.pem
-    insecureSkipVerify: true
-    keyFile: /etc/prom-certs/key.pem
diff --git a/src/test/tasks.yaml b/src/test/tasks.yaml
index 9ef4e9be1..2166e70cb 100644
--- a/src/test/tasks.yaml
+++ b/src/test/tasks.yaml
@@ -3,9 +3,9 @@ tasks:
     description: Test app used for UDS Core validation
     actions:
       - description: Create zarf package for the test resources
-        cmd: "uds zarf package create src/test --confirm"
+        cmd: "uds zarf package create src/test --confirm --no-progress"
 
       - description: Apply the test resources
-        cmd: "uds zarf package deploy build/zarf-package-uds-core-test-apps-*.zst --confirm"
+        cmd: "uds zarf package deploy build/zarf-package-uds-core-test-apps-*.zst --confirm --no-progress"
 
       - description: Wait for the admin app to be ready
         wait:
diff --git a/src/velero/chart/templates/uds-package.yaml b/src/velero/chart/templates/uds-package.yaml
index b43e6232b..f483aa28f 100644
--- a/src/velero/chart/templates/uds-package.yaml
+++ b/src/velero/chart/templates/uds-package.yaml
@@ -23,5 +23,5 @@ spec:
             app: prometheus
         selector:
           app.kubernetes.io/name: velero
-        port: 8068
-        description: "Prometheus Metrics"
\ No newline at end of file
+        port: 8085
+        description: "Prometheus Metrics"
diff --git a/src/velero/values/values.yaml b/src/velero/values/values.yaml
index 53f80976a..c77c7334c 100644
--- a/src/velero/values/values.yaml
+++ b/src/velero/values/values.yaml
@@ -42,3 +42,6 @@ schedules:
       - kube-system
       - velero
     ttl: "240h"
+metrics:
+  serviceMonitor:
+    enabled: true
diff --git a/tasks/create.yaml b/tasks/create.yaml
index 412b0a730..3bfae435e 100644
--- a/tasks/create.yaml
+++ b/tasks/create.yaml
@@ -30,7 +30,7 @@ tasks:
       - task: pepr-build
 
       - description: "Create the UDS Core Istio Zarf Package"
-        cmd: "uds zarf package create packages/slim-dev --confirm --flavor ${FLAVOR}"
+        cmd: "uds zarf package create packages/slim-dev --confirm --no-progress --flavor ${FLAVOR}"
 
   - name: k3d-slim-dev-bundle
     description: "Create the K3d-UDS Core (Istio and Keycloak Only) Bundle"
@@ -49,6 +49,12 @@ tasks:
       - description: "Create the requested Zarf Package (must set UDS_PKG environment variable)"
         cmd: "uds zarf package create src/${UDS_PKG} --confirm --no-progress --flavor ${FLAVOR}"
 
+      - description: "Create the Istio Zarf Package, if UDS_PKG != istio"
+        cmd: |
+          if [ "${UDS_PKG}" != "istio" ]; then
+            uds zarf package create src/istio --confirm --no-progress --flavor ${FLAVOR}
+          fi
+
   - name: pepr-build
     description: "Build the UDS Core Pepr Module"
     actions:
diff --git a/tasks/deploy.yaml b/tasks/deploy.yaml
index d5e5bf06e..f1b96a8c6 100644
--- a/tasks/deploy.yaml
+++ b/tasks/deploy.yaml
@@ -20,12 +20,17 @@ tasks:
 
   - name: single-package
     actions:
+      - description: "Deploy the Istio package, if UDS_PKG != istio"
+        cmd: |
+          if [ "${UDS_PKG}" != "istio" ]; then
+            uds zarf package deploy build/zarf-package-uds-core-istio-${UDS_ARCH}.tar.zst --confirm --no-progress
+          fi
       - description: "Deploy the Pepr Module"
         cmd: |
           PEPR_VERSION=$(npm pkg get version | tr -d '"')
-          uds zarf package deploy build/zarf-package-pepr-uds-core-${UDS_ARCH}-${PEPR_VERSION}.tar.zst --confirm --set UDS_SINGLE_TEST=true
+          uds zarf package deploy build/zarf-package-pepr-uds-core-${UDS_ARCH}-${PEPR_VERSION}.tar.zst --confirm --no-progress --set UDS_SINGLE_TEST=true
       - description: "Deploy the requested Zarf Package (must set UDS_PKG environment variable)"
-        cmd: uds zarf package deploy build/zarf-package-uds-core-${UDS_PKG}-${UDS_ARCH}.tar.zst --confirm
+        cmd: uds zarf package deploy build/zarf-package-uds-core-${UDS_PKG}-${UDS_ARCH}.tar.zst --confirm --no-progress
 
   - name: latest-package-release
     actions:
@@ -34,9 +39,9 @@ tasks:
         setVariables:
           - name: LATEST_VERSION
       - description: "Deploy the latest UDS Core package release"
-        cmd: uds zarf package deploy oci://ghcr.io/defenseunicorns/packages/uds/core:${LATEST_VERSION} --confirm --components="metrics-server" # Temporary addition to workaround https://github.com/defenseunicorns/zarf/issues/2320
+        cmd: uds zarf package deploy oci://ghcr.io/defenseunicorns/packages/uds/core:${LATEST_VERSION} --confirm --no-progress
 
   - name: standard-package
     actions:
       - description: "Deploy the standard UDS Core zarf package"
-        cmd: uds zarf package deploy build/zarf-package-core-${UDS_ARCH}-${VERSION}.tar.zst --confirm
+        cmd: uds zarf package deploy build/zarf-package-core-${UDS_ARCH}-${VERSION}.tar.zst --confirm --no-progress
diff --git a/tasks/setup.yaml b/tasks/setup.yaml
index d82a70361..5e13c8660 100644
--- a/tasks/setup.yaml
+++ b/tasks/setup.yaml
@@ -3,7 +3,7 @@ tasks:
     actions:
       - description: "Create the K3d cluster"
         # renovate: datasource=github-tags depName=defenseunicorns/uds-k3d versioning=semver
-        cmd: "uds zarf package deploy oci://defenseunicorns/uds-k3d:0.5.0 --set=K3D_IMAGE=${K3D_IMAGE} --confirm"
+        cmd: "uds zarf package deploy oci://defenseunicorns/uds-k3d:0.5.0 --set=K3D_IMAGE=${K3D_IMAGE} --confirm --no-progress"
 
   - name: k3d-test-cluster
     actions:
@@ -11,4 +11,4 @@ tasks:
       - description: "Initialize the cluster with Zarf"
         # renovate: datasource=github-tags depName=defenseunicorns/zarf versioning=semver
-        cmd: "uds zarf package deploy oci://defenseunicorns/init:v0.32.3 --confirm"
+        cmd: "uds zarf package deploy oci://defenseunicorns/init:v0.32.3 --confirm --no-progress"