diff --git a/charts/kubernetes/templates/_helpers.tpl b/charts/kubernetes/templates/_helpers.tpl index c6c004e..7aa7883 100644 --- a/charts/kubernetes/templates/_helpers.tpl +++ b/charts/kubernetes/templates/_helpers.tpl @@ -71,7 +71,7 @@ Metrics - name: cpu lookup: prometheus: - - query: '1000 * sum(rate(container_cpu_usage_seconds_total{container!=""{{.Values.prometheus.labels | default .Values.prometheusLabels}}}[5m]))' + - query: {{tpl .Values.metrics.queries.prometheus.cluster_cpu . | quote}} connection: {{ .Values.prometheus.connection }} display: expr: | @@ -79,7 +79,7 @@ Metrics - name: memory lookup: prometheus: - - query: 'sum(container_memory_working_set_bytes{container!=""{{.Values.prometheus.labels | default .Values.prometheusLabels}}})' + - query: {{tpl .Values.metrics.queries.prometheus.cluster_memory . | quote}} connection: {{ .Values.prometheus.connection }} display: expr: | @@ -92,7 +92,7 @@ Metrics - name: cpu lookup: prometheus: - - query: '1000 * sum(rate(container_cpu_usage_seconds_total{container!=""{{.Values.prometheus.labels | default .Values.prometheusLabels}}}[5m])) by (node)' + - query: {{tpl .Values.metrics.queries.prometheus.node_cpu . | quote}} connection: {{ .Values.prometheus.connection }} display: expr: | @@ -103,7 +103,7 @@ Metrics - name: memory lookup: prometheus: - - query: 'sum(container_memory_working_set_bytes{container!="",pod!=""{{.Values.prometheus.labels | default .Values.prometheusLabels}}} * on(pod, namespace) group_left kube_pod_status_phase{phase="Running"{{.Values.prometheus.labels | default .Values.prometheusLabels}}} > 0) by (node)' + - query: {{tpl .Values.metrics.queries.prometheus.node_memory . | quote}} connection: {{ .Values.prometheus.connection }} display: expr: | @@ -115,7 +115,7 @@ Metrics - name: ephemeral-storage lookup: prometheus: - - query: 'max by (instance) (avg_over_time(node_filesystem_avail_bytes{mountpoint="/",fstype!="rootfs"{{.Values.prometheus.labels | default .Values.prometheusLabels}}}[5m]))' + - query: {{tpl 
.Values.metrics.queries.prometheus.node_storage . | quote}} connection: {{ .Values.prometheus.connection }} display: expr: | @@ -130,7 +130,7 @@ Metrics - name: cpu lookup: prometheus: - - query: '1000 * sum(rate(container_cpu_usage_seconds_total{container!=""{{.Values.prometheus.labels | default .Values.prometheusLabels}}}[5m])) by (pod)' + - query: {{tpl .Values.metrics.queries.prometheus.pod_cpu . | quote}} connection: {{ .Values.prometheus.connection }} display: expr: | @@ -141,7 +141,7 @@ Metrics - name: memory lookup: prometheus: - - query: 'sum(container_memory_working_set_bytes{container!=""{{.Values.prometheus.labels | default .Values.prometheusLabels}}}) by (pod)' + - query: {{tpl .Values.metrics.queries.prometheus.pod_memory . | quote}} connection: {{ .Values.prometheus.connection }} display: expr: | @@ -156,7 +156,7 @@ Metrics - name: cpu lookup: prometheus: - - query: '1000 * sum(rate(container_cpu_usage_seconds_total{container!=""{{.Values.prometheus.labels | default .Values.prometheusLabels}}}[5m])) by (namespace)' + - query: {{tpl .Values.metrics.queries.prometheus.namespace_cpu . | quote}} connection: {{ .Values.prometheus.connection }} display: expr: | @@ -167,7 +167,7 @@ Metrics - name: memory lookup: prometheus: - - query: 'sum(container_memory_working_set_bytes{container!="",pod!=""{{.Values.prometheus.labels | default .Values.prometheusLabels}}} * on(pod, namespace) group_left kube_pod_status_phase{phase="Running"{{.Values.prometheus.labels | default .Values.prometheusLabels}}} > 0) by (namespace)' + - query: {{tpl .Values.metrics.queries.prometheus.namespace_memory . | quote}} connection: {{ .Values.prometheus.connection }} display: expr: | @@ -251,7 +251,7 @@ Metrics {{- end }} {{- define "kubernetes.topology.metricProperties.cluster" -}} -{{- if (.Values.prometheus.url | default .Values.prometheusURL) }} +{{- if .Values.prometheus.url }} {{- include "kubernetes.topology.metricProperties.prometheus.cluster" . 
}} {{- else if .Values.metrics.enabled }} {{- include "kubernetes.topology.metricProperties.k8sMetrics.cluster" . }} @@ -259,7 +259,7 @@ Metrics {{- end }} {{- define "kubernetes.topology.metricProperties.node" -}} -{{- if (.Values.prometheus.url | default .Values.prometheusURL) }} +{{- if .Values.prometheus.url }} {{- include "kubernetes.topology.metricProperties.prometheus.node" . }} {{- else if .Values.metrics.enabled }} {{- include "kubernetes.topology.metricProperties.k8sMetrics.node" . }} @@ -267,7 +267,7 @@ Metrics {{- end }} {{- define "kubernetes.topology.metricProperties.pod" -}} -{{- if (.Values.prometheus.url | default .Values.prometheusURL) }} +{{- if .Values.prometheus.url }} {{- include "kubernetes.topology.metricProperties.prometheus.pod" . }} {{- else if .Values.metrics.enabled }} {{- include "kubernetes.topology.metricProperties.k8sMetrics.pod" . }} @@ -275,7 +275,7 @@ Metrics {{- end }} {{- define "kubernetes.topology.metricProperties.namespace" -}} -{{- if (.Values.prometheus.url | default .Values.prometheusURL) }} +{{- if .Values.prometheus.url }} {{- include "kubernetes.topology.metricProperties.prometheus.namespace" . }} {{- else if .Values.metrics.enabled }} {{- include "kubernetes.topology.metricProperties.k8sMetrics.namespace" . 
}} diff --git a/charts/kubernetes/templates/topology.yaml b/charts/kubernetes/templates/topology.yaml index a7a0853..acfd960 100644 --- a/charts/kubernetes/templates/topology.yaml +++ b/charts/kubernetes/templates/topology.yaml @@ -1,15 +1,17 @@ --- +{{- if and .Values.prometheus.createConnection .Values.prometheus.url }} apiVersion: mission-control.flanksource.com/v1 kind: Connection metadata: name: {{ .Values.prometheus.connection }} spec: prometheus: - url: {{ .Values.prometheus.url | default .Values.prometheusURL }} + url: {{ .Values.prometheus.url }} {{- if .Values.prometheus.auth }} auth: {{ .Values.prometheus.auth | toYaml | nindent 6}} {{- end }} +{{- end }} --- apiVersion: canaries.flanksource.com/v1 kind: Topology diff --git a/charts/kubernetes/values.schema.json b/charts/kubernetes/values.schema.json index 7aca2de..8b334a5 100644 --- a/charts/kubernetes/values.schema.json +++ b/charts/kubernetes/values.schema.json @@ -1,301 +1,393 @@ { - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Schema for Kubernetes bundle for Flanksource Mission Control", - "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", "properties": { - "nameOverride": { + "clusterName": { "type": "string" }, "fullnameOverride": { "type": "string" }, + "kubeconfig": { + "properties": {}, + "type": "object" + }, "labels": { - "type": "object", "properties": {}, - "required": [] + "type": "object" }, - "clusterName": { - "type": "string" + "metrics": { + "properties": { + "enabled": { + "type": "boolean" + }, + "queries": { + "properties": { + "gke": { + "properties": { + "node_cpu": { + "type": "null" + } + }, + "type": "object" + }, + "prometheus": { + "properties": { + "cluster_cpu": { + "type": "string" + }, + "cluster_memory": { + "type": "string" + }, + "namespace_cpu": { + "type": "string" + }, + "namespace_memory": { + "type": "string" + }, + "node_cpu": { + "type": "string" + }, + "node_memory": { + "type": "string" + }, + "node_storage": 
{ + "type": "string" + }, + "pod_cpu": { + "type": "string" + }, + "pod_memory": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object" + }, + "type": { + "pattern": "^(gke|prometheus)$", + "type": "string" + } + }, + "type": "object" }, - "prometheusURL": { + "nameOverride": { "type": "string" }, - "topology": { - "type": "object", + "playbooks": { "properties": { - "name": { + "cleanupFailedPods": { + "type": "boolean" + }, + "deletePod": { + "type": "boolean" + }, + "edit_kubernetes_manifests": { + "properties": { + "enabled": { + "type": "boolean" + }, + "git_connection": { + "type": "string" + } + }, + "type": "object" + }, + "enabled": { + "type": "boolean" + }, + "podSnapshot": { + "type": "boolean" + }, + "requestNamespaceAccess": { + "type": "boolean" + }, + "restartDeployment": { + "type": "boolean" + }, + "scaleDeployment": { + "type": "boolean" + } + }, + "type": "object" + }, + "prometheus": { + "properties": { + "auth": { + "type": ["object", "null"] + }, + "connection": { "type": "string" }, - "schedule": { + "createConnection": { + "type": "boolean" + }, + "labels": { "type": "string" }, - "icon": { + "url": { "type": "string" } }, - "required": [ - "name", - "schedule", - "icon" - ] + "type": "object" }, "scraper": { - "type": "object", "properties": { - "name": { - "type": "string" - }, - "exclusions": { - "type": "object", + "defaultExclusions": { "properties": { - "name": { - "type": "array", - "items": {} - }, - "namespace": { - "type": "array", - "items": {} - }, "kind": { - "type": "array", - "items": {} + "items": { + "type": "string" + }, + "type": "array" }, "labels": { - "type": "object", - "properties": {}, - "required": [] - } - }, - "required": [ - ] - }, - "defaultExclusions": { - "type": "object", - "properties": { + "properties": { + "canary-checker.flanksource.com/check-id": { + "type": "string" + }, + "canary-checker.flanksource.com/generated": { + "type": "string" + } + }, + "type": "object" + }, "name": { + "type": 
"array", - "items": {} + "type": "array" }, "namespace": { - "type": "array", - "items": {} - }, - "labels": { - "type": "object", - "properties": {}, - "required": [] - }, - "kind": { - "type": "array", - "items": { - "type": "string" - } + "type": "array" } }, - "required": [ - - ] + "type": "object" }, - "relationships": { - "type": "array", + "defaultRelationships": { "items": { - "type": "object", "properties": { "kind": { - "type": "object", "properties": { "expr": { "type": "string" - }, - "value": { - "type": "string" } }, - "required": [] + "type": "object" }, "name": { - "type": "object", "properties": { "expr": { "type": "string" - }, - "label": { - "type": "string" } }, - "required": [] + "type": "object" }, "namespace": { - "type": "object", "properties": { "expr": { "type": "string" - }, - "label": { - "type": "string" } }, - "required": [] + "type": "object" } }, - "required": [ - "kind", - "name" - ] - } + "type": "object" + }, + "type": "array" }, - "defaultRelationships": { - "type": "array", - "items": { - "type": "object", - "properties": { - "kind": { - "type": "object", - "properties": { - "expr": { + "defaultTransform": { + "properties": { + "changes": { + "properties": { + "exclude": { + "items": { "type": "string" }, - "value": { - "type": "string" - } + "type": "array" }, - "required": [] - }, - "name": { - "type": "object", - "properties": { - "expr": { - "type": "string" + "mapping": { + "items": { + "properties": { + "filter": { + "type": "string" + }, + "type": { + "type": "string" + } + }, + "type": "object" }, - "label": { - "type": "string" - } - }, - "required": [] + "type": "array" + } }, - "namespace": { - "type": "object", + "type": "object" + }, + "exclude": { + "type": "array" + }, + "mask": { + "type": "array" + }, + "relationship": { + "items": { "properties": { - "expr": { + "filter": { "type": "string" }, - "label": { - "type": "string" + "name": { + "properties": { + "expr": { + "type": "string" + } + }, + "type": 
"object" + }, + "type": { + "properties": { + "value": { + "type": "string" + } + }, + "type": "object" } }, - "required": [] - } - }, - "required": [ - "kind", - "name" - ] - } + "type": "object" + }, + "type": "array" + } + }, + "type": "object" }, "event": { - "type": "object", "properties": { "exclusions": { - "type": "object", "properties": { "reason": { - "type": "array", "items": { "type": "string" - } + }, + "type": "array" } }, - "required": [ - ] + "type": "object" }, "severityKeywords": { - "type": "object", "properties": { "error": { - "type": "array", "items": { "type": "string" - } + }, + "type": "array" }, "warn": { - "type": "array", "items": { "type": "string" - } + }, + "type": "array" } }, - "required": [ - "error", - "warn" - ] + "type": "object" } }, - "required": [ - "exclusions", - "severityKeywords" - ] + "type": "object" }, - "transform": { - "type": "object", + "exclusions": { "properties": { - "changes": { - "type": "object", - "properties": { - "exclude": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - - ] + "kind": { + "type": "array" + }, + "labels": { + "properties": {}, + "type": "object" + }, + "name": { + "type": "array" + }, + "namespace": { + "type": "array" } }, - "required": [ - - ] + "type": "object" + }, + "name": { + "type": "string" + }, + "relationships": { + "type": "array" }, "retention": { - "type": "object", "properties": { "changes": { - "type": "array", + "type": "array" + }, + "defaultChanges": { "items": { - "type": "object", "properties": { - "name": { - "type": "string" - }, "age": { "type": "string" }, - "count": { - "type": "number" + "name": { + "type": "string" } }, - "required": [ - "name" - ] - } + "type": "object" + }, + "type": "array" + }, + "staleItemAge": { + "type": "string" + } + }, + "type": "object" + }, + "schedule": { + "type": "string" + }, + "transform": { + "properties": { + "changes": { + "properties": { + "exclude": { + "type": "array" + }, + "mapping": { 
+ "type": "array" + }, + "relationship": { + "type": "array" } }, - "required": [ - ] + "type": "object" } }, - "required": [ - "name", - "defaultExclusions", - "defaultRelationships" - ] + "type": "object" + }, + "topology": { + "properties": { + "icon": { + "type": "string" + }, + "name": { + "type": "string" + }, + "schedule": { + "type": "string" + } + }, + "type": "object" } }, - "required": [ - "clusterName", - "prometheusURL", - "topology", - "scraper" - ] + "title": "Schema for Kubernetes bundle for Flanksource Mission Control", + "type": "object" } diff --git a/charts/kubernetes/values.yaml b/charts/kubernetes/values.yaml index 35fd45f..4c1f08e 100644 --- a/charts/kubernetes/values.yaml +++ b/charts/kubernetes/values.yaml @@ -7,19 +7,12 @@ kubeconfig: {} clusterName: kubernetes -metrics: - enabled: true - -# Deprecated: use prometheus.url instead -prometheusURL: "" - -# Deprecated: use prometheus.labels instead -prometheusLabels: "" - prometheus: + createConnection: true connection: prometheus url: '' - # prometheus labels to inject: "label1=key1,label2=key2,label3=~key3" + # prometheus labels to inject: "\,label1=key1\,label2=key2\,label3=~key3" + # Note: a comma (,) must be escaped as \, when passed via --set, and prometheus.labels must always start with a comma ('\,') so it can be appended to the existing labels labels: '' # Leave auth blank or use one of [username+password / bearer / oauth] auth: @@ -53,6 +46,26 @@ prometheus: # tokenURL: '' # params: {} +metrics: + enabled: true + # Allowed: [prometheus|gke] + type: prometheus # @schema pattern:^(gke|prometheus)$ + + queries: + prometheus: + cluster_cpu: '1000 * sum(rate(container_cpu_usage_seconds_total{container!=""{{.Values.prometheus.labels}}}[5m]))' + cluster_memory: 'sum(container_memory_working_set_bytes{container!=""{{.Values.prometheus.labels}}})' + node_cpu: '1000 * 
sum(rate(container_cpu_usage_seconds_total{container!=""{{.Values.prometheus.labels}}}[5m])) by (node)' + node_memory: 'sum(container_memory_working_set_bytes{container!="",pod!=""{{.Values.prometheus.labels}}} * on(pod, namespace) group_left kube_pod_status_phase{phase="Running"{{.Values.prometheus.labels}}} > 0) by (node)' + node_storage: 'max by (instance) (avg_over_time(node_filesystem_avail_bytes{mountpoint="/",fstype!="rootfs"{{.Values.prometheus.labels}}}[5m]))' + pod_cpu: '1000 * sum(rate(container_cpu_usage_seconds_total{container!=""{{.Values.prometheus.labels}}}[5m])) by (pod)' + pod_memory: 'sum(container_memory_working_set_bytes{container!=""{{.Values.prometheus.labels}}}) by (pod)' + namespace_cpu: '1000 * sum(rate(container_cpu_usage_seconds_total{container!=""{{.Values.prometheus.labels}}}[5m])) by (namespace)' + namespace_memory: 'sum(container_memory_working_set_bytes{container!="",pod!=""{{.Values.prometheus.labels}}} * on(pod, namespace) group_left kube_pod_status_phase{phase="Running"{{.Values.prometheus.labels}}} > 0) by (namespace)' + gke: + node_cpu: + + topology: name: cluster schedule: "@every 5m"