Skip to content

Commit

Permalink
Merge pull request #1807 from eclipse-ditto/feature/1806-prometheus-e…
Browse files Browse the repository at this point in the history
…xporters-for-search-counts

#1806 Provide configurable search count metrics to be exposed via Prometheus
  • Loading branch information
thjaeckle committed Nov 30, 2023
2 parents 62728f3 + 459376e commit 9d5ec89
Show file tree
Hide file tree
Showing 37 changed files with 1,051 additions and 107 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/helm-chart.yml
Expand Up @@ -65,7 +65,7 @@ jobs:
k8s:
- v1.26.6
- v1.27.3
- v1.28.3
- v1.28.0
steps:
- name: Checkout
uses: actions/checkout@v4
Expand Down
Expand Up @@ -39,6 +39,11 @@ public final class DittoSystemProperties {
*/
public static final String DITTO_LIMITS_POLICY_IMPORTS_LIMIT = "ditto.limits.policy.imports-limit";

/**
* System property name of the property defining a common prefix for all metrics Ditto reports via Prometheus.
*/
public static final String DITTO_METRICS_METRIC_PREFIX = "ditto.metrics.metric-prefix";

/*
 * Private constructor that always throws an AssertionError, so no instance of this class can be created —
 * not even from within the class itself. The members visible in this class are static String constants.
 */
private DittoSystemProperties() {
throw new AssertionError();
}
Expand Down
Expand Up @@ -24,6 +24,17 @@
import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;

import org.apache.pekko.Done;
import org.apache.pekko.actor.ActorRef;
import org.apache.pekko.actor.ActorSystem;
import org.apache.pekko.actor.CoordinatedShutdown;
import org.apache.pekko.actor.Props;
import org.apache.pekko.cluster.Cluster;
import org.apache.pekko.cluster.pubsub.DistributedPubSub;
import org.apache.pekko.http.javadsl.Http;
import org.apache.pekko.http.javadsl.model.Uri;
import org.apache.pekko.management.cluster.bootstrap.ClusterBootstrap;
import org.apache.pekko.management.javadsl.PekkoManagement;
import org.eclipse.ditto.base.model.common.DittoSystemProperties;
import org.eclipse.ditto.base.model.signals.FeatureToggle;
import org.eclipse.ditto.base.service.config.ServiceSpecificConfig;
Expand All @@ -35,6 +46,7 @@
import org.eclipse.ditto.internal.utils.config.ScopedConfig;
import org.eclipse.ditto.internal.utils.config.raw.RawConfigSupplier;
import org.eclipse.ditto.internal.utils.health.status.StatusSupplierActor;
import org.eclipse.ditto.internal.utils.metrics.config.MetricsConfig;
import org.eclipse.ditto.internal.utils.metrics.prometheus.PrometheusReporterRoute;
import org.eclipse.ditto.internal.utils.tracing.DittoTracing;
import org.slf4j.Logger;
Expand All @@ -47,17 +59,6 @@
import com.typesafe.config.ConfigValue;
import com.typesafe.config.ConfigValueFactory;

import org.apache.pekko.Done;
import org.apache.pekko.actor.ActorRef;
import org.apache.pekko.actor.ActorSystem;
import org.apache.pekko.actor.CoordinatedShutdown;
import org.apache.pekko.actor.Props;
import org.apache.pekko.cluster.Cluster;
import org.apache.pekko.cluster.pubsub.DistributedPubSub;
import org.apache.pekko.http.javadsl.Http;
import org.apache.pekko.http.javadsl.model.Uri;
import org.apache.pekko.management.cluster.bootstrap.ClusterBootstrap;
import org.apache.pekko.management.javadsl.PekkoManagement;
import ch.qos.logback.classic.LoggerContext;
import kamon.Kamon;
import kamon.prometheus.PrometheusReporter;
Expand Down Expand Up @@ -121,10 +122,6 @@ protected DittoService(final Logger logger, final String serviceName, final Stri
this.rootActorName = argumentNotEmpty(rootActorName, "root actor name");
rawConfig = determineRawConfig();
serviceSpecificConfig = getServiceSpecificConfig(tryToGetDittoConfigOrEmpty(rawConfig));
if (null == serviceSpecificConfig) {
throw new DittoConfigError("The service specific config must not be null!");
}

logger.debug("Using service specific config: <{}>.", serviceSpecificConfig);
}

Expand Down Expand Up @@ -208,6 +205,7 @@ private static ScopedConfig getDittoConfigOrEmpty(final Config rawConfig) {
private ActorSystem doStart() {
logRuntimeParameters();
final var actorSystemConfig = appendDittoInfo(appendPekkoPersistenceMongoUriToRawConfig());
injectSystemPropertiesLimits(serviceSpecificConfig);
startKamon();
final var actorSystem = createActorSystem(actorSystemConfig);
initializeActorSystem(actorSystem);
Expand Down Expand Up @@ -390,8 +388,6 @@ private void startServiceRootActors(final ActorSystem actorSystem, final C servi

final ActorRef pubSubMediator = getDistributedPubSubMediatorActor(actorSystem);

injectSystemPropertiesLimits(serviceSpecificConfig);

startMainRootActor(actorSystem, getMainRootActorProps(serviceSpecificConfig, pubSubMediator));
RootActorStarter.get(actorSystem, ScopedConfig.dittoExtension(actorSystem.settings().config())).execute();
});
Expand All @@ -418,6 +414,8 @@ private void injectSystemPropertiesLimits(final C serviceSpecificConfig) {
Boolean.toString(rawConfig.getBoolean(FeatureToggle.MERGE_THINGS_ENABLED)));
System.setProperty(DittoSystemProperties.DITTO_LIMITS_POLICY_IMPORTS_LIMIT,
Integer.toString(limitsConfig.getPolicyImportsLimit()));
final MetricsConfig metricsConfig = serviceSpecificConfig.getMetricsConfig();
System.setProperty(DittoSystemProperties.DITTO_METRICS_METRIC_PREFIX, metricsConfig.getMetricPrefix());
}

private static ActorRef getDistributedPubSubMediatorActor(final ActorSystem actorSystem) {
Expand Down
Expand Up @@ -123,7 +123,7 @@ public int hashCode() {
@Override
public String toString() {
return getClass().getSimpleName() + " [" +
", thingsMaxSize=" + thingsMaxSize +
"thingsMaxSize=" + thingsMaxSize +
", policiesMaxSize=" + policiesMaxSize +
", messagesMaxSize=" + messagesMaxSize +
", thingsSearchDefaultPageSize=" + thingsSearchDefaultPageSize +
Expand Down
2 changes: 1 addition & 1 deletion deployment/helm/ditto/Chart.yaml
Expand Up @@ -16,7 +16,7 @@ description: |
A digital twin is a virtual, cloud-based representation of its real-world counterpart
(real world “Things”, e.g. devices like sensors, smart heating, connected cars, smart grids, EV charging stations etc).
type: application
version: 3.4.2 # chart version is effectively set by release-job
version: 3.4.3 # chart version is effectively set by release-job
appVersion: 3.4.1
keywords:
- iot-chart
Expand Down
10 changes: 7 additions & 3 deletions deployment/helm/ditto/templates/connectivity-deployment.yaml
Expand Up @@ -82,10 +82,14 @@ spec:
env:
{{- if not .Values.global.logging.customConfigFile.enabled }}
- name: DITTO_LOGGING_DISABLE_SYSOUT_LOG
value: "{{ if .Values.global.logging.sysout.enabled }}false{{ else }}true{{ end }}"
value: "{{ printf "%t" (not .Values.global.logging.sysout.enabled) }}" # DISABLE flag: must be the inverse of sysout.enabled
- name: DITTO_LOGGING_FILE_APPENDER
value: "{{ if .Values.global.logging.logFiles.enabled }}true{{ else }}false{{ end }}"
value: "{{ printf "%t" .Values.global.logging.logFiles.enabled }}"
{{- end }}
- name: DITTO_METRICS_METRIC_PREFIX
value: "{{ .Values.global.metrics.metricsPrefix }}"
- name: SYSTEM_METRICS_ENABLED
value: "{{ .Values.global.metrics.systemMetrics.enabled }}"
- name: DITTO_TRACING_ENABLED
value: "{{ .Values.global.tracing.enabled }}"
- name: DITTO_TRACING_OTEL_TRACE_REPORTER_ENABLED
Expand Down Expand Up @@ -137,7 +141,7 @@ spec:
{{- end }}
{{ join " " .Values.connectivity.systemProps }}
- name: MONGO_DB_SSL_ENABLED
value: "{{ if .Values.dbconfig.connectivity.ssl }}true{{ else }}false{{ end }}"
value: "{{ printf "%t" .Values.dbconfig.connectivity.ssl }}"
- name: MONGO_DB_URI
valueFrom:
secretKeyRef:
Expand Down
8 changes: 6 additions & 2 deletions deployment/helm/ditto/templates/gateway-deployment.yaml
Expand Up @@ -83,10 +83,14 @@ spec:
env:
{{- if not .Values.global.logging.customConfigFile.enabled }}
- name: DITTO_LOGGING_DISABLE_SYSOUT_LOG
value: "{{ if .Values.global.logging.sysout.enabled }}false{{ else }}true{{ end }}"
value: "{{ printf "%t" (not .Values.global.logging.sysout.enabled) }}" # DISABLE flag: must be the inverse of sysout.enabled
- name: DITTO_LOGGING_FILE_APPENDER
value: "{{ if .Values.global.logging.logFiles.enabled }}true{{ else }}false{{ end }}"
value: "{{ printf "%t" .Values.global.logging.logFiles.enabled }}"
{{- end }}
- name: DITTO_METRICS_METRIC_PREFIX
value: "{{ .Values.global.metrics.metricsPrefix }}"
- name: SYSTEM_METRICS_ENABLED
value: "{{ .Values.global.metrics.systemMetrics.enabled }}"
- name: DITTO_TRACING_ENABLED
value: "{{ .Values.global.tracing.enabled }}"
- name: DITTO_TRACING_OTEL_TRACE_REPORTER_ENABLED
Expand Down
10 changes: 7 additions & 3 deletions deployment/helm/ditto/templates/policies-deployment.yaml
Expand Up @@ -82,10 +82,14 @@ spec:
env:
{{- if not .Values.global.logging.customConfigFile.enabled }}
- name: DITTO_LOGGING_DISABLE_SYSOUT_LOG
value: "{{ if .Values.global.logging.sysout.enabled }}false{{ else }}true{{ end }}"
value: "{{ printf "%t" (not .Values.global.logging.sysout.enabled) }}" # DISABLE flag: must be the inverse of sysout.enabled
- name: DITTO_LOGGING_FILE_APPENDER
value: "{{ if .Values.global.logging.logFiles.enabled }}true{{ else }}false{{ end }}"
value: "{{ printf "%t" .Values.global.logging.logFiles.enabled }}"
{{- end }}
- name: DITTO_METRICS_METRIC_PREFIX
value: "{{ .Values.global.metrics.metricsPrefix }}"
- name: SYSTEM_METRICS_ENABLED
value: "{{ .Values.global.metrics.systemMetrics.enabled }}"
- name: DITTO_TRACING_ENABLED
value: "{{ .Values.global.tracing.enabled }}"
- name: DITTO_TRACING_OTEL_TRACE_REPORTER_ENABLED
Expand Down Expand Up @@ -155,7 +159,7 @@ spec:
{{- end }}
{{ join " " .Values.policies.systemProps }}
- name: MONGO_DB_SSL_ENABLED
value: "{{ if .Values.dbconfig.policies.ssl }}true{{ else }}false{{ end }}"
value: "{{ printf "%t" .Values.dbconfig.policies.ssl }}"
- name: MONGO_DB_URI
valueFrom:
secretKeyRef:
Expand Down
10 changes: 7 additions & 3 deletions deployment/helm/ditto/templates/things-deployment.yaml
Expand Up @@ -82,10 +82,14 @@ spec:
env:
{{- if not .Values.global.logging.customConfigFile.enabled }}
- name: DITTO_LOGGING_DISABLE_SYSOUT_LOG
value: "{{ if .Values.global.logging.sysout.enabled }}false{{ else }}true{{ end }}"
value: "{{ printf "%t" (not .Values.global.logging.sysout.enabled) }}" # DISABLE flag: must be the inverse of sysout.enabled
- name: DITTO_LOGGING_FILE_APPENDER
value: "{{ if .Values.global.logging.logFiles.enabled }}true{{ else }}false{{ end }}"
value: "{{ printf "%t" .Values.global.logging.logFiles.enabled }}"
{{- end }}
- name: DITTO_METRICS_METRIC_PREFIX
value: "{{ .Values.global.metrics.metricsPrefix }}"
- name: SYSTEM_METRICS_ENABLED
value: "{{ .Values.global.metrics.systemMetrics.enabled }}"
- name: DITTO_TRACING_ENABLED
value: "{{ .Values.global.tracing.enabled }}"
- name: DITTO_TRACING_OTEL_TRACE_REPORTER_ENABLED
Expand Down Expand Up @@ -156,7 +160,7 @@ spec:
'-Dditto.things.wot.to-thing-description.json-template={{ .Values.things.config.wot.tdJsonTemplate | replace "\n" "" | replace "\\\"" "\"" }}'
{{ join " " .Values.things.systemProps }}
- name: MONGO_DB_SSL_ENABLED
value: "{{ if .Values.dbconfig.things.ssl }}true{{ else }}false{{ end }}"
value: "{{ printf "%t" .Values.dbconfig.things.ssl }}"
- name: MONGO_DB_URI
valueFrom:
secretKeyRef:
Expand Down
25 changes: 22 additions & 3 deletions deployment/helm/ditto/templates/thingssearch-deployment.yaml
Expand Up @@ -82,10 +82,14 @@ spec:
env:
{{- if not .Values.global.logging.customConfigFile.enabled }}
- name: DITTO_LOGGING_DISABLE_SYSOUT_LOG
value: "{{ if .Values.global.logging.sysout.enabled }}false{{ else }}true{{ end }}"
value: "{{ printf "%t" (not .Values.global.logging.sysout.enabled) }}" # DISABLE flag: must be the inverse of sysout.enabled
- name: DITTO_LOGGING_FILE_APPENDER
value: "{{ if .Values.global.logging.logFiles.enabled }}true{{ else }}false{{ end }}"
value: "{{ printf "%t" .Values.global.logging.logFiles.enabled }}"
{{- end }}
- name: DITTO_METRICS_METRIC_PREFIX
value: "{{ .Values.global.metrics.metricsPrefix }}"
- name: SYSTEM_METRICS_ENABLED
value: "{{ .Values.global.metrics.systemMetrics.enabled }}"
- name: DITTO_TRACING_ENABLED
value: "{{ .Values.global.tracing.enabled }}"
- name: DITTO_TRACING_OTEL_TRACE_REPORTER_ENABLED
Expand Down Expand Up @@ -132,9 +136,20 @@ spec:
{{- if .Values.global.logging.customConfigFile.enabled }}
-Dlogback.configurationFile=/opt/ditto/{{ .Values.global.logging.customConfigFile.fileName }}
{{- end }}
{{- range $key, $value := .Values.thingsSearch.config.operatorMetrics.customMetrics }}
"{{ printf "%s%s%s=%t" "-Dditto.search.operator-metrics.custom-metrics." $key ".enabled" $value.enabled }}"
"{{ printf "%s%s%s=%s" "-Dditto.search.operator-metrics.custom-metrics." $key ".scrape-interval" $value.scrapeInterval }}"
{{- range $index, $namespace := $value.namespaces }}
"{{ printf "%s%s%s%d=%s" "-Dditto.search.operator-metrics.custom-metrics." $key ".namespaces." $index $namespace }}"
{{- end }}
"{{ printf "%s%s%s=%s" "-Dditto.search.operator-metrics.custom-metrics." $key ".filter" $value.filter }}"
{{- range $tagKey, $tagValue := $value.tags }}
"{{ printf "%s%s%s%s=%s" "-Dditto.search.operator-metrics.custom-metrics." $key ".tags." $tagKey $tagValue }}"
{{- end }}
{{- end }}
{{ join " " .Values.thingsSearch.systemProps }}
- name: MONGO_DB_SSL_ENABLED
value: "{{ if .Values.dbconfig.thingsSearch.ssl }}true{{ else }}false{{ end }}"
value: "{{ printf "%t" .Values.dbconfig.thingsSearch.ssl }}"
- name: MONGO_DB_URI
valueFrom:
secretKeyRef:
Expand Down Expand Up @@ -204,6 +219,10 @@ spec:
value: "{{ .Values.thingsSearch.config.updater.stream.retrievalParallelism }}"
- name: THINGS_SEARCH_UPDATER_STREAM_PERSISTENCE_PARALLELISM
value: "{{ .Values.thingsSearch.config.updater.stream.persistence.parallelism }}"
- name: THINGS_SEARCH_OPERATOR_METRICS_ENABLED
value: "{{ .Values.thingsSearch.config.operatorMetrics.enabled }}"
- name: THINGS_SEARCH_OPERATOR_METRICS_SCRAPE_INTERVAL
value: "{{ .Values.thingsSearch.config.operatorMetrics.scrapeInterval }}"
- name: ACTIVITY_CHECK_INTERVAL
value: "{{ .Values.thingsSearch.config.updater.activityCheckInterval }}"
- name: BACKGROUND_SYNC_ENABLED
Expand Down
24 changes: 24 additions & 0 deletions deployment/helm/ditto/values.yaml
Expand Up @@ -135,6 +135,14 @@ global:
enabled: true
# fileName passed as Java system property "-Dlogback.configurationFile"
fileName: logback.xml
# metrics configuration for Ditto
metrics:
# metricsPrefix defines a prefix to use for all Ditto created metrics (counters, gauges, histograms)
metricsPrefix: ""
# systemMetrics contains the configuration for obtaining system metrics (via Kamon)
systemMetrics:
# enabled whether system metrics are gathered
enabled: true
# tracing configuration for Ditto
tracing:
# enabled whether tracing (via OpenTelemetry) is enabled
Expand Down Expand Up @@ -1069,6 +1077,22 @@ thingsSearch:
throughput: 100
# period the throttle period
period: 30s
# operatorMetrics contains configuration for operator defined custom metrics, using a search "count" with namespaces and filter
operatorMetrics:
# enabled configures whether operator metrics should be enabled or not
enabled: true
# scrapeInterval defines the default scrape interval used if a "customMetric" does not specify its own scrape interval
scrapeInterval: 15m
# customMetrics holds a map of metric-names as key (e.g. "ditto_my_awesome_things") and custom metric config as value
customMetrics:
# ditto_my_awesome_things:
# enabled: true
# scrapeInterval: 5m
# namespaces:
# - "org.eclipse.ditto"
# filter: "eq(attributes/awesome,true)"
# tags:
# foo: bar


## ----------------------------------------------------------------------------
Expand Down
@@ -1,7 +1,7 @@
---
title: Operating Ditto
tags: [installation]
keywords: operating, docker, docker-compose, devops, logging, logstash, elk, monitoring, prometheus, grafana
keywords: operating, docker, docker-compose, devops, logging, logstash, elk, monitoring, prometheus, grafana, tracing, metrics
permalink: installation-operating.html
---

Expand Down Expand Up @@ -423,6 +423,61 @@ Have a look at the
[example Grafana dashboards](https://github.com/eclipse-ditto/ditto/tree/master/deployment/operations/grafana-dashboards)
and build and share new ones back to the Ditto community.

### Operator defined custom metrics

Starting with Ditto 3.5.0, it is possible to configure "custom metrics" which are gathered by counting things matching
a defined namespace/filter combination.
This is configured via the [search](architecture-services-things-search.html) service configuration and builds on the
[count things](basic-search.html#search-count-queries) functionality.

The idea behind this is to be able to show statistics (e.g. in Grafana) about the number of "Things" managed in
Ditto which fulfill a certain condition.

The following search service configuration snippet is an example which provides a metric named
`all_produced_and_not_installed_devices`, defined by a query on the existence of a `production-date` attribute and
the absence of an `installation-date` attribute:
```hocon
ditto {
search {
operator-metrics {
enabled = true
scrape-interval = 15m
custom-metrics {
all_produced_and_not_installed_devices {
scrape-interval = 5m # override the default scrape interval, run every 5 minutes
namespaces = [
"org.eclipse.ditto.smokedetectors"
"org.eclipse.ditto.cameras"
]
filter = "and(exists(attributes/production-date),not(exists(attributes/installation-date)))"
tags {
company = "acme-corp"
}
}
}
}
}
}
```

Custom metrics can also be configured via system properties. The following example configures the metric shown above:
```
-Dditto.search.operator-metrics.custom-metrics.all_produced_and_not_installed_devices.enabled=true
-Dditto.search.operator-metrics.custom-metrics.all_produced_and_not_installed_devices.scrape-interval=5m
-Dditto.search.operator-metrics.custom-metrics.all_produced_and_not_installed_devices.namespaces.0=org.eclipse.ditto.smokedetectors
-Dditto.search.operator-metrics.custom-metrics.all_produced_and_not_installed_devices.namespaces.1=org.eclipse.ditto.cameras
-Dditto.search.operator-metrics.custom-metrics.all_produced_and_not_installed_devices.filter=and(exists(attributes/production-date),not(exists(attributes/installation-date)))
-Dditto.search.operator-metrics.custom-metrics.all_produced_and_not_installed_devices.tags.company=acme-corp
```

Ditto will perform a [count things operation](basic-search.html#search-count-queries) every 5 minutes (`5m`), providing
a gauge named `all_produced_and_not_installed_devices` which holds the resulting count and carries the tag `company="acme-corp"`.

In Prometheus format this would look like:
```
all_produced_and_not_installed_devices{company="acme-corp"} 42.0
```

## Tracing

Ditto supports reading and propagating [W3C trace context](https://www.w3.org/TR/trace-context/) headers at the
Expand Down
4 changes: 4 additions & 0 deletions internal/utils/config/src/main/resources/ditto-metrics.conf
Expand Up @@ -2,6 +2,10 @@ ditto.metrics {
systemMetrics.enabled = true
systemMetrics.enabled = ${?SYSTEM_METRICS_ENABLED}

# the prefix to apply to all metrics which Ditto gathers and provides to Prometheus
metric-prefix = ""
metric-prefix = ${?DITTO_METRICS_METRIC_PREFIX}

prometheus {
enabled = true
enabled = ${?PROMETHEUS_ENABLED}
Expand Down

0 comments on commit 9d5ec89

Please sign in to comment.