From eeaff64f928257877feb1627609c50c5443187b9 Mon Sep 17 00:00:00 2001 From: Leon <82407168+sed-i@users.noreply.github.com> Date: Wed, 22 Nov 2023 12:44:18 -0500 Subject: [PATCH] Add config options for ingestion rate limits (#324) --- .gitignore | 1 + .jujuignore | 1 + config.yaml | 22 +++++++++ ...23-11-21 config options for rate limits.md | 48 +++++++++++++++++++ doc/adr/README.md | 37 ++++++++++++++ src/charm.py | 2 + src/config_builder.py | 25 +++++++++- 7 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 doc/adr/2023-11-21 config options for rate limits.md create mode 100644 doc/adr/README.md diff --git a/.gitignore b/.gitignore index 5e1c26f12..c8b1462fa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.idea/ *~ *swp *.charm diff --git a/.jujuignore b/.jujuignore index 38fa87067..8bac1fe12 100644 --- a/.jujuignore +++ b/.jujuignore @@ -3,3 +3,4 @@ *.charm .#* cos-tool* +/doc diff --git a/config.yaml b/config.yaml index 2c32de336..f2a0f2b8d 100644 --- a/config.yaml +++ b/config.yaml @@ -15,3 +15,25 @@ options: automatically deduced from it). See https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ type: string + ingestion-rate-mb: + description: | + Per-user ingestion rate limit (MB/s). + This config option matches exactly Loki's `ingestion_rate_mb`, except that it is an integer here + (Loki takes a float). + This same value is used internally for setting `per_stream_rate_limit`. Loki uses a default of 3 for + `ingestion_rate_mb`, but 4 for `per_stream_rate_limit`. For this reason we use 4 as the default here. + + Ref: https://grafana.com/docs/loki/latest/configure/#limits_config + type: int + default: 4 + ingestion-burst-size-mb: + description: | + This config option matches exactly Loki's `ingestion_burst_size_mb`, except that it is an integer here + (Loki takes a float). + This same value is used internally for setting `per_stream_rate_limit_burst`. Loki uses a default of 6 for + `ingestion_burst_size_mb`, but 15 for `per_stream_rate_limit_burst`. For this reason we use 15 as the default + here. + + Ref: https://grafana.com/docs/loki/latest/configure/#limits_config + type: int + default: 15 diff --git a/doc/adr/2023-11-21 config options for rate limits.md b/doc/adr/2023-11-21 config options for rate limits.md new file mode 100644 index 000000000..d96f3db18 --- /dev/null +++ b/doc/adr/2023-11-21 config options for rate limits.md @@ -0,0 +1,48 @@ +# Charm config options for ingestion rate limits + +## Context and Problem Statement +The default ingestion rate limits for Loki result in resource utilization capped at ~30% +of a 8cpu16gb VM. Juju admins should have a way for configuring the rate limits to allow +for higher resource utilization. + +## Considered Options + +- Add individual config options on a per-need basis +- Config overlay option (deep merge) +- Juju feature request for nested config options + +## Decision Outcome + +Chosen option: "Add individual config options on a per-need basis", because we value UX over versatility. + +### Consequences + +* Good, because config options are easy to use. +* Good, because involves a maintainable, small volume of changes. +* Bad, because does not fully address the inherent versatility of Loki config. + +## Pros and Cons of the Options + +### Add individual config options on a per-need basis + +* Good, because config options are easy to use. +* Good, because involves a maintainable, small volume of changes. +* Bad, because does not fully address the inherent versatility of Loki config. + +### Config overlay option (deep merge) + +* Good, because would have a very small code change. +* Good, because offers the Juju admin the full versatility of Loki config. +* Bad, because config option would be not as easy to use. +* Bad, because user-set values may result in undesired overrides of charm options (and + we do not want to manage an exclude-/allow-list). + +### Juju feature request for nested config options +* Good, because we could mirror the entire Loki config as (nested) charm options. +* Bad, because we would need to maintain a "schema" compatibility with Loki. +* Bad, because charm revision may become coupled to workload version. +* Bad, because the feature request is likely to be rejected. + +## More Information +- https://juju.is/docs/sdk/config-yaml#heading--options +- https://grafana.com/docs/loki/latest/configure/#limits_config diff --git a/doc/adr/README.md b/doc/adr/README.md new file mode 100644 index 000000000..ecf090914 --- /dev/null +++ b/doc/adr/README.md @@ -0,0 +1,37 @@ +ADR template is based on https://adr.github.io/madr/examples.html. + +```markdown +# ADR topic + +## Context and Problem Statement + + +## Considered Options + +- Option 1 +- Option 2 + +## Decision Outcome + +Chosen option: ..., because ... + +### Consequences + +* Good, because ... +* Good, because ... +* Bad, because ... + +## Pros and Cons of the Options + +### Option 1 + +* Good, because ... +* Bad, because ... + +### Option 2 + +* Good, because ... +* Bad, because ... + +## More Information +``` diff --git a/src/charm.py b/src/charm.py index 2d5f2b5c4..5f4d640aa 100755 --- a/src/charm.py +++ b/src/charm.py @@ -309,6 +309,8 @@ def _configure(self): # noqa: C901 instance_addr=self.hostname, alertmanager_url=self._alerting_config(), external_url=self._external_url, + ingestion_rate_mb=int(self.config["ingestion-rate-mb"]), + ingestion_burst_size_mb=int(self.config["ingestion-burst-size-mb"]), http_tls=(self.server_cert.cert is not None), ).build() diff --git a/src/config_builder.py b/src/config_builder.py index f77d9a6d7..a774d1276 100644 --- a/src/config_builder.py +++ b/src/config_builder.py @@ -33,12 +33,21 @@ class ConfigBuilder: _auth_enabled: bool = False def __init__( - self, instance_addr: str, alertmanager_url: str, external_url: str, http_tls: bool = False + self, + *, + instance_addr: str, + alertmanager_url: str, + external_url: str, + ingestion_rate_mb: int, + ingestion_burst_size_mb: int, + http_tls: bool = False, ): """Init method.""" self.instance_addr = instance_addr self.alertmanager_url = alertmanager_url self.external_url = external_url + self.ingestion_rate_mb = ingestion_rate_mb + self.ingestion_burst_size_mb = ingestion_burst_size_mb self.http_tls = http_tls def build(self) -> dict: @@ -52,6 +61,7 @@ def build(self) -> dict: "schema_config": self._schema_config, "server": self._server, "storage_config": self._storage_config, + "limits_config": self._limits_config, } @property @@ -121,3 +131,16 @@ def _storage_config(self) -> dict: "boltdb": {"directory": BOLTDB_DIR}, "filesystem": {"directory": CHUNKS_DIR}, } + + @property + def _limits_config(self) -> dict: + # Ref: https://grafana.com/docs/loki/latest/configure/#limits_config + return { + # For convenience, we use an integer but Loki takes a float + "ingestion_rate_mb": float(self.ingestion_rate_mb), + "ingestion_burst_size_mb": float(self.ingestion_burst_size_mb), + # The per-stream limits are intentionally set to match the per-user limits, to simplify UX and address the + # case of one stream per user. + "per_stream_rate_limit": f"{self.ingestion_rate_mb}MB", + "per_stream_rate_limit_burst": f"{self.ingestion_burst_size_mb}MB", + }