From eeaff64f928257877feb1627609c50c5443187b9 Mon Sep 17 00:00:00 2001
From: Leon <82407168+sed-i@users.noreply.github.com>
Date: Wed, 22 Nov 2023 12:44:18 -0500
Subject: [PATCH] Add config options for ingestion rate limits (#324)

---
 .gitignore                                    |  1 +
 .jujuignore                                   |  1 +
 config.yaml                                   | 22 +++++++++
 ...23-11-21 config options for rate limits.md | 48 +++++++++++++++++++
 doc/adr/README.md                             | 37 ++++++++++++++
 src/charm.py                                  |  2 +
 src/config_builder.py                         | 25 +++++++++-
 7 files changed, 135 insertions(+), 1 deletion(-)
 create mode 100644 doc/adr/2023-11-21 config options for rate limits.md
 create mode 100644 doc/adr/README.md

diff --git a/.gitignore b/.gitignore
index 5e1c26f12..c8b1462fa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+.idea/
 *~
 *swp
 *.charm
diff --git a/.jujuignore b/.jujuignore
index 38fa87067..8bac1fe12 100644
--- a/.jujuignore
+++ b/.jujuignore
@@ -3,3 +3,4 @@
 *.charm
 .#*
 cos-tool*
+/doc
diff --git a/config.yaml b/config.yaml
index 2c32de336..f2a0f2b8d 100644
--- a/config.yaml
+++ b/config.yaml
@@ -15,3 +15,25 @@ options:
       automatically deduced from it).
       See https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
     type: string
+  ingestion-rate-mb:
+    description: |
+      Per-user ingestion rate limit (MB/s).
+      This config option matches exactly Loki's `ingestion_rate_mb`, except that it is an integer here
+      (Loki takes a float).
+      This same value is used internally for setting `per_stream_rate_limit`. Loki uses a default of 3 for
+      `ingestion_rate_mb`, but 4 for `per_stream_rate_limit`. For this reason we use 4 as the default here.
+      
+      Ref: https://grafana.com/docs/loki/latest/configure/#limits_config
+    type: int
+    default: 4
+  ingestion-burst-size-mb:
+    description: |
+      This config option matches exactly Loki's `ingestion_burst_size_mb`, except that it is an integer here
+      (Loki takes a float).
+      This same value is used internally for setting `per_stream_rate_limit_burst`. Loki uses a default of 6 for 
+      `ingestion_burst_size_mb`, but 15 for `per_stream_rate_limit_burst`. For this reason we use 15 as the default
+      here.
+      
+      Ref: https://grafana.com/docs/loki/latest/configure/#limits_config
+    type: int
+    default: 15
diff --git a/doc/adr/2023-11-21 config options for rate limits.md b/doc/adr/2023-11-21 config options for rate limits.md
new file mode 100644
index 000000000..d96f3db18
--- /dev/null
+++ b/doc/adr/2023-11-21 config options for rate limits.md	
@@ -0,0 +1,48 @@
+# Charm config options for ingestion rate limits
+
+## Context and Problem Statement
+The default ingestion rate limits for Loki result in resource utilization capped at ~30%
+of a 8cpu16gb VM. Juju admins should have a way for configuring the rate limits to allow
+for higher resource utilization.
+
+## Considered Options
+
+- Add individual config options on a per-need basis
+- Config overlay option (deep merge)
+- Juju feature request for nested config options
+
+## Decision Outcome
+
+Chosen option: "Add individual config options on a per-need basis", because we value UX over versatility.
+
+### Consequences
+
+* Good, because config options are easy to use.
+* Good, because involves a maintainable, small volume of changes.
+* Bad, because does not fully address the inherent versatility of Loki config.
+
+## Pros and Cons of the Options
+
+### Add individual config options on a per-need basis
+
+* Good, because config options are easy to use.
+* Good, because involves a maintainable, small volume of changes.
+* Bad, because does not fully address the inherent versatility of Loki config.
+
+### Config overlay option (deep merge)
+
+* Good, because would have a very small code change.
+* Good, because offers the Juju admin the full versatility of Loki config.
+* Bad, because config option would be not as easy to use.
+* Bad, because user-set values may result in undesired overrides of charm options (and 
+  we do not want to manage an exclude-/allow-list).
+
+### Juju feature request for nested config options
+* Good, because we could mirror the entire Loki config as (nested) charm options.
+* Bad, because we would need to maintain a "schema" compatibility with Loki.
+* Bad, because charm revision may become coupled to workload version.
+* Bad, because the feature request is likely to be rejected.
+
+## More Information
+- https://juju.is/docs/sdk/config-yaml#heading--options
+- https://grafana.com/docs/loki/latest/configure/#limits_config
diff --git a/doc/adr/README.md b/doc/adr/README.md
new file mode 100644
index 000000000..ecf090914
--- /dev/null
+++ b/doc/adr/README.md
@@ -0,0 +1,37 @@
+ADR template is based on https://adr.github.io/madr/examples.html.
+
+```markdown
+# ADR topic
+
+## Context and Problem Statement
+
+
+## Considered Options
+
+- Option 1
+- Option 2
+
+## Decision Outcome
+
+Chosen option: ..., because ...
+
+### Consequences
+
+* Good, because ...
+* Good, because ...
+* Bad, because ...
+
+## Pros and Cons of the Options
+
+### Option 1
+
+* Good, because ...
+* Bad, because ...
+
+### Option 2
+
+* Good, because ...
+* Bad, because ...
+
+## More Information
+```
diff --git a/src/charm.py b/src/charm.py
index 2d5f2b5c4..5f4d640aa 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -309,6 +309,8 @@ def _configure(self):  # noqa: C901
             instance_addr=self.hostname,
             alertmanager_url=self._alerting_config(),
             external_url=self._external_url,
+            ingestion_rate_mb=int(self.config["ingestion-rate-mb"]),
+            ingestion_burst_size_mb=int(self.config["ingestion-burst-size-mb"]),
             http_tls=(self.server_cert.cert is not None),
         ).build()
 
diff --git a/src/config_builder.py b/src/config_builder.py
index f77d9a6d7..a774d1276 100644
--- a/src/config_builder.py
+++ b/src/config_builder.py
@@ -33,12 +33,21 @@ class ConfigBuilder:
     _auth_enabled: bool = False
 
     def __init__(
-        self, instance_addr: str, alertmanager_url: str, external_url: str, http_tls: bool = False
+        self,
+        *,
+        instance_addr: str,
+        alertmanager_url: str,
+        external_url: str,
+        ingestion_rate_mb: int,
+        ingestion_burst_size_mb: int,
+        http_tls: bool = False,
     ):
         """Init method."""
         self.instance_addr = instance_addr
         self.alertmanager_url = alertmanager_url
         self.external_url = external_url
+        self.ingestion_rate_mb = ingestion_rate_mb
+        self.ingestion_burst_size_mb = ingestion_burst_size_mb
         self.http_tls = http_tls
 
     def build(self) -> dict:
@@ -52,6 +61,7 @@ def build(self) -> dict:
             "schema_config": self._schema_config,
             "server": self._server,
             "storage_config": self._storage_config,
+            "limits_config": self._limits_config,
         }
 
     @property
@@ -121,3 +131,16 @@ def _storage_config(self) -> dict:
             "boltdb": {"directory": BOLTDB_DIR},
             "filesystem": {"directory": CHUNKS_DIR},
         }
+
+    @property
+    def _limits_config(self) -> dict:
+        # Ref: https://grafana.com/docs/loki/latest/configure/#limits_config
+        return {
+            # For convenience, we use an integer but Loki takes a float
+            "ingestion_rate_mb": float(self.ingestion_rate_mb),
+            "ingestion_burst_size_mb": float(self.ingestion_burst_size_mb),
+            # The per-stream limits are intentionally set to match the per-user limits, to simplify UX and address the
+            # case of one stream per user.
+            "per_stream_rate_limit": f"{self.ingestion_rate_mb}MB",
+            "per_stream_rate_limit_burst": f"{self.ingestion_burst_size_mb}MB",
+        }