---
## Regatta configuration
# -- nameOverride: Provide a name in place of `regatta`
nameOverride: "" # default: `"regatta"`
# -- fullnameOverride: String to fully override `"regatta.fullname"`
fullnameOverride: ""
# Image configuration
# Note: Use overrides with caution since other Regatta versions might not be compatible with this helm chart!
image:
  # -- repository: Default image repository
  repository: ghcr.io/jamf/regatta
  # -- imagePullPolicy: ref: https://kubernetes.io/docs/concepts/containers/images/#image-pull-policy
  imagePullPolicy: IfNotPresent
  # -- tag: Override to use different image version
  # Quoted so that number-like tags (e.g. "1.20") can never be parsed as floats.
  tag: "0.1.0"
# -- imagePullSecrets: For the Regatta image
imagePullSecrets: []
# -- replicas: Defines number of Regatta replicas
# Note: This value must match the number of raft initial members `raft.initialMembers`.
replicas: 1
# Specifies the pod disruption budget
## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
podDisruptionBudget:
  # -- enabled: If true, the pdb object is created
  enabled: false
  # -- minAvailable: Sets the minAvailable field of the pdb object
  # NOTE(review): the default of 2 exceeds the default `replicas: 1`; when enabling
  # the PDB, adjust `replicas` and `minAvailable` together — confirm intended defaults.
  minAvailable: 2
# -- resources: Define the resources of the pods
resources: {}
#  requests:
#    cpu: 1
#    memory: 4Gi
#  limits:
#    cpu: 2
#    memory: 8Gi
# -- startupProbe: Defines the startupProbe for the Regatta container
## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes
startupProbe:
  initialDelaySeconds: 90
  periodSeconds: 60
  timeoutSeconds: 5
  failureThreshold: 3
  successThreshold: 1
# -- readinessProbe: Defines the readinessProbe for the Regatta container
## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes
readinessProbe:
  initialDelaySeconds: 5
  periodSeconds: 30
  timeoutSeconds: 5
  failureThreshold: 3
  successThreshold: 1
# -- priorityClassName: Defines the priorityClassName of the Regatta pods.
# Leave empty string if you don't want to use this feature.
## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption/
priorityClassName: ""
# -- tolerations: Defines tolerations for the Regatta pods
## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/
tolerations: []
#  - key: "key1"
#    operator: "Equal"
#    value: "value1"
#    effect: "NoSchedule"
# -- nodeSelector: Map of nodeSelector labels for the Regatta pods
## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector
nodeSelector: {}
#  nodeLabel: value
# Settings of Regatta pods anti-affinity
## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity
podAntiAffinity:
  # -- enabled: Enable or disable the pod anti-affinity
  enabled: false
  # -- topologyKey: Use to override the topologyKey value
  topologyKey: kubernetes.io/hostname
# -- additionalPodLabels: Optional map of additional pod labels
additionalPodLabels: {}
# -- podAnnotations: Optional map of pod annotations
podAnnotations: {}
# Allows definition of the persistentVolumeClaim.spec of the Regatta StatefulSet
persistentVolumeClaim:
  # -- spec: The full content of the persistentVolumeClaim.spec
  spec:
    accessModes:
      - ReadWriteOnce
    resources:
      requests:
        storage: 10Gi
# ServiceAccount configuration
serviceAccount:
  # -- create: Create the ServiceAccount for regatta
  create: true
  # -- name: ServiceAccount name override
  name: "" # default: `"regatta.fullname"`
# Regatta main gRPC API configuration
api:
  # -- port: gRPC API port
  port: 8443
  tls:
    # -- mode:
    # May be one of:
    # - certificate: the certificate is generated by `cert-manager.io/v1/Certificate` object
    # - plaintext: enter `cert` and `key` content directly into values
    # - none: no certificate nor secret is created, you need to provide a secret separately
    #
    # Secret example:
    #
    # apiVersion: v1
    # kind: Secret
    # metadata:
    #   name: regatta-api-cert
    # data:
    #   tls.crt: Y2xpZW50LWNlcnQK
    #   tls.key: Y2xpZW50LWtleQo=
    mode: plaintext
    # -- issuerRef: IssuerRef configuration that is passed to the Certificate object
    # Note: applicable only if `mode: certificate`
    issuerRef: {}
    # Example issuerRef configuration:
    #  kind: ClusterIssuer
    #  name: issuer-name
    # -- cert: TLS cert in plaintext
    # Note: applicable only if `mode: plaintext`
    cert: |
      plaintext server certificate
    # -- key: TLS key in plaintext
    # Note: applicable only if `mode: plaintext`
    key: |
      plaintext server certificate key
  # -- externalLoadBalancer: If enabled, the Service of type LoadBalancer is created
  ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#loadbalancer
  externalLoadBalancer:
    # -- enabled: true/false
    enabled: false
    # -- externalDomain: External Regatta API domain name
    externalDomain: regatta.example.com
    # -- annotations: Service annotations
    annotations: {}
    # Example annotations for K8S cluster running in AWS:
    #
    #  external-dns.alpha.kubernetes.io/hostname: regatta.example.com
    #  service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "900"
    #  service.beta.kubernetes.io/aws-load-balancer-backend-protocol: ssl
    # -- loadBalancerSourceRanges: external access whitelist, available on AWS only
    ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#aws-nlb-support
    # This Kubernetes Service field is a list of CIDR strings, so the empty default is `[]`, not `{}`.
    loadBalancerSourceRanges: []
    #  - 0.0.0.0/0
# -- metricsPort: Regatta metrics port
metricsPort: 8079
# -- mode: Regatta mode
# Can be either leader or follower.
mode: leader
# Regatta replication setup
replication:
  # -- server: The replication server may be used when Regatta is in the leader mode (`mode: leader`).
  # Follower Regatta replicates data from this server.
  server:
    # -- enabled: Enables the replication server
    enabled: true
    # -- port: Replication server port
    port: 8444
    # -- externalDomain: External replication server domain name
    externalDomain: "leader.regatta.example.com"
    # -- serviceAnnotations: Replication server LoadBalancer service annotations
    serviceAnnotations: {}
    #  external-dns.alpha.kubernetes.io/hostname: leader.regatta.example.com
    #  service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "900"
    tls:
      # -- mode:
      # May be one of:
      # - certificate: the certificate is generated by `cert-manager.io/v1/Certificate` object
      # - plaintext: enter `cert` and `key` content directly into values
      # - none: no certificate nor secret is created, you need to provide a secret separately
      #
      # Secret example:
      #
      # apiVersion: v1
      # kind: Secret
      # metadata:
      #   name: regatta-replication-cert
      # data:
      #   tls.crt: Y2xpZW50LWNlcnQK
      #   tls.key: Y2xpZW50LWtleQo=
      mode: plaintext
      # -- issuerRef: issuerRef configuration that is passed to the Certificate object
      # Note: applicable only if `mode: certificate`
      issuerRef: {}
      # Example issuerRef configuration:
      #  kind: ClusterIssuer
      #  name: issuer-name
      # -- cert: TLS certificate in plaintext
      # Note: Applicable only if `mode: plaintext`
      cert: |
        plaintext server certificate
      # -- ca: CA in plaintext
      # Note: Applicable only if `mode: plaintext`
      ca: |
        plaintext server ca
      # -- key: TLS key in plaintext
      # Note: Applicable only if `mode: plaintext`
      key: |
        plaintext server certificate key
  # -- leaderAddress: The address of the leader to replicate from
  # Note: Applicable only if the Regatta mode is follower (`mode: follower`)
  leaderAddress: "leader.regatta.example.com"
  # -- maxSnapshotRecvBytesPerSecond: Maximum number of bytes received per second by the snapshot API client,
  # default value 0 means unlimited.
  maxSnapshotRecvBytesPerSecond: 0
  # -- logRpcTimeout: The log RPC timeout.
  logRpcTimeout: 5m
reflectionAPI:
  # -- enabled: Whether reflection API is provided. Should be false in production.
  enabled: false
# Maintenance API configuration
maintenance:
  # -- secretKind
  # May be one of:
  # - sealedSecret: Use if you have SealedSecrets support on your cluster. (https://sealed-secrets.netlify.app/)
  # - plaintext: Use to create Opaque Secret from the plaintext.
  # - none: Do not create the secret with the token at all. The secret must be provided externally.
  #
  # Secret example:
  #
  # apiVersion: v1
  # kind: Secret
  # metadata:
  #   name: regatta-maintenance-token
  # data:
  #   token: c2VjcmV0LXRva2Vu
  #
  secretKind: plaintext
  # -- token:
  # Depending on value of `secretKind`
  # - sealedSecret: enter the encrypted value
  # - plaintext: enter the plaintext secret value
  # - none: the field is ignored
  token: "secret-token"
  server:
    # -- enabled: Maintenance API enabled
    enabled: true
    # -- port: Port of maintenance server to listen on
    port: 8445
    tls:
      # -- mode:
      # May be one of:
      # - certificate: the certificate is generated by `cert-manager.io/v1/Certificate` object
      # - plaintext: enter `cert` and `key` content directly into values
      # - none: no certificate nor secret is created, you need to provide a secret separately
      #
      # Secret example:
      #
      # apiVersion: v1
      # kind: Secret
      # metadata:
      #   name: regatta-maintenance-cert
      # data:
      #   tls.crt: Y2xpZW50LWNlcnQK
      #   tls.key: Y2xpZW50LWtleQo=
      mode: plaintext
      # -- issuerRef: issuerRef configuration that is passed to the Certificate object
      # Note: Applicable only if `mode: certificate`
      issuerRef: {}
      # Example issuerRef configuration:
      #  kind: ClusterIssuer
      #  name: issuer-name
      # -- cert: TLS certificate in plaintext
      # Note: Applicable only if `mode: plaintext`
      cert: |
        plaintext server certificate
      # -- key: TLS key in plaintext
      # Note: Applicable only if `mode: plaintext`
      key: |
        plaintext server certificate key
  # -- Controls the creation of the backup CronJob that uses the Regatta maintenance API
  # Note: the `maintenance.server.enabled` must be set to `true`
  backup:
    # -- enabled: Enable the backup CronJob
    # Note: the maintenance server must be enabled
    enabled: true
    # -- successfulJobsHistoryLimit: CronJob config field
    successfulJobsHistoryLimit: 4
    # -- failedJobsHistoryLimit: CronJob config field
    failedJobsHistoryLimit: 2
    # -- schedule: Cron expression defining how often the backup is executed
    schedule: "0 */4 * * *"
    # -- bucket: Address of the s3 bucket where to upload backup
    bucket: "s3-bucket-name"
# Kafka client configuration
# Note: Kafka client available only if Regatta is in the leader mode (`mode: leader`)
kafka:
  # -- enabled: Enables Kafka client
  enabled: false
  # -- brokers: The list of Kafka brokers
  brokers: ""
  # -- checkTopics: Checks the configured topics for existence if set to true
  checkTopics: false
  # -- dialerTimeout: Kafka dialer timeout
  dialerTimeout: 10s
  # -- groupID: Kafka consumer group ID
  groupID: ""
  # -- topics: Comma-separated list of Kafka topics to consume
  topics: ""
  # -- tls: Kafka client TLS configuration
  tls:
    # -- enabled: Enables kafka client TLS
    enabled: false
    # -- secretKind
    # May be one of:
    # - sealedSecret: Use if you have SealedSecrets support on your cluster. (https://sealed-secrets.netlify.app/)
    # - plaintext: Use to create Opaque Secret from the plaintext.
    # - none: Do not create the secret with the token at all. The secret must be provided externally.
    #
    # Secret example:
    #
    # apiVersion: v1
    # kind: Secret
    # metadata:
    #   name: regatta-kafka-cert
    # data:
    #   ca.crt: c2VydmVyLWNlcnQK
    #   tls.crt: Y2xpZW50LWNlcnQK
    #   tls.key: Y2xpZW50LWtleQo=
    #
    secretKind: plaintext
    # -- serverCert:
    # Depending on value of `secretKind`
    # - sealedSecret: enter the encrypted value
    # - plaintext: enter the plaintext secret value
    # - none: the field is ignored
    serverCert: |
      server-cert
    # -- clientCert:
    # Depending on value of `secretKind`
    # - sealedSecret: enter the encrypted value
    # - plaintext: enter the plaintext secret value
    # - none: the field is ignored
    clientCert: |
      client-cert
    # -- clientKey:
    # Depending on value of `secretKind`
    # - sealedSecret: enter the encrypted value
    # - plaintext: enter the plaintext secret value
    # - none: the field is ignored
    clientKey: |
      client-key
  # -- tables: Comma-separated list of Regatta tables
  tables: table-1,table-2
# Raft configuration
raft:
  # -- initialMembers: Raft cluster initial members defines a mapping of node IDs to their respective Raft address.
  # The node ID must be Integer >= 1. Example for the initial 3 node cluster setup on the localhost:
  # "1=127.0.0.1:5012,2=127.0.0.1:5013,3=127.0.0.1:5014"
  initialMembers: "1=regatta-0.regatta.regatta.svc.cluster.local:5012"
  # -- rtt: Defines the average Round Trip Time (RTT) between two NodeHost instances.
  # Such an RTT interval is internally used as a logical clock tick, Raft heartbeat and election intervals
  # are both defined in terms of how many such RTT intervals. Note that RTTMillisecond is the combined delays
  # between two NodeHost instances including all delays caused by network transmission,
  # delays caused by NodeHost queuing and processing.
  # Specified as Go's duration string (https://pkg.go.dev/maze.io/x/duration#ParseDuration).
  rtt: 50ms
  # -- snapshotEntries: SnapshotEntries defines how often the state machine should be snapshotted automatically.
  # It is defined in terms of the number of applied Raft log entries.
  snapshotEntries: 10000
  # -- compactionOverhead: Defines the number of most recent entries to keep after each Raft log compaction.
  # Raft log compaction is performed automatically every time when a snapshot is created.
  compactionOverhead: 5000
  # TODO: revise raft.rtt
  # -- heartbeatRTT: The number of message RTT between heartbeats. Message RTT is defined by raft.rtt.
  # The Raft paper suggest the heartbeat interval to be close to the average RTT between nodes.
  # As an example, assuming raft.rtt is 100 millisecond, to set the heartbeat interval to be every 200 milliseconds,
  # then heartbeatRTT should be set to 2.
  heartbeatRTT: 4
  # TODO: revise raft.rtt
  # -- electionRTT: The minimum number of message RTT between elections. Message RTT is defined by raft.rtt.
  # The Raft paper suggests it to be a magnitude greater than heartbeatRTT, which is the interval between two heartbeats.
  # In Raft, the actual interval between elections is randomized to be between electionRTT and 2 * electionRTT.
  # As an example, assuming raft.rtt is 100 millisecond, to set the election interval to be 1 second,
  # then electionRTT should be set to 10.
  electionRTT: 100
  # -- dragonboatSoftSettings: ref: https://github.com/lni/dragonboat/blob/v3.3.6/internal/settings/soft.go#L27
  # Passed through as a JSON document inside a literal block scalar.
  dragonboatSoftSettings: |
    {
      "TaskBatchSize": 128,
      "PerConnectionSendBufSize": 10485760,
      "MaxConcurrentStreamingSnapshot": 1
    }
# Storage configuration
storage:
  # -- blockCacheSize: Shared block cache size in bytes. The cache is used to hold uncompressed blocks of data in memory
  blockCacheSize: 796917760
# Experimental features
experimental:
  # -- tanlogdb: Whether experimental LogDB implementation Tan is used in-place of Pebble based one
  tanlogdb: false
# -- serviceMonitorEnabled: ServiceMonitor object is created if set to true
serviceMonitorEnabled: false
# Defines the PrometheusRule object
prometheusRules:
  # -- enabled: PrometheusRule object created if true
  enabled: false
  # -- groups: List of the PrometheusRule groups
  # NOTE(review): the GRPCFailedRequests example below reads `.Values.prometheusRules.grpcFailedRequests`,
  # which is not defined in this file; define it (e.g. `grpcFailedRequests: 10`) before enabling that
  # rule — confirm against the chart templates.
  groups: []
  # Example groups:
  #
  #  - name: regatta.rules
  #    rules:
  #      # RAFT
  #      - alert: RaftLeaderNotAvailable
  #        expr: min(sum(dragonboat_raftnode_has_leader{namespace="{{ .Release.Namespace }}",job="{{ template "regatta.name" . }}"}) by (shardid,job,namespace)) by (job,namespace) < 2
  #        for: 5m
  #        labels:
  #          severity: critical
  #        annotations:
  #          description: '{{ $labels.job }}: Raft leader not available, cluster is not able to serve requests properly.'
  #          summary: '{{ $labels.job }}: Raft leader not available'
  #      - alert: RaftClusterNodeNotAvailableCritical
  #        expr: min(sum(dragonboat_raftnode_has_leader{namespace="{{ .Release.Namespace }}",job="{{ template "regatta.name" . }}"}) by (shardid,job,namespace)) by (job,namespace) < 3
  #        for: 30m
  #        labels:
  #          severity: critical
  #        annotations:
  #          description: '{{ $labels.job }}: Raft cluster has only {{ $value }} nodes for time period longer than 30m.'
  #          summary: '{{ $labels.job }}: Raft cluster node not available'
  #      - alert: RaftClusterNodeNotAvailableWarn
  #        expr: min(sum(dragonboat_raftnode_has_leader{namespace="{{ .Release.Namespace }}",job="{{ template "regatta.name" . }}"}) by (shardid,job,namespace)) by (job,namespace) < 3
  #        for: 2m
  #        labels:
  #          severity: warning
  #        annotations:
  #          description: '{{ $labels.job }}: Raft cluster has only {{ $value }} nodes.'
  #          summary: '{{ $labels.job }}: Raft cluster node not available'
  #      - alert: TooManyRaftnodeCampaignsLaunched
  #        expr: sum(increase(dragonboat_raftnode_campaign_launched_total{namespace="{{ .Release.Namespace }}",job="{{ template "regatta.name" . }}"}[5m])) by (shardid,nodeid,job,namespace,pod) > 50
  #        for: 10m
  #        labels:
  #          severity: warning
  #          type: raftTransport
  #        annotations:
  #          description: There was {{ $value }} raft node campaign launched in the last 5 minutes for {{ $labels.job }}
  #          summary: '{{ $labels.job }}: Too many Raft node campaign launched'
  #      - alert: TooManyRaftnodeCampaignsSkipped
  #        expr: sum(increase(dragonboat_raftnode_campaign_skipped_total{namespace="{{ .Release.Namespace }}",job="{{ template "regatta.name" . }}"}[5m])) by (shardid,nodeid,job,namespace,pod) > 50
  #        for: 10m
  #        labels:
  #          severity: warning
  #          type: raftTransport
  #        annotations:
  #          description: There was {{ $value }} raft node campaign skipped in the last 5 minutes for {{ $labels.job }}
  #          summary: '{{ $labels.job }}: Too many Raft node campaign skipped'
  #      - alert: TooManyRaftNodeProposalsDropped
  #        expr: sum(increase(dragonboat_raftnode_proposal_dropped_total{namespace="{{ .Release.Namespace }}",job="{{ template "regatta.name" . }}"}[5m])) by (shardid,nodeid,job,namespace,pod) > 50
  #        for: 10m
  #        labels:
  #          severity: warning
  #          type: raftTransport
  #        annotations:
  #          description: 'Dropped {{ $value }} raft node proposals in 5 minutes for {{ $labels.job }}'
  #          summary: '{{ $labels.job }}: Too many raft node proposals dropped'
  #      - alert: TooManyRaftNodeReadIndexesDropped
  #        expr: sum(increase(dragonboat_raftnode_read_index_dropped_total{namespace="{{ .Release.Namespace }}",job="{{ template "regatta.name" . }}"}[5m])) by (shardid,nodeid,job,namespace,pod) > 50
  #        for: 10m
  #        labels:
  #          severity: warning
  #          type: raftTransport
  #        annotations:
  #          description: 'Dropped {{ $value }} raft node read index in 5 minutes for {{ $labels.job }}'
  #          summary: '{{ $labels.job }}: Too many raft node read indexes dropped'
  #      - alert: TooManyRaftNodeReplicationsRejected
  #        expr: sum(increase(dragonboat_raftnode_replication_rejected_total{namespace="{{ .Release.Namespace }}",job="{{ template "regatta.name" . }}"}[5m])) by (shardid,nodeid,job,namespace,pod) > 50
  #        for: 10m
  #        labels:
  #          severity: warning
  #          type: raftTransport
  #        annotations:
  #          description: 'Rejected {{ $value }} replications in 5 minutes for {{ $labels.job }}'
  #          summary: '{{ $labels.job }}: Too many raft node replications rejected'
  #      - alert: TooManyFailedTransportMessageConnectionAttempts
  #        expr: increase(dragonboat_transport_failed_message_connection_attempt_total{namespace="{{ .Release.Namespace }}",job="{{ template "regatta.name" . }}"}[5m]) > 20
  #        for: 10m
  #        labels:
  #          severity: warning
  #          type: raftTransport
  #        annotations:
  #          description: 'Failed {{ $value }} transport message connection attempts in 5 minutes for {{ $labels.job }}'
  #          summary: '{{ $labels.job }}: Too many failed transport message connection attempts'
  #      - alert: TooManyFailedSnapshotConnectionAttempts
  #        expr: increase(dragonboat_transport_failed_snapshot_connection_attempt_total{namespace="{{ .Release.Namespace }}",job="{{ template "regatta.name" . }}"}[5m]) > 50
  #        for: 10m
  #        labels:
  #          severity: warning
  #          type: raftTransport
  #        annotations:
  #          description: 'Failed {{ $value }} snapshot connection attempts in 5 minutes for {{ $labels.job }}'
  #          summary: '{{ $labels.job }}: Too many failed snapshot connection attempts'
  #      - alert: TooManyTransportMessageSendFailures
  #        # NOTE(review): this expr reuses the snapshot-connection-attempt metric of the previous rule;
  #        # it presumably should use a message-send-failure counter — verify against dragonboat's exported metrics.
  #        expr: increase(dragonboat_transport_failed_snapshot_connection_attempt_total{namespace="{{ .Release.Namespace }}",job="{{ template "regatta.name" . }}"}[5m]) > 50
  #        for: 10m
  #        labels:
  #          severity: warning
  #          type: raftTransport
  #        annotations:
  #          description: 'Failed {{ $value }} transport message send in 5 minutes for {{ $labels.job }}'
  #          summary: '{{ $labels.job }}: Too many failed transport message send'
  #      - alert: TooManyTransportReceiveMessagesDropped
  #        expr: increase(dragonboat_transport_received_message_dropped_total{namespace="{{ .Release.Namespace }}",job="{{ template "regatta.name" . }}"}[5m]) > 50
  #        for: 10m
  #        labels:
  #          severity: warning
  #          type: raftTransport
  #        annotations:
  #          description: 'Dropped {{ $value }} transport message receive in 5 minutes for {{ $labels.job }}'
  #          summary: '{{ $labels.job }}: Too many dropped transport message receive'
  #      - alert: TooManyTransportSnapshotSendFailures
  #        expr: increase(dragonboat_transport_snapshot_send_failure_total{namespace="{{ .Release.Namespace }}",job="{{ template "regatta.name" . }}"}[5m]) > 50
  #        for: 10m
  #        labels:
  #          severity: warning
  #          type: raftTransport
  #        annotations:
  #          description: 'Failed {{ $value }} transport snapshot send in 5 minutes for {{ $labels.job }}'
  #          summary: '{{ $labels.job }}: Too many failed transport snapshot send'
  #
  #      # GRPC
  #      - alert: GRPCFailedRequests
  #        expr: increase(grpc_server_handled_total{namespace="{{ .Release.Namespace }}",grpc_code!~"OK|NotFound|InvalidArgument", job="{{ template "regatta.name" . }}",grpc_service!="grpc.reflection.v1alpha.ServerReflection"}[5m]) > {{ .Values.prometheusRules.grpcFailedRequests }}
  #        for: 1m
  #        labels:
  #          severity: warning
  #        annotations:
  #          description: 'Failed {{ $value }} GRPC requests in 5 minutes for {{ $labels.job }}'
  #          summary: '{{ $labels.job }}: GRPC request error'
  #      - alert: TooSlowGRPCResponseP99
  #        expr: histogram_quantile(0.99, sum by (job,namespace,pod,grpc_method,le) (rate(grpc_server_handling_seconds_bucket{namespace="{{ .Release.Namespace }}",job="{{ template "regatta.name" . }}",grpc_type="unary",grpc_service="regatta.v1.KV",grpc_method="Range"}[5m]))) > 0.5
  #        for: 1m
  #        labels:
  #          severity: critical
  #        annotations:
  #          description: '99 quantile of regatta.v1.KV response time is {{ $value }} s for {{ $labels.job }}'
  #          summary: '{{ $labels.job }}: GRPC response is too slow (P99)'
  #      - alert: TooSlowGRPCResponseP50
  #        expr: histogram_quantile(0.50, sum by (job,namespace,pod,grpc_method,le) (rate(grpc_server_handling_seconds_bucket{namespace="{{ .Release.Namespace }}",job="{{ template "regatta.name" . }}",grpc_type="unary",grpc_service="regatta.v1.KV",grpc_method="Range"}[5m]))) > 0.05
  #        for: 1m
  #        labels:
  #          severity: warning
  #        annotations:
  #          description: '50 quantile of regatta.v1.KV response time is {{ $value }} s for {{ $labels.job }}'
  #          summary: '{{ $labels.job }}: GRPC response is too slow (P50)'