Skip to content

Commit

Permalink
Promtail: (and also fluent-bit) change the max batch size to 1MB (#2710)
Browse files Browse the repository at this point in the history
* Change the max batch size to 1MB for all the defaults, including helm and fluent-bit, and attempt to centralize this config a little where possible.

* fix test
  • Loading branch information
slim-bean authored Oct 1, 2020
1 parent 9e6afea commit d3bf21e
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 34 deletions.
16 changes: 8 additions & 8 deletions cmd/docker-driver/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,14 @@ const (

var (
defaultClientConfig = client.Config{
BatchWait: 1 * time.Second,
BatchSize: 100 * 1024,
BatchWait: client.BatchWait,
BatchSize: client.BatchSize,
BackoffConfig: cortex_util.BackoffConfig{
MinBackoff: 100 * time.Millisecond,
MaxBackoff: 10 * time.Second,
MaxRetries: 10,
MinBackoff: client.MinBackoff,
MaxBackoff: client.MaxBackoff,
MaxRetries: client.MaxRetries,
},
Timeout: 10 * time.Second,
Timeout: client.Timeout,
}
)

Expand Down Expand Up @@ -242,8 +242,8 @@ func parseConfig(logCtx logger.Info) (*config, error) {

// other labels coming from docker labels or env selected by user labels, labels-regex, env, env-regex config.
attrs, err := logCtx.ExtraAttributes(func(label string) string {
return strings.ReplaceAll(strings.ReplaceAll(label, "-", "_"), ".", "_")
})
return strings.ReplaceAll(strings.ReplaceAll(label, "-", "_"), ".", "_")
})
if err != nil {
return nil, err
}
Expand Down
34 changes: 22 additions & 12 deletions pkg/promtail/client/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ import (
lokiflag "github.com/grafana/loki/pkg/util/flagext"
)

// Default values for the client configuration. They are used as the flag
// defaults in RegisterFlagsWithPrefix and as the fallback values applied in
// UnmarshalYAML.
//
// NOTE: the helm charts for promtail and fluent-bit also have defaults for
// these values; please update them to match if you make changes here.
const (
// BatchWait is the default maximum wait period before sending a batch.
BatchWait = 1 * time.Second
// BatchSize is the default maximum batch size, in bytes, to accrue before
// sending (1024 * 1024 bytes).
BatchSize int = 1024 * 1024
// MinBackoff is the default initial backoff time between retries.
MinBackoff = 500 * time.Millisecond
// MaxBackoff is the default maximum backoff time between retries.
MaxBackoff = 5 * time.Minute
// MaxRetries is the default maximum number of retries when sending batches.
MaxRetries int = 10
// Timeout is the default maximum time to wait for the server to respond to
// a request.
Timeout = 10 * time.Second
)

// Config describes configuration for a HTTP pusher client.
type Config struct {
URL flagext.URLValue
Expand All @@ -33,13 +43,13 @@ type Config struct {
// prefix. If prefix is a non-empty string, prefix should end with a period.
func (c *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
f.Var(&c.URL, prefix+"client.url", "URL of log server")
f.DurationVar(&c.BatchWait, prefix+"client.batch-wait", 1*time.Second, "Maximum wait period before sending batch.")
f.IntVar(&c.BatchSize, prefix+"client.batch-size-bytes", 1024*1024, "Maximum batch size to accrue before sending. ")
f.DurationVar(&c.BatchWait, prefix+"client.batch-wait", BatchWait, "Maximum wait period before sending batch.")
f.IntVar(&c.BatchSize, prefix+"client.batch-size-bytes", BatchSize, "Maximum batch size to accrue before sending. ")
// Default backoff schedule: 0.5s, 1s, 2s, 4s, 8s, 16s, 32s, 64s, 128s, 256s(4.267m) For a total time of 511.5s(8.5m) before logs are lost
f.IntVar(&c.BackoffConfig.MaxRetries, prefix+"client.max-retries", 10, "Maximum number of retires when sending batches.")
f.DurationVar(&c.BackoffConfig.MinBackoff, prefix+"client.min-backoff", 500*time.Millisecond, "Initial backoff time between retries.")
f.DurationVar(&c.BackoffConfig.MaxBackoff, prefix+"client.max-backoff", 5*time.Minute, "Maximum backoff time between retries.")
f.DurationVar(&c.Timeout, prefix+"client.timeout", 10*time.Second, "Maximum time to wait for server to respond to a request")
f.IntVar(&c.BackoffConfig.MaxRetries, prefix+"client.max-retries", MaxRetries, "Maximum number of retires when sending batches.")
f.DurationVar(&c.BackoffConfig.MinBackoff, prefix+"client.min-backoff", MinBackoff, "Initial backoff time between retries.")
f.DurationVar(&c.BackoffConfig.MaxBackoff, prefix+"client.max-backoff", MaxBackoff, "Maximum backoff time between retries.")
f.DurationVar(&c.Timeout, prefix+"client.timeout", Timeout, "Maximum time to wait for server to respond to a request")
f.Var(&c.ExternalLabels, prefix+"client.external-labels", "list of external labels to add to each log (e.g: --client.external-labels=lb1=v1,lb2=v2)")

f.StringVar(&c.TenantID, prefix+"client.tenant-id", "", "Tenant ID to use when pushing logs to Loki.")
Expand All @@ -61,13 +71,13 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
// force sane defaults.
cfg = raw{
BackoffConfig: util.BackoffConfig{
MaxBackoff: 5 * time.Minute,
MaxRetries: 10,
MinBackoff: 500 * time.Millisecond,
MaxBackoff: MaxBackoff,
MaxRetries: MaxRetries,
MinBackoff: MinBackoff,
},
BatchSize: 100 * 1024,
BatchWait: 1 * time.Second,
Timeout: 10 * time.Second,
BatchSize: BatchSize,
BatchWait: BatchWait,
Timeout: Timeout,
}
}

Expand Down
12 changes: 6 additions & 6 deletions pkg/promtail/client/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ func Test_Config(t *testing.T) {
URL: u,
},
BackoffConfig: util.BackoffConfig{
MaxBackoff: 5 * time.Minute,
MaxRetries: 10,
MinBackoff: 500 * time.Millisecond,
MaxBackoff: MaxBackoff,
MaxRetries: MaxRetries,
MinBackoff: MinBackoff,
},
BatchSize: 100 * 1024,
BatchWait: 1 * time.Second,
Timeout: 10 * time.Second,
BatchSize: BatchSize,
BatchWait: BatchWait,
Timeout: Timeout,
},
},
{
Expand Down
2 changes: 1 addition & 1 deletion production/helm/fluent-bit/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
apiVersion: "v1"
name: fluent-bit
version: 0.3.0
version: 0.3.1
appVersion: v1.6.0
kubeVersion: "^1.10.0-0"
description: "Uses fluent-bit Loki go plugin for gathering logs and sending them to Loki"
Expand Down
2 changes: 1 addition & 1 deletion production/helm/fluent-bit/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ config:
port: 2020
tenantID: '""'
batchWait: 1
batchSize: 10240
batchSize: 1048576
loglevel: warn
lineFormat: json
k8sLoggingParser: "Off"
Expand Down
2 changes: 1 addition & 1 deletion production/helm/loki-stack/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
apiVersion: "v1"
name: loki-stack
version: 0.41.0
version: 0.41.1
appVersion: v1.6.0
kubeVersion: "^1.10.0-0"
description: "Loki: like Prometheus, but for logs."
Expand Down
2 changes: 1 addition & 1 deletion production/helm/promtail/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
apiVersion: "v1"
name: promtail
version: 0.25.0
version: 0.25.1
appVersion: v1.6.0
kubeVersion: "^1.10.0-0"
description: "Responsible for gathering logs and sending them to Loki"
Expand Down
8 changes: 4 additions & 4 deletions production/helm/promtail/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -159,18 +159,18 @@ config:
# Maximum wait period before sending batch
batchwait: 1s
# Maximum batch size to accrue before sending, unit is byte
batchsize: 102400
batchsize: 1048576

# Maximum time to wait for server to respond to a request
timeout: 10s

backoff_config:
# Initial backoff time between retries
min_period: 100ms
min_period: 500ms
# Maximum backoff time between retries
max_period: 5s
max_period: 5m
# Maximum number of retries when sending batches, 0 means infinite retries
max_retries: 20
max_retries: 10

# The labels to add to any time series or alerts when communicating with loki
external_labels: {}
Expand Down

0 comments on commit d3bf21e

Please sign in to comment.