Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Getting err="too many failed ingesters" when running on docker #2155

Closed
dorroddorrod opened this issue Jun 1, 2020 · 2 comments
Closed

Getting err="too many failed ingesters" when running on docker #2155

dorroddorrod opened this issue Jun 1, 2020 · 2 comments

Comments

@dorroddorrod
Copy link

dorroddorrod commented Jun 1, 2020

When running Loki in separate-components mode (ingester, querier, distributor, table-manager),
I get the following error:
loki-querier | level=error ts=2020-06-01T09:22:37.9300754Z caller=pool.go:161 msg="error removing stale clients" err="too many failed ingesters" loki-distributor | level=error ts=2020-06-01T09:22:38.2747819Z caller=pool.go:161 msg="error removing stale clients" err="too many failed ingesters"

My docker-compose.yaml :

version: '3.8'

services:

  redis:
    image: bitnami/redis:latest
    container_name: redis
    environment:
      ALLOW_EMPTY_PASSWORD: "yes"
    ports:
      - 6379

  loki-distributor:
    image: grafana/loki:1.5.0
    container_name: loki-distributor
    ports:
      - 3100
    volumes:
      - ./config.yaml:/etc/loki/config.yaml
    command: -config.file=/etc/loki/config.yaml -target=distributor

  loki-querier:
    image: grafana/loki:1.5.0
    container_name: loki-querier
    ports:
      - 3100
    volumes:
      - ./config.yaml:/etc/loki/config.yaml
    command: -config.file=/etc/loki/config.yaml -target=querier

  loki-ingester:
    image: grafana/loki:1.5.0
    container_name: loki-ingester
    ports:
      - 3100
    volumes:
      - ./config.yaml:/etc/loki/config.yaml
    command: -config.file=/etc/loki/config.yaml -target=ingester

  loki-table-manager:
    image: grafana/loki:1.5.0
    container_name: loki-table-manager
    ports:
      - 3100
    volumes:
      - ./config.yaml:/etc/loki/config.yaml
    command: -config.file=/etc/loki/config.yaml -target=table-manager

  etcd-1:
    container_name: etcd1
    image: quay.io/coreos/etcd:latest
    entrypoint: /usr/local/bin/etcd
    command:
      - '--name=etcd-1'
      - '--initial-advertise-peer-urls=http://etcd-1:2380'
      - '--listen-peer-urls=http://0.0.0.0:2380'
      - '--listen-client-urls=http://0.0.0.0:2379'
      - '--advertise-client-urls=http://etcd-1:2379'
      - '--initial-cluster-token=mys3cr3ttok3n'
      - '--heartbeat-interval=250'
      - '--election-timeout=1250'
      - '--initial-cluster=etcd-1=http://etcd-1:2380,etcd-2=http://etcd-2:2380,etcd-3=http://etcd-3:2380'
      - '--initial-cluster-state=new'
    ports:
      - 2379
    volumes:
      - etcd1:/etcd_data

  etcd-2:
    container_name: etcd2
    image: quay.io/coreos/etcd:latest
    entrypoint: /usr/local/bin/etcd
    command:
      - '--name=etcd-2'
      - '--initial-advertise-peer-urls=http://etcd-2:2380'
      - '--listen-peer-urls=http://0.0.0.0:2380'
      - '--listen-client-urls=http://0.0.0.0:2379'
      - '--advertise-client-urls=http://etcd-2:2379'
      - '--initial-cluster-token=mys3cr3ttok3n'
      - '--heartbeat-interval=250'
      - '--election-timeout=1250'
      - '--initial-cluster=etcd-1=http://etcd-1:2380,etcd-2=http://etcd-2:2380,etcd-3=http://etcd-3:2380'
      - '--initial-cluster-state=new'
    ports:
      - 2379
    volumes:
      - etcd2:/etcd_data

  etcd-3:
    container_name: etcd3
    image: quay.io/coreos/etcd:latest
    entrypoint: /usr/local/bin/etcd
    command:
      - '--name=etcd-3'
      - '--initial-advertise-peer-urls=http://etcd-3:2380'
      - '--listen-peer-urls=http://0.0.0.0:2380'
      - '--listen-client-urls=http://0.0.0.0:2379'
      - '--advertise-client-urls=http://etcd-3:2379'
      - '--initial-cluster-token=mys3cr3ttok3n'
      - '--heartbeat-interval=250'
      - '--election-timeout=1250'
      - '--initial-cluster=etcd-1=http://etcd-1:2380,etcd-2=http://etcd-2:2380,etcd-3=http://etcd-3:2380'
      - '--initial-cluster-state=new'
    ports:
      - 2379
    volumes:
      - etcd3:/etcd_data

  promtail:
    image:  grafana/promtail:latest
    volumes:
      - /var/log:/var/log
      - ./promtail-config.yaml:/etc/promtail/docker-config.yaml
    command: -config.file=/etc/promtail/docker-config.yaml

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"

  e3w:
    image: soyking/e3w:latest
    volumes:
      - ./conf/config.default.ini:/app/conf/config.default.ini
    ports:
      - "8080:8080"
    depends_on:
      - etcd-3

volumes:
  etcd1:
  etcd2:
  etcd3:

loki config file :

auth_enabled: false

server:
  http_listen_port: 3100

ingester:
  lifecycler:
    ring:
      kvstore:
        store: etcd
        etcd:
          endpoints:
            - http://etcd-1:2379
            - http://etcd-2:2379
            - http://etcd-3:2379
          dial_timeout: 10s
          max_retries: 10
      heartbeat_timeout: 1m
      replication_factor: 3
    num_tokens: 128
    heartbeat_period: 5s
    join_after: 0s
    min_ready_duration: 10s
    interface_names:
      - "eth0"
    final_sleep: 30s
  chunk_idle_period: 5m
  chunk_retain_period: 30s

schema_config:
  configs:
    - from: 2020-05-15
      store: aws
      object_store: s3
      schema: v11
      index:
        prefix: loki_

storage_config:
  aws:
    s3: s3://*****@eu-west-1/eu-west-1-test-loki
    dynamodb:
      dynamodb_url: dynamodb://*******@eu-west-1

limits_config:
  enforce_metric_name: false
  reject_old_samples: true
  reject_old_samples_max_age: 168h

chunk_store_config:
  chunk_cache_config:
    redis:
        endpoint: "redis:6379"
        timeout: 100ms
        expiration: 0s
        max_idle_conns: 80
        max_active_conns: 0
  max_look_back_period: 0s
  write_dedupe_cache_config:
    redis:
      endpoint: "redis:6379"
      timeout: 100ms
      expiration: 0s
      max_idle_conns: 80
      max_active_conns: 0

table_manager:
  chunk_tables_provisioning:
    inactive_read_throughput: 1
    inactive_write_throughput: 1
    provisioned_read_throughput: 5
    provisioned_write_throughput: 5
  index_tables_provisioning:
    inactive_read_throughput: 1
    inactive_write_throughput: 1
    provisioned_read_throughput: 5
    provisioned_write_throughput: 5
  retention_deletes_enabled: false
  retention_period: 0s
@dorroddorrod dorroddorrod changed the title Getting err="too many failed ingesters" when running in docker Getting err="too many failed ingesters" when running on docker Jun 1, 2020
@owen-d
Copy link
Member

owen-d commented Jun 1, 2020

Hey, let's see if I can help. The current bottleneck is due to the combination of the replication_factor: 3 config and running one replica of the ingester. It's basically saying "all data must be replicated 3 times", but only one ingester is available to replicate the data to. You'll need to either scale back the replication factor or scale out the ingesters.

@dorroddorrod
Copy link
Author

Thanks, it works!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants