Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Getting err="too many failed ingesters" when running on docker #2155

Closed
dorroddorrod opened this issue Jun 1, 2020 · 2 comments
Closed

Getting err="too many failed ingesters" when running on docker #2155

dorroddorrod opened this issue Jun 1, 2020 · 2 comments

Comments

@dorroddorrod
Copy link

dorroddorrod commented Jun 1, 2020

When running Loki in separate-components mode (ingester, querier, distributor, table-manager),
I get the following error:
loki-querier | level=error ts=2020-06-01T09:22:37.9300754Z caller=pool.go:161 msg="error removing stale clients" err="too many failed ingesters" loki-distributor | level=error ts=2020-06-01T09:22:38.2747819Z caller=pool.go:161 msg="error removing stale clients" err="too many failed ingesters"

My docker-compose.yaml :

version: '3.8'

services:

  redis:
    image: bitnami/redis:latest
    container_name: redis
    environment:
      ALLOW_EMPTY_PASSWORD: "yes"
    ports:
      - 6379

  loki-distributor:
    image: grafana/loki:1.5.0
    container_name: loki-distributor
    ports:
      - 3100
    volumes:
      - ./config.yaml:/etc/loki/config.yaml
    command: -config.file=/etc/loki/config.yaml -target=distributor

  loki-querier:
    image: grafana/loki:1.5.0
    container_name: loki-querier
    ports:
      - 3100
    volumes:
      - ./config.yaml:/etc/loki/config.yaml
    command: -config.file=/etc/loki/config.yaml -target=querier

  loki-ingester:
    image: grafana/loki:1.5.0
    container_name: loki-ingester
    ports:
      - 3100
    volumes:
      - ./config.yaml:/etc/loki/config.yaml
    command: -config.file=/etc/loki/config.yaml -target=ingester

  loki-table-manager:
    image: grafana/loki:1.5.0
    container_name: loki-table-manager
    ports:
      - 3100
    volumes:
      - ./config.yaml:/etc/loki/config.yaml
    command: -config.file=/etc/loki/config.yaml -target=table-manager

  etcd-1:
    container_name: etcd1
    image: quay.io/coreos/etcd:latest
    entrypoint: /usr/local/bin/etcd
    command:
      - '--name=etcd-1'
      - '--initial-advertise-peer-urls=http://etcd-1:2380'
      - '--listen-peer-urls=http://0.0.0.0:2380'
      - '--listen-client-urls=http://0.0.0.0:2379'
      - '--advertise-client-urls=http://etcd-1:2379'
      - '--initial-cluster-token=mys3cr3ttok3n'
      - '--heartbeat-interval=250'
      - '--election-timeout=1250'
      - '--initial-cluster=etcd-1=http://etcd-1:2380,etcd-2=http://etcd-2:2380,etcd-3=http://etcd-3:2380'
      - '--initial-cluster-state=new'
    ports:
      - 2379
    volumes:
      - etcd1:/etcd_data

  etcd-2:
    container_name: etcd2
    image: quay.io/coreos/etcd:latest
    entrypoint: /usr/local/bin/etcd
    command:
      - '--name=etcd-2'
      - '--initial-advertise-peer-urls=http://etcd-2:2380'
      - '--listen-peer-urls=http://0.0.0.0:2380'
      - '--listen-client-urls=http://0.0.0.0:2379'
      - '--advertise-client-urls=http://etcd-2:2379'
      - '--initial-cluster-token=mys3cr3ttok3n'
      - '--heartbeat-interval=250'
      - '--election-timeout=1250'
      - '--initial-cluster=etcd-1=http://etcd-1:2380,etcd-2=http://etcd-2:2380,etcd-3=http://etcd-3:2380'
      - '--initial-cluster-state=new'
    ports:
      - 2379
    volumes:
      - etcd2:/etcd_data

  etcd-3:
    container_name: etcd3
    image: quay.io/coreos/etcd:latest
    entrypoint: /usr/local/bin/etcd
    command:
      - '--name=etcd-3'
      - '--initial-advertise-peer-urls=http://etcd-3:2380'
      - '--listen-peer-urls=http://0.0.0.0:2380'
      - '--listen-client-urls=http://0.0.0.0:2379'
      - '--advertise-client-urls=http://etcd-3:2379'
      - '--initial-cluster-token=mys3cr3ttok3n'
      - '--heartbeat-interval=250'
      - '--election-timeout=1250'
      - '--initial-cluster=etcd-1=http://etcd-1:2380,etcd-2=http://etcd-2:2380,etcd-3=http://etcd-3:2380'
      - '--initial-cluster-state=new'
    ports:
      - 2379
    volumes:
      - etcd3:/etcd_data

  promtail:
    image:  grafana/promtail:latest
    volumes:
      - /var/log:/var/log
      - ./promtail-config.yaml:/etc/promtail/docker-config.yaml
    command: -config.file=/etc/promtail/docker-config.yaml

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"

  e3w:
    image: soyking/e3w:latest
    volumes:
      - ./conf/config.default.ini:/app/conf/config.default.ini
    ports:
      - "8080:8080"
    depends_on:
      - etcd-3

volumes:
  etcd1:
  etcd2:
  etcd3:

loki config file :

auth_enabled: false

server:
  http_listen_port: 3100

ingester:
  lifecycler:
    ring:
      kvstore:
        store: etcd
        etcd:
          endpoints:
            - http://etcd-1:2379
            - http://etcd-2:2379
            - http://etcd-3:2379
          dial_timeout: 10s
          max_retries: 10
      heartbeat_timeout: 1m
      replication_factor: 3
    num_tokens: 128
    heartbeat_period: 5s
    join_after: 0s
    min_ready_duration: 10s
    interface_names:
      - "eth0"
    final_sleep: 30s
  chunk_idle_period: 5m
  chunk_retain_period: 30s

schema_config:
  configs:
    - from: 2020-05-15
      store: aws
      object_store: s3
      schema: v11
      index:
        prefix: loki_

storage_config:
  aws:
    s3: s3://*****@eu-west-1/eu-west-1-test-loki
    dynamodb:
      dynamodb_url: dynamodb://*******@eu-west-1

limits_config:
  enforce_metric_name: false
  reject_old_samples: true
  reject_old_samples_max_age: 168h

chunk_store_config:
  chunk_cache_config:
    redis:
        endpoint: "redis:6379"
        timeout: 100ms
        expiration: 0s
        max_idle_conns: 80
        max_active_conns: 0
  max_look_back_period: 0s
  write_dedupe_cache_config:
    redis:
      endpoint: "redis:6379"
      timeout: 100ms
      expiration: 0s
      max_idle_conns: 80
      max_active_conns: 0

table_manager:
  chunk_tables_provisioning:
    inactive_read_throughput: 1
    inactive_write_throughput: 1
    provisioned_read_throughput: 5
    provisioned_write_throughput: 5
  index_tables_provisioning:
    inactive_read_throughput: 1
    inactive_write_throughput: 1
    provisioned_read_throughput: 5
    provisioned_write_throughput: 5
  retention_deletes_enabled: false
  retention_period: 0s
@dorroddorrod dorroddorrod changed the title Getting err="too many failed ingesters" when running in docker Getting err="too many failed ingesters" when running on docker Jun 1, 2020
@owen-d
Copy link
Member

owen-d commented Jun 1, 2020

Hey, let's see if I can help. The current bottleneck is due to the combination of the replication_factor: 3 config and running one replica of the ingester. It's basically saying "all data must be replicated 3 times", but only one ingester is available to replicate the data to. You'll need to either scale back the replication factor or scale out the ingesters.

@dorroddorrod
Copy link
Author

Thanks, it works!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants