We have about 100 identical Linux servers running a Django application that uses Celery with Redis storage.
Since we switched the worker pool from processes to -P threads (or -P greenlet, which shows the same problem),
we have been seeing this error approximately once per day on some of the servers.
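For reference, the pool switch amounts to the following change in the worker invocation (the app name and concurrency are placeholders, not our real values):

    # before: prefork (process) pool -- no errors
    celery -A proj worker --pool prefork --concurrency 8

    # after: thread pool -- this is when the errors started appearing
    celery -A proj worker --pool threads --concurrency 8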
Actual Behavior
2024-01-31 19:57:05.000
Internal Server Error: /api/status/
Traceback (most recent call last):
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/django/core/handlers/exception.py", line 55, in inner
response = get_response(request)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/django/core/handlers/base.py", line 197, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/sentry_sdk/integrations/django/views.py", line 84, in sentry_wrapped_callback
return callback(request, *args, **kwargs)
File "/opt/xxx-secure/xxx/backend/status/views.py", line 11, in status
for s in StatusCheck.collect()
File "/opt/xxx-secure/xxx/backend/status/status.py", line 118, in collect
return [
File "/opt/xxx-secure/xxx/backend/status/status.py", line 118, in <listcomp>
return [
File "/opt/xxx-secure/xxx/backend/status/status.py", line 121, in <genexpr>
check.maybe_run(run_all_checks) for check in cls.checks.values()
File "/opt/xxx-secure/xxx/backend/status/status.py", line 69, in maybe_run
return self.run()
File "/opt/xxx-secure/xxx/backend/status/status.py", line 75, in run
self.func(self, *self.args)
File "/opt/xxx-secure/xxx/backend/status/status.py", line 274, in check_celery
check_celery_task.apply_async(expires=180).get(propagate=False)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/celery/result.py", line 251, in get
return self.backend.wait_for_pending(
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/celery/backends/asynchronous.py", line 221, in wait_for_pending
for _ in self._wait_for_pending(result, **kwargs):
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/celery/backends/asynchronous.py", line 287, in _wait_for_pending
for _ in self.drain_events_until(
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/celery/backends/asynchronous.py", line 54, in drain_events_until
yield self.wait_for(p, wait, timeout=interval)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/celery/backends/asynchronous.py", line 63, in wait_for
wait(timeout=timeout)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/celery/backends/rpc.py", line 59, in drain_events
return self._connection.drain_events(timeout=timeout)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/kombu/connection.py", line 341, in drain_events
return self.transport.drain_events(self.connection, **kwargs)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/kombu/transport/virtual/base.py", line 997, in drain_events
get(self._deliver, timeout=timeout)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/kombu/transport/redis.py", line 591, in get
ret = self.handle_event(fileno, event)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/kombu/transport/redis.py", line 573, in handle_event
return self.on_readable(fileno), self
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/kombu/transport/redis.py", line 569, in on_readable
chan.handlers[type]()
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/kombu/transport/redis.py", line 971, in _brpop_read
dest, item = dest__item
ValueError: too many values to unpack (expected 2)
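Our interpretation of this first traceback (an assumption on our part, not verified against kombu internals): _brpop_read expects the two-element (queue, payload) reply of a BRPOP, and any other reply shape breaks the tuple unpack. A minimal illustration of the failure mode:

    # BRPOP normally replies with a (queue, payload) pair:
    dest, item = (b"celery", b"{...payload...}")  # unpacks fine

    # If some other reply is parsed as the BRPOP result -- for example the
    # b"PONG" answer to a health-check PING sent on the same connection --
    # the unpack fails, because a 4-byte bytes object yields four values:
    try:
        dest, item = b"PONG"
    except ValueError as exc:
        print(exc)  # too many values to unpack (expected 2)

About one minute later the same endpoint failed again, this time inside redis-py's health check itself: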
2024-01-31 19:58:04.000
Internal Server Error: /api/status/
Traceback (most recent call last):
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/django/core/handlers/exception.py", line 55, in inner
response = get_response(request)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/django/core/handlers/base.py", line 197, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/sentry_sdk/integrations/django/views.py", line 84, in sentry_wrapped_callback
return callback(request, *args, **kwargs)
File "/opt/xxx-secure/xxx/backend/status/views.py", line 11, in status
for s in StatusCheck.collect()
File "/opt/xxx-secure/xxx/backend/status/status.py", line 118, in collect
return [
File "/opt/xxx-secure/xxx/backend/status/status.py", line 118, in <listcomp>
return [
File "/opt/xxx-secure/xxx/backend/status/status.py", line 121, in <genexpr>
check.maybe_run(run_all_checks) for check in cls.checks.values()
File "/opt/xxx-secure/xxx/backend/status/status.py", line 69, in maybe_run
return self.run()
File "/opt/xxx-secure/xxx/backend/status/status.py", line 75, in run
self.func(self, *self.args)
File "/opt/xxx-secure/xxx/backend/status/status.py", line 274, in check_celery
check_celery_task.apply_async(expires=180).get(propagate=False)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/celery/result.py", line 251, in get
return self.backend.wait_for_pending(
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/celery/backends/asynchronous.py", line 221, in wait_for_pending
for _ in self._wait_for_pending(result, **kwargs):
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/celery/backends/asynchronous.py", line 287, in _wait_for_pending
for _ in self.drain_events_until(
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/celery/backends/asynchronous.py", line 54, in drain_events_until
yield self.wait_for(p, wait, timeout=interval)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/celery/backends/asynchronous.py", line 63, in wait_for
wait(timeout=timeout)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/celery/backends/rpc.py", line 59, in drain_events
return self._connection.drain_events(timeout=timeout)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/kombu/connection.py", line 341, in drain_events
return self.transport.drain_events(self.connection, **kwargs)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/kombu/transport/virtual/base.py", line 997, in drain_events
get(self._deliver, timeout=timeout)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/kombu/transport/redis.py", line 584, in get
self._register_BRPOP(channel)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/kombu/transport/redis.py", line 525, in _register_BRPOP
channel._brpop_start()
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/kombu/transport/redis.py", line 957, in _brpop_start
self.client.connection.send_command(*command_args)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/redis/connection.py", line 464, in send_command
self.send_packed_command(
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/redis/connection.py", line 437, in send_packed_command
self.check_health()
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/redis/connection.py", line 429, in check_health
self.retry.call_with_retry(self._send_ping, self._ping_failed)
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/redis/retry.py", line 51, in call_with_retry
raise error
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/redis/retry.py", line 46, in call_with_retry
return do()
File "/opt/xxx-secure/xxx/backend/venv/lib/python3.10/site-packages/redis/connection.py", line 420, in _send_ping
raise ConnectionError("Bad response from PING health check")
redis.exceptions.ConnectionError: Bad response from PING health check
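The two failures one minute apart make us suspect a single root cause: under the thread (or greenlet) pool, a health-check PING and a BRPOP may end up sharing a connection, so each reads the reply intended for the other. A hedged workaround sketch, not a verified fix:

    from celery import Celery

    app = Celery("proj")  # placeholder app name

    # health_check_interval is a real redis-py Connection parameter
    # (0 disables the periodic PING); whether kombu 5.3.5 forwards this
    # transport option to redis-py is an assumption we have not verified.
    app.conf.broker_transport_options = {"health_check_interval": 0}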
As best I can tell from the logs, nothing else is going on before, during, or after this error, anywhere (Django, Celery, Redis), for at least one minute.
Checklist
- I have verified that the issue exists against the main branch of Celery.
- I have read the relevant section in the contribution guide on reporting bugs.
- I have checked the issues list for similar or identical bug reports.
- I have checked the pull requests list for existing proposed fixes.
- I have checked the commit log to find out if the bug was already fixed in the main branch.
- I have included all related issues and possible duplicate issues in this issue (If there are none, check this box anyway).
Mandatory Debugging Information
- I have included the output of celery -A proj report in the issue (if you are not able to do this, then at least specify the Celery version affected).
- I have verified that the issue exists against the main branch of Celery.
- I have included the contents of pip freeze in the issue.
- I have included all the versions of all the external dependencies required to reproduce this bug.
Optional Debugging Information
- I have tried reproducing the issue on more than one Python version and/or implementation.
- I have tried reproducing the issue on more than one message broker and/or result backend.
- I have tried reproducing the issue on more than one version of the message broker and/or result backend.
- I have tried reproducing the issue with retries, ETA/Countdown & rate limits disabled.
- I have tried reproducing the issue after downgrading and/or upgrading Celery and its dependencies.
Related Issues and Possible Duplicates
Related Issues
Environment & Settings
Celery version: 5.3.6 (emerald-rush)
celery report
Output:
software -> celery:5.3.6 (emerald-rush) kombu:5.3.5 py:3.10.12
billiard:4.2.0 redis:5.0.1
platform -> system:Linux arch:64bit, ELF
kernel version:5.15.0-76-generic imp:CPython
loader -> celery.loaders.app.AppLoader
settings -> transport:redis results:rpc:///
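For clarity, the report corresponds to an application configured roughly as follows (the module name and broker URL are placeholders for our redacted values):

    from celery import Celery

    # transport:redis, results:rpc:/// as in the report above
    app = Celery("proj",
                 broker="redis://localhost:6379/0",
                 backend="rpc://")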
Steps to Reproduce
Required Dependencies
Python Packages
pip freeze
Output:
amqp==5.2.0 arrow==1.3.0 asgiref==3.7.2 async-timeout==4.0.3 attrs==23.2.0 Babel==2.14.0 beautifulsoup4==4.12.3 billiard==4.2.0 bleach==6.1.0 boto3==1.34.13 botocore==1.34.24 build==1.0.3 celery==5.3.6 certifi==2023.11.17 cffi==1.16.0 charset-normalizer==3.3.2 clamd==1.0.2 click==8.1.7 click-didyoumean==0.3.0 click-plugins==1.1.1 click-repl==0.3.0 cryptography==42.0.0 Django==4.2.9 django-appconf==1.0.6 django-cryptography==1.1 django-filter==23.5 django-ordered-model==3.7.4 django-redis==5.4.0 django-simple-history==3.4.0 django-solo==2.2.0 django-storages==1.14.2 djangorestframework==3.14.0 djangorestframework-simplejwt==5.3.1 dnspython==2.5.0 docx2txt==0.8 docxcompose==1.4.0 docxtpl==0.16.7 drf-excel==2.4.0 drf-writable-nested==0.7.0 et-xmlfile==1.1.0 eventlet==0.34.3 greenlet==3.0.3 gunicorn==21.2.0 hiredis==2.3.2 html5lib==1.1 idna==3.6 isodate==0.6.1 Jinja2==3.1.3 jmespath==1.0.1 jsonschema==4.21.0 jsonschema-specifications==2023.12.1 kombu==5.3.5 lxml==4.9.4 MarkupSafe==2.1.4 more-itertools==10.2.0 numpy==1.26.3 openpyxl==3.1.2 opensearch-py==2.4.2 packaging==23.2 pdfminer.six==20221105 pdfplumber==0.10.3 pillow==10.2.0 pip-tools==7.3.0 prompt-toolkit==3.0.43 psutil==5.9.8 psycopg==3.1.17 psycopg-c==3.1.17 py3langid==0.2.2 pycparser==2.21 pycryptodome==3.20.0 pydenticon==0.3.1 PyJWT==2.8.0 pypdfium2==4.26.0 pyproject_hooks==1.0.0 python-datemath==1.5.5 python-dateutil==2.8.2 python-docx==1.1.0 python-magic==0.4.27 python3-saml==1.16.0 pytz==2023.3.post1 pyzstd==0.15.9 redis==5.0.1 referencing==0.32.1 regex==2023.12.25 requests==2.31.0 requests-aws4auth==1.2.3 rpds-py==0.17.1 s3cmd==2.4.0 s3transfer==0.10.0 sentry-sdk==1.39.1 six==1.16.0 soupsieve==2.5 sqlparse==0.4.4 tinycss2==1.2.1 tomli==2.0.1 types-python-dateutil==2.8.19.20240106 typing_extensions==4.9.0 tzdata==2023.4 urllib3==2.0.7 vine==5.1.0 wcwidth==0.2.13 webcolors==1.13 webencodings==0.5.1 xmlsec==1.3.13
Other Dependencies
N/A
Minimally Reproducible Test Case
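We do not have a deterministic reproducer; the failure shows up roughly once a day somewhere across the ~100 servers. The trigger is the call pattern from the traceback, sketched here with the task and function names taken from the traceback and a placeholder broker URL:

    from celery import Celery

    app = Celery("proj", broker="redis://localhost:6379/0", backend="rpc://")

    @app.task
    def check_celery_task():
        return "ok"

    def check_celery():
        # Same call as status.py line 274 in the traceback: enqueue a probe
        # task and block on the RPC result backend until the result arrives
        # or the task expires.
        check_celery_task.apply_async(expires=180).get(propagate=False)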
Expected Behavior
The status endpoint completes without errors, as it did before we switched the worker pool from processes to threads.