Celery work process consumes 100% CPU after running for several days #1558
After running the Celery worker process for some days, we found that the Python process consumes 100% CPU. I used strace to dump the runtime stack trace and saw that the process at 100% CPU continuously polls and reads. It must be related to Consumer.consume_message. Do you have any suggestions on how to debug this problem?
What version of Celery is this?
I should give you more details: Python 2.7.3. We have more than 10 servers, each running around 20 Celery workers. We don't start our Celery workers directly from celeryd; instead, our Worker class wraps the Celery Worker object, see below:

class OurWorker(object):
    def __init__(self, hostname=None, loglevel=None, logfile=None, autoscale=None):
        self.hostname = hostname if hostname is not None else "our.worker.name"
        if not self.hostname.startswith("our.worker.name"):
            self.hostname = "our.worker.name.%s" % self.hostname
        self.hostname = "%s.%s" % (self.hostname, SOCKET_HOSTNAME)
        self.loglevel = loglevel if loglevel is not None \
            else ("INFO", "DEBUG")[conf.PUSHD_DEBUG]
        self.logfile = logfile if logfile is not None else self.hostname
        self._worker = Worker(app=celery,
                              hostname=self.hostname,
                              include=["push.task.task"],
                              loglevel=self.loglevel,
                              queues=[conf.PUSHD_DISPATCH_QUEUE_NAME,
                                      conf.PUSHD_SCHEDULED_DISPATCH_QUEUE_NAME],
                              autoscale=autoscale)
        self._worker.logfile = "%s/%s.log" % (conf.PUSHD_LOG_PATH, self.logfile)
        # This line is dangerous, do not use and do not delete
        # worker_ready.connect(self.dispatch_worker_ready)

    def start(self):
        """
        Start dispatch worker
        """
        self._worker.run()

After running for some weeks, some of the Celery workers start consuming 100% CPU; it seems the worker cannot break out of the following while loop:

while connection.more_to_read:
    try:
        events = poll(poll_timeout)
    except ValueError:  # Issue 882
        return
    if not events:
        on_poll_empty()
    # Dispatch the registered read/write callbacks for every ready fd.
    for fileno, event in events or ():
        try:
            if event & READ:
                readers[fileno](fileno, event)
            if event & WRITE:
                writers[fileno](fileno, event)
            if event & ERR:
                for handlermap in readers, writers:
                    try:
                        handlermap[fileno](fileno, event)
                    except KeyError:
                        pass
        except (KeyError, Empty):
            continue
        except socket.error:
            if self._state != CLOSE:  # pragma: no cover
                raise
    if keep_draining:
        drain_nowait()
        poll_timeout = 0
    else:
        connection.more_to_read = False
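One way to reproduce the suspected failure mode (an assumption, not confirmed in this thread): once the peer has closed the connection, poll() keeps reporting the fd as readable while recv() returns an empty string, so a loop that never treats the empty read as a disconnect will spin at 100% CPU. A minimal demonstration on Linux:

import select
import socket

# Create a connected socket pair and close the "broker" side.
server = socket.socket()
server.bind(("127.0.0.1", 0))
server.listen(1)
client = socket.create_connection(server.getsockname())
peer, _ = server.accept()
peer.close()  # simulate the broker going away

poller = select.poll()
poller.register(client.fileno(), select.POLLIN)

for _ in range(3):
    events = poller.poll(100)   # the dead fd is reported readable every time
    data = client.recv(4096)    # ...but there is nothing left to read: ''
    print(events, repr(data))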
@ask any updates? We rely heavily on Celery now.
I'm not sure what causes this, but the loop in question is now also rewritten. It could be interesting to know what file descriptor 57 is in this case (the number is likely to change between runs). You can use
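One generic way to see what a file descriptor points to on Linux (a sketch, not necessarily the tool hinted at above; the PID and fd number are placeholders):

import os

def describe_fd(pid, fd):
    # Returns e.g. 'socket:[12345]' for a socket, or a file path for a regular file.
    return os.readlink("/proc/%d/fd/%d" % (pid, fd))

print(describe_fd(12345, 57))  # replace 12345 with the spinning worker's PID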
Found a similar problem. Usually the CPU usage increases after the worker reconnects to RabbitMQ after some network issues. Celery version is 3.0.23 and
I'm also having some issues with CPU usage. I have celery 3.0.24, django-celery 3.0.23, and kombu 2.5.16 installed. After seeing the following lines in the log file, I'm getting a Python process (from celeryd) with 100% CPU usage:
celeryd is started by the following command:
I've been testing the celery dev version for a week or so, and today I tried to stop/kill and start the rabbitmq service, after which the workers with the default pool class had high CPU usage and truss (running on Solaris) showed only a lot of pollsys() calls.
Thanks @dn0. Do you people use py-amqp or librabbitmq? Celery uses poll/select/epoll to see if the socket is readable.
Currently I have no idea how this happens; maybe there is some way to detect that the socket is broken.
It seems it happens because the socket is disconnected, so I think I may have a solution for this.
I'm using the latest amqp from GitHub.
I had amqp 1.0.13 installed.
We are using librabbitmq. @ask, what's the solution? Would you share your thoughts? Currently we are using a separate monitor script to restart our workers when we find the CPU at 100%.
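For reference, a minimal sketch of such a watchdog, assuming psutil is available and that a process supervisor (e.g. supervisord) restarts terminated workers; the "celery" name filter and the 95% threshold are assumptions, not taken from this thread:

import psutil

CPU_THRESHOLD = 95.0  # percent; assumed cut-off for "spinning"

def find_spinning_workers(name_hint="celery"):
    spinning = []
    for proc in psutil.process_iter():
        try:
            if name_hint not in " ".join(proc.cmdline()):
                continue
            # Sample CPU over one second; a busy poll loop sits near 100%.
            if proc.cpu_percent(interval=1.0) >= CPU_THRESHOLD:
                spinning.append(proc)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
    return spinning

if __name__ == "__main__":
    for proc in find_spinning_workers():
        print("restarting worker pid=%d" % proc.pid)
        proc.terminate()  # the supervisor is expected to bring it back up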
Thanks, I have tried to improve disconnection detection in the development version. There is a second way to fix this, and that is to simply count the number of errors and reconnect if it
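A rough sketch of that second approach, counting consecutive errors around a kombu-style drain loop (the threshold, the reconnect strategy, and the omission of consumer setup are all assumptions, not the actual celery/kombu internals):

import socket
from kombu import Connection  # assuming the kombu API used in this thread

MAX_CONSECUTIVE_ERRORS = 100  # assumed threshold

def consume_forever(url="amqp://guest:guest@localhost//"):
    conn = Connection(url)
    errors = 0
    while True:
        try:
            conn.drain_events(timeout=1)
            errors = 0              # a successful iteration resets the counter
        except socket.timeout:
            continue                # nothing to read this second; not an error
        except (socket.error, IOError):
            errors += 1
            if errors >= MAX_CONSECUTIVE_ERRORS:
                # Too many failures in a row: assume the socket is dead and rebuild it.
                conn.close()
                conn = Connection(url)
                conn.connect()
                errors = 0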
Also found a discussion that seems relevant here: http://trac.wxwidgets.org/ticket/7504
@dafang: You should also upgrade to librabbitmq 1.0.2 then, as the ChannelError('bad frame read') is now a ConnectionError, which is necessary in celery 3.1 for the connection to be re-established.
I think I may have found a bug where this could happen: the select eventio implementation stores the raw socket objects when registering, but the poller works in filenos. That would not explain the problem when using epoll/kqueue (linux/bsd), but the latest kombu will now check that the socket is connected before continuing. There is no safe way to verify a socket, but the latest amqp keeps a 'connected' flag that is reset whenever a connection-related error occurs while reading/writing to the socket. librabbitmq already implements Connection.connected, so no change is required there (you just have to upgrade kombu).
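A simplified illustration of the 'connected' flag idea (not the actual amqp/kombu code): any connection-level error or empty read flips the flag, and the surrounding loop checks it instead of polling a dead socket forever:

import socket

class FlaggedConnection(object):
    def __init__(self, sock):
        self.sock = sock
        self.connected = True        # reset on any connection-related error

    def read(self, n):
        try:
            data = self.sock.recv(n)
        except socket.error:
            self.connected = False
            raise
        if not data:                 # peer closed the connection
            self.connected = False
            raise IOError("connection lost")
        return data

def drain(conn, handle):
    # The event loop checks the flag before each iteration, so a broken
    # socket triggers a reconnect rather than a 100% CPU spin.
    while conn.connected:
        handle(conn.read(4096))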
Anyone managed to test this yet?
@ask We will upgrade kombu to the latest stable version in production and see whether this fixes the issue. Will post back after we monitor the server resources for some days.
It seems that I've got the same issue. I'll try it with 3.1 in the next few days. Any idea when 3.1 will be released?
@ask I updated celery/kombu/... from master 2 or 3 days ago, and today I tried to stop/start rabbitmq and everything went fine; after reconnecting, the CPU usage stayed low.
I have the same issue. The server runs for some days while the memory usage of the 31 Erlang processes grows (slowly, but it grows); suddenly the celery process consumes 100% CPU. We are using just the default configuration for RabbitMQ with a single queue, only two different processes that run every 10 seconds, and two processes that run every day.
I am happy to provide more info to help fix this issue. Update: same issue after updating.
pip freeze | grep rabbit
@ask We have updated kombu to the latest version, and the code has been running in production for a month and a half with no CPU issues. I think we can close this issue now.
I have upgraded all software that is required for celery, but CPU consumption has become high.
@rahul16101989 Hey, can you open a new issue with details that would help in reproducing it? Celery settings, package versions, logs, and strace output can all help.
I had the same issue with Celery 3.1.25 / Kombu 3.0.37.
I hit the issue with Celery 4.0.0 / Kombu 4.0.0. I didn't see this before upgrading celery from 3.x to 4, and I was running with RabbitMQ as the broker. My CPU usage on the celery worker went from 3% to 11% in 24 hours after the upgrade (and got close to 40% after 3 days). However, during the migration I switched to Redis instead. I'm going to check with RabbitMQ to see whether the issue is linked to the broker.
Yesterday I restarted Celery using RabbitMQ as the broker to check whether the increasing CPU usage comes from the Redis broker. The result this morning is that I have the same issue with RabbitMQ. As I said in my previous post, I didn't have the issue before the upgrade to Celery 4.x. I'm using Celery beat to schedule periodic tasks, if that may be useful.
FYI: The issue is gone with Kombu 4.0.1
After being in production for several days or weeks, some of the Celery worker processes end up consuming 100% CPU. The following is what we found through the stack dump:
It seems that there is an EAGAIN error, but celery didn't handle it, so it continuously polls and reads.
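For context, a small sketch of what handling EAGAIN on a non-blocking socket usually looks like (a generic example, not the celery/kombu code itself):

import errno
import socket

def read_nonblocking(sock, bufsize=4096):
    """Read whatever is available; treat EAGAIN/EWOULDBLOCK as 'no data yet'."""
    try:
        data = sock.recv(bufsize)
    except socket.error as exc:
        if exc.errno in (errno.EAGAIN, errno.EWOULDBLOCK):
            return None   # socket not actually readable; go back to poll()
        raise             # a real error: let the caller reconnect
    if not data:
        # Empty read means the peer closed; without this check poll() would spin.
        raise IOError("peer closed the connection")
    return data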