Skip to content
Permalink
Browse files

issue #202: ansible: forget all dependent contexts on Stream disconnect

This is a partial fix; at least two cases still need to be covered:

- In-progress connections must have CallError or similar sent to any
  waiters
- Once connection delegation exists, it is possible for other worker
  processes to be active (and in any step in the process), trying to
  communicate with a context that we know can no longer be communicated
  with. The solution to that isn't clear yet.

Additionally ensure root has /bin/bash shell in both Docker images.
  • Loading branch information...
dw committed Apr 21, 2018
1 parent c5fe817 commit dc4433ace6ce2f19b5dca3c25d7fb1de40a771ff
@@ -199,6 +199,29 @@ def shutdown_all(self):
self._shutdown(context)
self._lru_by_via = {}

def _on_stream_disconnect(self, stream):
    """
    Respond to Stream disconnection by deleting any record of contexts
    reached via that stream. This method runs in the Broker thread and must
    not block.

    :param stream:
        The stream that disconnected. Every tracked context whose
        ``context_id`` appears in ``stream.routes`` is forgotten.
    """
    # TODO: there is a race between creation of a context and disconnection
    # of its related stream. An error reply should be sent to any message
    # in _waiters_by_key below.
    self._lock.acquire()
    try:
        # Iterate over a snapshot: entries are removed from _key_by_context
        # inside the loop.
        for context, key in list(self._key_by_context.items()):
            if context.context_id not in stream.routes:
                continue

            LOG.info('Dropping %r due to disconnect of %r',
                     context, stream)
            # Forget everything indexed by the connection key...
            self._response_by_key.pop(key, None)
            self._waiters_by_key.pop(key, None)
            # ...and everything indexed by the context itself. The
            # context->key mapping must go too, otherwise the dead context
            # remains recorded and is revisited on every later disconnect.
            self._key_by_context.pop(context, None)
            self._refs_by_context.pop(context, None)
            self._lru_by_via.pop(context, None)
    finally:
        self._lock.release()

def _connect(self, key, method_name, **kwargs):
"""
Actual connect implementation. Arranges for the Mitogen connection to
@@ -240,14 +263,24 @@ def _connect(self, key, method_name, **kwargs):

if kwargs.get('via'):
self._update_lru(context, method_name=method_name, **kwargs)
else:
# For directly connected contexts, listen to the associated
# Stream's disconnect event and use it to invalidate dependent
# Contexts.
stream = self.router.stream_by_id(context.context_id)
mitogen.core.listen(stream, 'disconnect',
lambda: self._on_stream_disconnect(stream))

home_dir = context.call(os.path.expanduser, '~')

# We don't need to wait for the result of this. Ideally we'd check its
# return value somewhere, but logs will catch a failure anyway.
context.call_async(ansible_mitogen.target.start_fork_parent)

if os.environ.get('MITOGEN_DUMP_THREAD_STACKS'):
from mitogen import debug
context.call(debug.dump_to_logger)

self._key_by_context[context] = key
self._refs_by_context[context] = 0
return {
@@ -1 +1,2 @@
- import_playbook: lru_one_target.yml
- import_playbook: reconnection.yml
@@ -0,0 +1,30 @@
# Test ContextService's ability to handle disconnections, including handling
# cleanup of dependent (via=) contexts.
#
# Outline: record the PID seen through a become context, kill sshd so the
# connection drops, wait for the host to be reachable again, then check that
# the become context now reports a different PID.

- name: integration/context_service/reconnection.yml
hosts: all
any_errors_fatal: true
tasks:

# Capture the environment (including the PID) reported via become before the
# disconnect.
- become: true
custom_python_detect_environment:
register: old_become_env

# Schedule a background job on the target that kills processes matching
# "sshd:" after 3 seconds; it is backgrounded and disowned so this task can
# return before the connection is cut. The following local task sleeps for 3
# seconds on the controller so the kill has had time to fire.
- become: true
# This must be >1 for vanilla Ansible.
shell: |
bash -c "( sleep 3; pkill -f sshd:; ) & disown"
- connection: local
shell: sleep 3

# Block until the target can be reached again.
- wait_for_connection:

# Capture the environment again through a become context after reconnecting.
- become: true
custom_python_detect_environment:
register: new_become_env

# Verify the PIDs really changed (i.e. disconnection happened)
- assert:
that:
- old_become_env.pid != new_become_env.pid
@@ -35,6 +35,7 @@
DOCKERFILE = r"""
COPY data/001-mitogen.sudo /etc/sudoers.d/001-mitogen
RUN \
chsh -s /bin/bash && \
mkdir -p /var/run/sshd && \
echo i-am-mitogen-test-docker-image > /etc/sentinel && \
groupadd mitogen__sudo_nopw && \

0 comments on commit dc4433a

Please sign in to comment.
You can’t perform that action at this time.