Skip to content

Commit dc4433a

Browse files
committed
issue #202: ansible: forget all dependent contexts on Stream disconnect
This is a partial fix; there are still at least 2 cases that need to be covered: (1) in-progress connections must have CallError or similar sent to any waiters; (2) once connection delegation exists, it is possible for other worker processes to be active (and at any step in the process), trying to communicate with a context that we know can no longer be communicated with. The solution to that isn't clear yet. Additionally, ensure root has a /bin/bash shell in both Docker images.
1 parent c5fe817 commit dc4433a

File tree

4 files changed

+65
-0
lines changed

4 files changed

+65
-0
lines changed

ansible_mitogen/services.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,29 @@ def shutdown_all(self):
199199
self._shutdown(context)
200200
self._lru_by_via = {}
201201

202+
def _on_stream_disconnect(self, stream):
    """
    Respond to Stream disconnection by deleting any record of contexts
    reached via that stream. This method runs in the Broker thread and must
    not block.

    :param stream:
        The stream that disconnected. Every known context whose
        ``context_id`` appears in ``stream.routes`` is forgotten.
    """
    # TODO: there is a race between creation of a context and disconnection
    # of its related stream. An error reply should be sent to any message
    # in _waiters_by_key below.
    self._lock.acquire()
    try:
        # Iterate over a snapshot, since entries are removed from
        # _key_by_context inside the loop.
        for context, key in list(self._key_by_context.items()):
            if context.context_id in stream.routes:
                LOG.info('Dropping %r due to disconnect of %r',
                         context, stream)
                self._response_by_key.pop(key, None)
                self._waiters_by_key.pop(key, None)
                self._refs_by_context.pop(context, None)
                self._lru_by_via.pop(context, None)
                # Drop the context->key mapping too; otherwise the dead
                # context is never forgotten and is revisited on every
                # subsequent disconnect.
                self._key_by_context.pop(context, None)
    finally:
        self._lock.release()
224+
202225
def _connect(self, key, method_name, **kwargs):
203226
"""
204227
Actual connect implementation. Arranges for the Mitogen connection to
@@ -240,14 +263,24 @@ def _connect(self, key, method_name, **kwargs):
240263

241264
if kwargs.get('via'):
242265
self._update_lru(context, method_name=method_name, **kwargs)
266+
else:
267+
# For directly connected contexts, listen to the associated
268+
# Stream's disconnect event and use it to invalidate dependent
269+
# Contexts.
270+
stream = self.router.stream_by_id(context.context_id)
271+
mitogen.core.listen(stream, 'disconnect',
272+
lambda: self._on_stream_disconnect(stream))
273+
243274
home_dir = context.call(os.path.expanduser, '~')
244275

245276
# We don't need to wait for the result of this. Ideally we'd check its
246277
# return value somewhere, but logs will catch a failure anyway.
247278
context.call_async(ansible_mitogen.target.start_fork_parent)
279+
248280
if os.environ.get('MITOGEN_DUMP_THREAD_STACKS'):
249281
from mitogen import debug
250282
context.call(debug.dump_to_logger)
283+
251284
self._key_by_context[context] = key
252285
self._refs_by_context[context] = 0
253286
return {
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
- import_playbook: lru_one_target.yml
2+
- import_playbook: reconnection.yml
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Test ContextService ability to handle disconnections, including handling
2+
# cleanup of dependent (via=) contexts.
3+
4+
- name: integration/context_service/reconnection.yml
5+
hosts: all
6+
any_errors_fatal: true
7+
tasks:
8+
9+
- become: true
10+
custom_python_detect_environment:
11+
register: old_become_env
12+
13+
- become: true
14+
# The sleep duration below must be >1 for vanilla Ansible.
15+
shell: |
16+
bash -c "( sleep 3; pkill -f sshd:; ) & disown"
17+
18+
- connection: local
19+
shell: sleep 3
20+
21+
- wait_for_connection:
22+
23+
- become: true
24+
custom_python_detect_environment:
25+
register: new_become_env
26+
27+
# Verify the PIDs really changed (i.e. disconnection happened)
28+
- assert:
29+
that:
30+
- old_become_env.pid != new_become_env.pid

tests/build_docker_images.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
DOCKERFILE = r"""
3636
COPY data/001-mitogen.sudo /etc/sudoers.d/001-mitogen
3737
RUN \
38+
chsh -s /bin/bash && \
3839
mkdir -p /var/run/sshd && \
3940
echo i-am-mitogen-test-docker-image > /etc/sentinel && \
4041
groupadd mitogen__sudo_nopw && \

0 commit comments

Comments
 (0)