Skip to content
Permalink
Browse files

issue #199: ansible: stop writing temp files for new style modules

While adding support for non-new style module types, NewStyleRunner
began writing modules to a temporary file, and sys.argv was patched to
actually include the script filename. The argv change was never required
to fix any particular bug, and a search of the standard modules reveals
no argv users. Update argv[0] to be '', like an interactive interpreter
would have.

While fixing #210, new style runner began setting __file__ to the
temporary file path in order to allow apt.py to discover the Ansiballz
temporary directory. 5 out of 1,516 standard modules follow this
pattern, but in each case, none actually attempt to access __file__,
they just call dirname on it. Therefore do not write the contents of
file, simply set it to the path as it would exist, within a real
temporary directory.

Finally move temporary directory creation out of runner and into target.
Now a single directory exists for the duration of a run, and is emptied
by runner.py as necessary after each task invocation.

This could be further extended to stop rewriting non-new-style modules
in a with_items loop, but that's another step.

Finally the last bullet point in the documentation almost isn't a lie
again.
  • Loading branch information...
dw committed May 4, 2018
1 parent 43e9e51 commit f9e1905ec6d81d00f1ab38cb2b7b4e3c0e729664
@@ -74,7 +74,7 @@ def reopen_readonly(fp):
"""
Replace the file descriptor belonging to the file object `fp` with one
open on the same file (`fp.name`), but opened with :py:data:`os.O_RDONLY`.
This enables temporary files to be executed on Linux, which usually theows
This enables temporary files to be executed on Linux, which usually throws
``ETXTBUSY`` if any writeable handle exists pointing to a file passed to
`execve()`.
"""
@@ -106,14 +106,6 @@ def __init__(self, module, remote_tmp, service_context,
self.raw_params = raw_params
self.args = args
self.env = env
self._temp_dir = None

def get_temp_dir(self):
if not self._temp_dir:
self._temp_dir = tempfile.mkdtemp(prefix='ansible_mitogen_')
# https://github.com/dw/mitogen/issues/239
#ansible_mitogen.target.make_temp_directory(self.remote_tmp)
return self._temp_dir

def setup(self):
"""
@@ -128,8 +120,16 @@ def revert(self):
implementation simply restores the original environment.
"""
self._env.revert()
if self._temp_dir:
shutil.rmtree(self._temp_dir)
self._cleanup_temp()

def _cleanup_temp(self):
for name in os.listdir(ansible_mitogen.target.temp_dir):
if name in ('.', '..'):
continue

path = os.path.join(ansible_mitogen.target.temp_dir, name)
LOG.debug('Deleting %r', path)
ansible_mitogen.target.prune_tree(path)

def _run(self):
"""
@@ -214,13 +214,15 @@ def setup(self):
super(ProgramRunner, self).setup()
self._setup_program()

program_fp = None

def _setup_program(self):
"""
Create a temporary file containing the program code. The code is
fetched via :meth:`_get_program`.
"""
name = 'ansible_module_' + os.path.basename(self.path)
path = os.path.join(self.get_temp_dir(), name)
path = os.path.join(ansible_mitogen.target.temp_dir, name)
self.program_fp = open(path, 'wb')
self.program_fp.write(self._get_program())
self.program_fp.flush()
@@ -247,7 +249,8 @@ def revert(self):
"""
Delete the temporary program file.
"""
self.program_fp.close()
if self.program_fp:
self.program_fp.close()
super(ProgramRunner, self).revert()

def _run(self):
@@ -284,7 +287,7 @@ def _setup_args(self):
self.args_fp = tempfile.NamedTemporaryFile(
prefix='ansible_mitogen',
suffix='-args',
dir=self.get_temp_dir(),
dir=ansible_mitogen.target.temp_dir,
)
self.args_fp.write(self._get_args_contents())
self.args_fp.flush()
@@ -361,37 +364,55 @@ class NewStyleRunner(ScriptRunner):
def setup(self):
super(NewStyleRunner, self).setup()
self._stdio = NewStyleStdio(self.args)
self._argv = TemporaryArgv([self.program_fp.name])
# It is possible that not supplying the script filename will break some
# module, but this has never been a bug report. Instead act like an
# interpreter that had its script piped on stdin.
self._argv = TemporaryArgv([''])

def revert(self):
self._argv.revert()
self._stdio.revert()
super(NewStyleRunner, self).revert()

def _setup_args(self):
pass

def _setup_program(self):
pass

def _get_code(self):
self.source = ansible_mitogen.target.get_file(
context=self.service_context,
path=self.path,
)

try:
return self._code_by_path[self.path]
except KeyError:
return self._code_by_path.setdefault(self.path, compile(
source=ansible_mitogen.target.get_file(
context=self.service_context,
path=self.path,
),
source=self.source,
filename=self.path,
mode='exec',
dont_inherit=True,
))

def _run(self):
code = self._get_code()

mod = types.ModuleType('__main__')
mod.__file__ = self.program_fp.name
mod.__package__ = None
d = vars(mod)
# Some Ansible modules use __file__ to find the Ansiballz temporary
# directory. We must provide some temporary path in __file__, but we
# don't want to pointlessly write the module to disk when it never
# actually needs to exist. So just pass the filename as it would exist.
mod.__file__ = os.path.join(
ansible_mitogen.target.temp_dir,
'ansible_module_' + os.path.basename(self.path),
)
e = None

try:
exec code in d, d
exec code in vars(mod)
except SystemExit, e:
pass

@@ -270,7 +270,7 @@ def _connect(self, key, spec, via=None):

# We don't need to wait for the result of this. Ideally we'd check its
# return value somewhere, but logs will catch a failure anyway.
context.call_async(ansible_mitogen.target.start_fork_parent)
context.call_async(ansible_mitogen.target.init_child)

if os.environ.get('MITOGEN_DUMP_THREAD_STACKS'):
from mitogen import debug
@@ -33,6 +33,7 @@

from __future__ import absolute_import
import cStringIO
import errno
import grp
import json
import logging
@@ -58,6 +59,10 @@

LOG = logging.getLogger(__name__)

#: Set by init_child() to the single temporary directory that will exist for
#: the duration of the process.
temp_dir = None

#: Caching of fetched file data.
_file_cache = {}

@@ -191,8 +196,67 @@ def transfer_file(context, in_path, out_path, sync=False, set_owner=False):
os.utime(out_path, (metadata['atime'], metadata['mtime']))


def prune_tree(path):
"""
Like shutil.rmtree(), but log errors rather than discard them, and do not
waste multiple os.stat() calls discovering whether the object can be
deleted, just try deleting it instead.
"""
try:
os.unlink(path)
return
except OSError, e:
if not (os.path.isdir(path) and
e.args[0] in (errno.EPERM, errno.EISDIR)):
LOG.error('prune_tree(%r): %s', path, e)
return

try:
# Ensure write access for readonly directories. Ignore error in case
# path is on a weird filesystem (e.g. vfat).
os.chmod(path, int('0700', 8))
except OSError, e:
LOG.warning('prune_tree(%r): %s', path, e)

try:
for name in os.listdir(path):
if name not in ('.', '..'):
prune_tree(os.path.join(path, name))
os.rmdir(path)
except OSError, e:
LOG.error('prune_tree(%r): %s', path, e)


def _on_broker_shutdown():
"""
Respond to broker shutdown (graceful termination by parent, or loss of
connection to parent) by deleting our sole temporary directory.
"""
prune_tree(temp_dir)


def reset_temp_dir(econtext):
"""
Create one temporary directory to be reused by all runner.py invocations
for the lifetime of the process. The temporary directory is changed for
each forked job, and emptied as necessary runner.py::_cleanup_temp() after
each module invocation.
The result is that a context need only create and delete one directory
during startup and shutdown, and no further filesystem writes need occur
assuming no modules execute that create temporary files.
"""
global temp_dir
# https://github.com/dw/mitogen/issues/239
temp_dir = tempfile.mkdtemp(prefix='ansible_mitogen_')

# This must be reinstalled in forked children too, since the Broker
# instance from the parent process does not carry over to the new child.
mitogen.core.listen(econtext.broker, 'shutdown', _on_broker_shutdown)


@mitogen.core.takes_econtext
def start_fork_parent(econtext):
def init_child(econtext):
"""
Called by ContextService immediately after connection; arranges for the
(presently) spotless Python interpreter to be forked, where the newly
@@ -206,6 +270,7 @@ def start_fork_parent(econtext):
global _fork_parent
mitogen.parent.upgrade_router(econtext)
_fork_parent = econtext.router.fork()
reset_temp_dir(econtext)


@mitogen.core.takes_econtext
@@ -321,6 +386,7 @@ def run_module_async(job_id, kwargs, econtext):
"""
try:
try:
reset_temp_dir(econtext)
_run_module_async(job_id, kwargs, econtext)
except Exception:
LOG.exception('_run_module_async crashed')
@@ -331,7 +397,9 @@ def run_module_async(job_id, kwargs, econtext):
def make_temp_directory(base_dir):
"""
Handle creation of `base_dir` if it is absent, in addition to a unique
temporary directory within `base_dir`.
temporary directory within `base_dir`. This is the temporary directory that
becomes 'remote_tmp', not the one used by Ansiballz. It always uses the
system temporary directory.
:returns:
Newly created temporary directory.
@@ -14,6 +14,7 @@ def main():
module = AnsibleModule(argument_spec={})
module.exit_json(
argv=sys.argv,
__file__=__file__,
argv_types=[str(type(s)) for s in sys.argv],
env=dict(os.environ),
cwd=os.getcwd(),

0 comments on commit f9e1905

Please sign in to comment.
You can’t perform that action at this time.