Skip to content

Commit

Permalink
Worked around Python memory leaks if toaster runs for a long time (limitation: multithreading must be enabled for this to work, e.g. specify --jobs 2 or higher).
Browse files Browse the repository at this point in the history
  • Loading branch information
amorilia committed Dec 29, 2009
1 parent 121f0fb commit 37605b1
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 16 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Expand Up @@ -28,6 +28,9 @@ Release 2.0.6 (in development)

* New --jobs toaster option which enables multithreaded toasting.

* Added workaround for memory leaks (at the moment requires --jobs >= 2 to be
functional).

* The niftoaster opt_geometry spell now always skips nif files when a
similarly named tri or egm file is found.

Expand Down
63 changes: 48 additions & 15 deletions pyffi/spells/__init__.py
Expand Up @@ -138,6 +138,7 @@
from copy import deepcopy
from cStringIO import StringIO
import gc
from itertools import izip
import logging # Logger
try:
import multiprocessing # Pool
Expand All @@ -146,7 +147,7 @@
multiprocessing = None
import optparse
import os
import os.path
import os.path # getsize, split, join
import re # for regex parsing (--skip, --only)
import subprocess
import sys # sys.stdout
Expand Down Expand Up @@ -571,7 +572,7 @@ class Toaster(object):
createpatch=False, applypatch=False, diffcmd="", patchcmd="",
series=False,
skip=[], only=[],
jobs=1)
jobs=1, refresh=32)

"""List of spell classes of the particular :class:`Toaster` instance."""

Expand Down Expand Up @@ -850,8 +851,8 @@ def cli(self):
parser.add_option(
"-j", "--jobs", dest="jobs",
type="int",
metavar="N",
help="allow N jobs at once [default: %default]")
metavar="JOBS",
help="allow JOBS jobs at once [default: %default]")
parser.add_option(
"--noninteractive", dest="interactive",
action="store_false",
Expand Down Expand Up @@ -892,6 +893,15 @@ def cli(self):
"-r", "--raise", dest="raisetesterror",
action="store_true",
help="raise exception on errors during the spell (for debugging)")
parser.add_option(
"--refresh", dest="refresh",
type="int",
metavar="REFRESH",
help=
"start new process pool every JOBS * REFRESH files"
" if JOBS is 2 or more"
" (when processing a large number of files, this prevents"
" leaking memory on some operating systems) [default: %default]")
parser.add_option(
"--series", dest="series",
action="store_true",
Expand Down Expand Up @@ -1009,6 +1019,23 @@ def toast(self, top):
:type top: str
"""

def file_pools(chunksize):
    """Helper generator which yields the files to toast in pools of at
    most *chunksize* files, each pool sorted by file size (largest first).

    The files are those produced by :func:`pyffi.utils.walk` for ``top``
    (taken from the enclosing scope), restricted by the file format's
    ``RE_FILENAME``.

    Only non-empty pools are yielded: when there is nothing (left) to
    process the generator simply stops, so that the caller never sets up
    a process pool for an empty list of files.

    :param chunksize: Maximum number of files per pool.
    :type chunksize: int
    :return: Generator of lists of file names.
    """
    all_files = pyffi.utils.walk(
        top, onerror=None,
        re_filename=self.FILEFORMAT.RE_FILENAME)
    while True:
        # fetch (at most) chunksize files from all files
        # note: xrange is listed first, so izip stops on it before
        # pulling (and thereby losing) an extra file name from all_files
        file_pool = [
            filename for i, filename in izip(
                xrange(chunksize), all_files)]
        if not file_pool:
            # all files consumed: stop without yielding an empty pool
            return
        # sort files by size, largest first
        file_pool.sort(key=os.path.getsize, reverse=True)
        yield file_pool

# toast entry code
if not self.spellclass.toastentry(self):
self.msg("spell does not apply! quiting early...")
Expand Down Expand Up @@ -1069,18 +1096,24 @@ def toast(self, top):
pool_options = deepcopy(self.options)
pool_options["jobs"] = 1
pool_options["interactive"] = False
chunksize = self.options["refresh"] * self.options["jobs"]
self.msg("toasting with %i threads" % jobs)
pool = multiprocessing.Pool(processes=jobs)
result = pool.map_async(
_toaster_job,
((self.__class__, filename, pool_options, self.spellnames)
for filename in pyffi.utils.walk(
top, onerror=None,
re_filename=self.FILEFORMAT.RE_FILENAME)))
# specify timeout, so CTRL-C works
# 99999999 is about 3 years, should be long enough... :-)
result.get(timeout=99999999)

for file_pool in file_pools(chunksize):
self.logger.debug("process file pool:")
for filename in file_pool:
self.logger.debug(" " + filename)
pool = multiprocessing.Pool(processes=jobs)
# force chunksize=1 for the pool
# this makes sure that the largest files (which come first
# in the pool) are processed in parallel
result = pool.map_async(
_toaster_job,
((self.__class__, filename, pool_options, self.spellnames)
for filename in file_pool),
chunksize=1)
# specify timeout, so CTRL-C works
# 99999999 is about 3 years, should be long enough... :-)
result.get(timeout=99999999)

# toast exit code
self.spellclass.toastexit(self)
Expand Down
6 changes: 5 additions & 1 deletion tests/nif/niftoaster.txt
Expand Up @@ -36,7 +36,7 @@ Options:
except those specified under --exclude; include
multiple block types by specifying this option more
than once
-j N, --jobs=N allow N jobs at once [default: 1]
-j JOBS, --jobs=JOBS allow JOBS jobs at once [default: 1]
--noninteractive non-interactive session (overwrites files without
warning)
--only=REGEX only toast files whose names (i) match the regular
Expand All @@ -52,6 +52,10 @@ Options:
overwriting the original
-r, --raise raise exception on errors during the spell (for
debugging)
--refresh=REFRESH start new process pool every JOBS * REFRESH files if
JOBS is 2 or more (when processing a large number of
files, this prevents leaking memory on some operating
systems) [default: 32]
--series run spells in series rather than in parallel
--skip=REGEX skip all files whose names match the regular
expression REGEX (takes precedence over --only); if
Expand Down

0 comments on commit 37605b1

Please sign in to comment.