Skip to content

Commit

Permalink
fix file handle leak in subprocess, but not ideal for Python 2.6 beca…
Browse files Browse the repository at this point in the history
  • Loading branch information
daler committed Sep 10, 2011
1 parent c342998 commit 703f7e2
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 9 deletions.
22 changes: 14 additions & 8 deletions pybedtools/bedtool.py
Expand Up @@ -10,8 +10,8 @@
from itertools import groupby, islice

from pybedtools.helpers import get_tempdir, _tags,\
History, HistoryStep, call_bedtools, _flatten_list, \
_check_sequence_stderr, isBAM, BEDToolsError
History, HistoryStep, _flatten_list, _prog_names, call_bedtools, \
_check_sequence_stderr, isBAM, BEDToolsError, _cleanup_process
from cbedtools import IntervalFile, IntervalIterator
import pybedtools

Expand Down Expand Up @@ -172,9 +172,10 @@ def wrapped(self, *args, **kwargs):
cmds, tmp, stdin = self.handle_kwargs(prog=prog, **kwargs)

# Do the actual call
stream = call_bedtools(cmds, tmp, stdin=stdin,
process, stream = call_bedtools(cmds, tmp, stdin=stdin,
check_stderr=check_stderr)
result = BedTool(stream)
result.process = process

# Post-hoc editing of the BedTool -- for example, this is used for
# the sequence methods to add a `seqfn` attribute to the resulting
Expand Down Expand Up @@ -307,6 +308,7 @@ def __init__(self, fn, from_string=False):
self._hascounts = False
self._file_type = None
self.history = History()
self.process = None

if self._isbam and isinstance(self.fn, basestring):
self._bam_header = ''.join(BAM(self.fn, header_only=True))
Expand Down Expand Up @@ -1610,7 +1612,8 @@ def randomstats(self, other, iterations, **kwargs):
return d

def randomintersection(self, other, iterations, intersect_kwargs=None,
shuffle_kwargs=None, debug=False):
shuffle_kwargs=None, debug=False,
report_iterations=False):
"""
Performs *iterations* shufflings of self, each time intersecting with
*other*.
Expand Down Expand Up @@ -1653,12 +1656,15 @@ def randomintersection(self, other, iterations, intersect_kwargs=None,
for i in range(iterations):
if debug:
shuffle_kwargs['seed'] = i
tmp = self.shuffle(**shuffle_kwargs)
tmp2 = tmp.intersect(other, **intersect_kwargs)
if report_iterations:
sys.stderr.write('\r%s' % i)
sys.stderr.flush()
tmp = self.shuffle(stream=True, **shuffle_kwargs)
tmp2 = tmp.intersect(other, stream=True, **intersect_kwargs)

yield len(tmp2)
os.unlink(tmp.fn)
os.unlink(tmp2.fn)
_cleanup_process(tmp.process)
_cleanup_process(tmp2.process)
del(tmp)
del(tmp2)

Expand Down
21 changes: 20 additions & 1 deletion pybedtools/helpers.py
Expand Up @@ -301,7 +301,7 @@ def call_bedtools(cmds, tmpfn=None, stdin=None, check_stderr=None):
print '\n\t' + '\n\t'.join(problems[err.errno])
raise OSError('See above for commands that gave the error')

return output
return p, output


def set_bedtools_path(path=""):
Expand Down Expand Up @@ -345,4 +345,23 @@ def _check_sequence_stderr(x):
return True
return False


def _cleanup_process(process):
    """
    Makes sure a process spawned by subprocess.Popen is good and dead, and
    doesn't leave any filehandles open.

    Closes whichever of the process's stdin/stdout/stderr pipes exist, kills
    the process if it is still running, and then waits on it so that its exit
    status is collected and no zombie child is left behind.

    Parameters
    ----------
    process : subprocess.Popen or None
        The process to clean up.  `None` is accepted and ignored, so callers
        may pass a process attribute that was never set.
    """
    if process is None:
        return

    for pipe in (process.stdin, process.stdout, process.stderr):
        if pipe:
            pipe.close()

    # Only signal a process that is still running: on Python 2, kill() on a
    # process that has already exited raises OSError.
    if process.poll() is None:
        try:
            process.kill()
        except OSError:
            # The process exited between poll() and kill(); nothing to kill.
            pass
        # Reap the child so it does not linger as a zombie and so that
        # `returncode` is populated.
        process.wait()


# Arrange for `cleanup` to be called when the interpreter exits.
atexit.register(cleanup)

1 comment on commit 703f7e2

@daler
Copy link
Owner Author

@daler daler commented on 703f7e2 Sep 10, 2011

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This fixes #38. However:

In Python 2.6, because of http://bugs.python.org/issue5099, the subprocess module does not behave well when a reference is kept to the process. Specifically, every time a BedTool is destroyed in Python 2.6, you get this error:

Exception AttributeError: "'NoneType' object has no attribute 'error'" in <bound method Popen.__del__ of <subprocess.Popen object at 0xec0108c>> ignored

This seems to be happening because I'm keeping a reference to the process in BedTool.process so it can be cleaned up when needed in BedTool.randomintersection(). This error does not appear in Python 2.7, and it also does not appear if a reference to the process is not stored outside of the helpers.call_bedtools() function.

Until I figure out a way to solve this in Python 2.6, I'm going to keep this fix on a separate branch for now.

Please sign in to comment.