Skip to content

Commit

Permalink
Fix a memory leak in HTTPServer with very large file uploads.
Browse files Browse the repository at this point in the history
This is not quite a true leak since the garbage collector will
reclaim everything when it runs, but it might as well be a leak
because CPython's garbage collector uses heuristics based on the
number of allocated objects to decide when to run.  Since this
scenario resulted in a small number of very large objects, the process
could consume a large amount of memory.

The actual change is to ensure that HTTPConnection always sets a
close_callback on the IOStream instead of waiting for the Application
to set one.  I also nulled out a few more references to break up cycles.

Closes #740.
Closes #747.
Closes #760.
  • Loading branch information
bdarnell committed Apr 29, 2013
1 parent 8fdd57d commit 12780c1
Showing 1 changed file with 10 additions and 15 deletions.
25 changes: 10 additions & 15 deletions tornado/httpserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,23 +180,23 @@ def __init__(self, stream, address, request_callback, no_keep_alive=False,
self.no_keep_alive = no_keep_alive
self.xheaders = xheaders
self.protocol = protocol
self._request = None
self._request_finished = False
self._write_callback = None
self._close_callback = None
self._clear_request_state()
# Save stack context here, outside of any request. This keeps
# contexts from one request from leaking into the next.
self._header_callback = stack_context.wrap(self._on_headers)
self.stream.set_close_callback(self._on_connection_close)
self.stream.read_until(b"\r\n\r\n", self._header_callback)

def _clear_callbacks(self):
"""Clears the per-request callbacks.
def _clear_request_state(self):
"""Clears the per-request state.
This is run in between requests to allow the previous handler
to be garbage collected (and prevent spurious close callbacks),
and when the connection is closed (to break up cycles and
facilitate garbage collection in cpython).
"""
self._request = None
self._request_finished = False
self._write_callback = None
self._close_callback = None

Expand All @@ -209,7 +209,6 @@ def set_close_callback(self, callback):
recommended approach prior to Tornado 3.0).
"""
self._close_callback = stack_context.wrap(callback)
self.stream.set_close_callback(self._on_connection_close)

def _on_connection_close(self):
if self._close_callback is not None:
Expand All @@ -218,25 +217,23 @@ def _on_connection_close(self):
callback()
# Delete any unfinished callbacks to break up reference cycles.
self._header_callback = None
self._clear_callbacks()
self._clear_request_state()

def close(self):
self.stream.close()
# Remove this reference to self, which would otherwise cause a
# cycle and delay garbage collection of this connection.
self._header_callback = None
self._clear_callbacks()
self._clear_request_state()

def write(self, chunk, callback=None):
"""Writes a chunk of output to the stream."""
assert self._request, "Request closed"
if not self.stream.closed():
self._write_callback = stack_context.wrap(callback)
self.stream.write(chunk, self._on_write_complete)

def finish(self):
"""Finishes the request."""
assert self._request, "Request closed"
self._request_finished = True
if not self.stream.writing():
self._finish_request()
Expand All @@ -257,7 +254,7 @@ def _on_write_complete(self):
self._finish_request()

def _finish_request(self):
if self.no_keep_alive:
if self.no_keep_alive or self._request is None:
disconnect = True
else:
connection_header = self._request.headers.get("Connection")
Expand All @@ -270,9 +267,7 @@ def _finish_request(self):
disconnect = connection_header != "keep-alive"
else:
disconnect = True
self._request = None
self._request_finished = False
self._clear_callbacks()
self._clear_request_state()
if disconnect:
self.close()
return
Expand Down

2 comments on commit 12780c1

@twinsant
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about manual call gc.collect?

@bdarnell
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

gc.collect() is expensive; it's a bad idea to call it in normal circumstances.

Please sign in to comment.