Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Fixed #9886 -- Added a file-like interface to HttpRequest. Thanks to …
…Ivan Sagalaev for the suggestion and patch.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@14394 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information
freakboy3742 committed Oct 29, 2010
1 parent 3086b55 commit 269e921
Show file tree
Hide file tree
Showing 5 changed files with 236 additions and 95 deletions.
30 changes: 2 additions & 28 deletions django/core/handlers/modpython.py
Expand Up @@ -42,6 +42,8 @@ def __init__(self, req):
# naughty, but also pretty harmless.
self.path_info = u'/'
self._post_parse_error = False
self._stream = self._req
self._read_started = False

def __repr__(self):
# Since this is called as part of error handling, we need to be very
Expand Down Expand Up @@ -81,26 +83,6 @@ def is_secure(self):
# mod_python < 3.2.10 doesn't have req.is_https().
return self._req.subprocess_env.get('HTTPS', '').lower() in ('on', '1')

def _load_post_and_files(self):
    """Populate ``self._post`` and ``self._files`` for this request.

    Non-POST requests get an empty QueryDict and an empty MultiValueDict.
    Multipart POST bodies are handed to ``parse_file_upload``; any other
    POST body is parsed from ``self.raw_post_data``.
    """
    if self.method != 'POST':
        self._post, self._files = (
            http.QueryDict('', encoding=self._encoding),
            datastructures.MultiValueDict(),
        )
        return

    headers = self._req.headers_in
    is_multipart = ('content-type' in headers and
                    headers['content-type'].startswith('multipart'))
    if not is_multipart:
        self._post, self._files = (
            http.QueryDict(self.raw_post_data, encoding=self._encoding),
            datastructures.MultiValueDict(),
        )
        return

    # The multipart parser consumes the request directly, so there is no raw
    # body to hand out afterwards.
    self._raw_post_data = ''
    try:
        self._post, self._files = self.parse_file_upload(self.META, self._req)
    except:
        # See django.core.handlers.wsgi.WSGIHandler for an explanation
        # of what's going on here.
        self._post = http.QueryDict('')
        self._files = datastructures.MultiValueDict()
        self._post_parse_error = True
        raise

def _get_request(self):
if not hasattr(self, '_request'):
self._request = datastructures.MergeDict(self.POST, self.GET)
Expand Down Expand Up @@ -162,13 +144,6 @@ def _get_meta(self):
self._meta[key] = value
return self._meta

def _get_raw_post_data(self):
try:
return self._raw_post_data
except AttributeError:
self._raw_post_data = self._req.read()
return self._raw_post_data

def _get_method(self):
return self.META['REQUEST_METHOD'].upper()

Expand All @@ -178,7 +153,6 @@ def _get_method(self):
FILES = property(_get_files)
META = property(_get_meta)
REQUEST = property(_get_request)
raw_post_data = property(_get_raw_post_data)
method = property(_get_method)

class ModPythonHandler(BaseHandler):
Expand Down
127 changes: 68 additions & 59 deletions django/core/handlers/wsgi.py
Expand Up @@ -5,6 +5,7 @@
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
import socket

from django import http
from django.core import signals
Expand Down Expand Up @@ -62,20 +63,55 @@
505: 'HTTP VERSION NOT SUPPORTED',
}

def safe_copyfileobj(fsrc, fdst, length=16*1024, size=0):
    """
    Copy at most ``size`` bytes from ``fsrc`` to ``fdst``.

    Works like shutil.copyfileobj, reading in pieces of up to ``length``
    bytes, but stops once ``size`` bytes have been copied. That keeps it safe
    from clients that send more than CONTENT_LENGTH bytes in the body.
    A falsy ``size`` copies nothing.
    """
    if not size:
        return
    remaining = size
    while remaining > 0:
        chunk = fsrc.read(min(length, remaining))
        if not chunk:
            # Source ran dry before ``size`` bytes were seen.
            return
        fdst.write(chunk)
        remaining -= len(chunk)
class LimitedStream(object):
    """
    LimitedStream wraps another stream in order to not allow reading from it
    past a specified amount of bytes.

    ``stream`` is the wrapped object (only its ``read(size)`` method is
    used) and ``limit`` is the total number of bytes that may be pulled from
    it. ``buf_size`` is stored on the instance but is not consulted by any
    method of this class.

    For ``read`` and ``readline`` a ``size`` of ``None`` or a negative number
    means "no explicit size", following the usual file-object convention.
    """

    def __init__(self, stream, limit, buf_size=64 * 1024 * 1024):
        self.stream = stream
        self.remaining = limit
        # Data already pulled from the stream but not yet returned to the
        # caller (left over after a readline()).
        self.buffer = ''
        self.buf_size = buf_size

    def _read_limited(self, size=None):
        """Read up to ``size`` bytes from the wrapped stream, never more than
        the remaining allowance. Returns '' once the allowance is used up."""
        if size is None or size < 0 or size > self.remaining:
            size = self.remaining
        # Never pass zero or a negative count to the wrapped stream: a
        # negative count means "read until EOF" for file objects, which would
        # defeat the limit (and block forever on a socket).
        if size <= 0:
            return ''
        result = self.stream.read(size)
        self.remaining -= len(result)
        return result

    def read(self, size=None):
        """Return up to ``size`` bytes, or everything that is left when
        ``size`` is None or negative. Buffered data is returned first."""
        if size is None or size < 0:
            result = self.buffer + self._read_limited()
            self.buffer = ''
        elif size < len(self.buffer):
            result = self.buffer[:size]
            self.buffer = self.buffer[size:]
        else:  # size >= len(self.buffer)
            result = self.buffer + self._read_limited(size - len(self.buffer))
            self.buffer = ''
        return result

    def readline(self, size=None):
        """Return the next line (including its trailing newline, if any),
        truncated to ``size`` bytes when a non-negative ``size`` is given."""
        if size is not None and size < 0:
            size = None
        # Keep filling the buffer only while a line is not yet available AND
        # the size cap (if any) has not been reached. Stopping as soon as
        # either holds avoids over-reading and guarantees the amount asked
        # from _read_limited() below is always positive.
        while '\n' not in self.buffer and \
                (size is None or len(self.buffer) < size):
            if size is None:
                chunk = self._read_limited()
            else:
                chunk = self._read_limited(size - len(self.buffer))
            if not chunk:
                break
            self.buffer += chunk
        # Cut at the first newline (inclusive), or take the whole buffer when
        # there is none, then apply the size cap.
        end = self.buffer.find('\n') + 1
        if not end:
            end = len(self.buffer)
        if size is not None and size < end:
            end = size
        line = self.buffer[:end]
        self.buffer = self.buffer[end:]
        return line

class WSGIRequest(http.HttpRequest):
def __init__(self, environ):
Expand All @@ -98,6 +134,24 @@ def __init__(self, environ):
self.META['SCRIPT_NAME'] = script_name
self.method = environ['REQUEST_METHOD'].upper()
self._post_parse_error = False
if isinstance(self.environ['wsgi.input'], socket._fileobject):
# Under development server 'wsgi.input' is an instance of
# socket._fileobject which hangs indefinitely on reading bytes past
# available count. To prevent this it's wrapped in LimitedStream
# that doesn't read past Content-Length bytes.
#
# This is not done for other kinds of inputs (like flup's FastCGI
# streams) because they don't suffer from this problem and we can
# avoid using another wrapper with its own .read and .readline
# implementation.
try:
content_length = int(self.environ.get('CONTENT_LENGTH', 0))
except (ValueError, TypeError):
content_length = 0
self._stream = LimitedStream(self.environ['wsgi.input'], content_length)
else:
self._stream = self.environ['wsgi.input']
self._read_started = False

def __repr__(self):
# Since this is called as part of error handling, we need to be very
Expand Down Expand Up @@ -133,30 +187,6 @@ def is_secure(self):
return 'wsgi.url_scheme' in self.environ \
and self.environ['wsgi.url_scheme'] == 'https'

def _load_post_and_files(self):
    """Populate ``self._post`` and ``self._files`` from the WSGI environ.

    Non-POST requests get empty containers. Multipart POST bodies are parsed
    by ``parse_file_upload`` straight from ``wsgi.input``; any other POST
    body is parsed from ``self.raw_post_data``.
    """
    if self.method != 'POST':
        self._post, self._files = (
            http.QueryDict('', encoding=self._encoding),
            datastructures.MultiValueDict(),
        )
        return

    content_type = self.environ.get('CONTENT_TYPE', '')
    if not content_type.startswith('multipart'):
        self._post, self._files = (
            http.QueryDict(self.raw_post_data, encoding=self._encoding),
            datastructures.MultiValueDict(),
        )
        return

    self._raw_post_data = ''
    try:
        self._post, self._files = self.parse_file_upload(
            self.META, self.environ['wsgi.input'])
    except:
        # An error occurred while parsing POST data. Since the request
        # handler might access self.POST while formatting the error, set
        # self._post and self._files to prevent attempts to parse POST data
        # again.
        self._post = http.QueryDict('')
        self._files = datastructures.MultiValueDict()
        # Mark that an error occurred. This allows self.__repr__ to be
        # explicit about it instead of simply representing an empty POST.
        self._post_parse_error = True
        raise

def _get_request(self):
if not hasattr(self, '_request'):
self._request = datastructures.MergeDict(self.POST, self.GET)
Expand Down Expand Up @@ -192,32 +222,11 @@ def _get_files(self):
self._load_post_and_files()
return self._files

def _get_raw_post_data(self):
    """Return the request body, copying it out of ``wsgi.input`` on first use.

    No more than CONTENT_LENGTH bytes are copied; the result is cached on
    ``self._raw_post_data``.
    """
    try:
        return self._raw_post_data
    except AttributeError:
        pass

    try:
        # CONTENT_LENGTH might be absent if POST doesn't have content at all
        # (lighttpd).
        content_length = int(self.environ.get('CONTENT_LENGTH', 0))
    except (ValueError, TypeError):
        # If CONTENT_LENGTH was an empty string or not an integer, don't
        # error out. We've also seen None passed in here (against all
        # specs, but see ticket #8259), so we handle TypeError as well.
        content_length = 0

    buf = StringIO()
    if content_length > 0:
        safe_copyfileobj(self.environ['wsgi.input'], buf,
                         size=content_length)
    self._raw_post_data = buf.getvalue()
    buf.close()
    return self._raw_post_data

# Expose the accessor methods as attribute-style properties. GET, POST and
# COOKIES are given both a getter and a setter; FILES, REQUEST and
# raw_post_data are given a getter only.
GET = property(_get_get, _set_get)
POST = property(_get_post, _set_post)
COOKIES = property(_get_cookies, _set_cookies)
FILES = property(_get_files)
REQUEST = property(_get_request)
raw_post_data = property(_get_raw_post_data)

class WSGIHandler(base.BaseHandler):
initLock = Lock()
Expand Down
77 changes: 74 additions & 3 deletions django/http/__init__.py
Expand Up @@ -6,6 +6,10 @@
from pprint import pformat
from urllib import urlencode
from urlparse import urljoin
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
try:
# The mod_python version is more efficient, so try importing it first.
from mod_python.util import parse_qsl
Expand Down Expand Up @@ -132,6 +136,73 @@ def parse_file_upload(self, META, post_data):
parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding)
return parser.parse()

def _get_raw_post_data(self):
if not hasattr(self, '_raw_post_data'):
if self._read_started:
raise Exception("You cannot access raw_post_data after reading from request's data stream")
self._raw_post_data = self.read()
self._stream = StringIO(self._raw_post_data)
return self._raw_post_data
raw_post_data = property(_get_raw_post_data)

def _mark_post_parse_error(self):
    """Reset POST/FILES to empty containers and flag that parsing failed."""
    empty_post = QueryDict('')
    empty_files = MultiValueDict()
    self._post, self._files = empty_post, empty_files
    self._post_parse_error = True

def _load_post_and_files(self):
    """Populate ``self._post`` and ``self._files``.

    Non-POST requests get empty containers. If the request stream has already
    been read from, the POST data is marked as a parse error instead.
    Multipart bodies are parsed by ``parse_file_upload`` using this request
    as the data source; any other body is parsed from ``self.raw_post_data``.
    """
    if self.method != 'POST':
        self._post, self._files = QueryDict('', encoding=self._encoding), MultiValueDict()
        return
    if self._read_started:
        self._mark_post_parse_error()
        return

    content_type = self.META.get('CONTENT_TYPE', '')
    if not content_type.startswith('multipart'):
        self._post, self._files = QueryDict(self.raw_post_data, encoding=self._encoding), MultiValueDict()
        return

    self._raw_post_data = ''
    try:
        self._post, self._files = self.parse_file_upload(self.META, self)
    except:
        # An error occurred while parsing POST data. Since the request
        # handler might access self.POST while formatting the error, set
        # self._post and self._files to prevent attempts to parse POST data
        # again, and mark that an error occurred. This allows self.__repr__
        # to be explicit about it instead of simply representing an empty
        # POST.
        self._mark_post_parse_error()
        raise

## File-like and iterator interface.
##
## Expects self._stream to be set to an appropriate source of bytes by
## a corresponding request subclass (WSGIRequest or ModPythonRequest).
## Also when request data has already been read by request.POST or
## request.raw_post_data, self._stream points to a StringIO instance
## containing that data.

def read(self, *args, **kwargs):
    """Flag the stream as read from, then delegate to ``self._stream.read``."""
    self._read_started = True
    stream = self._stream
    return stream.read(*args, **kwargs)

def readline(self, *args, **kwargs):
    """Flag the stream as read from, then delegate to ``self._stream.readline``."""
    self._read_started = True
    stream = self._stream
    return stream.readline(*args, **kwargs)

def xreadlines(self):
    """Yield lines from ``self.readline()`` until it returns an empty value."""
    line = self.readline()
    while line:
        yield line
        line = self.readline()
__iter__ = xreadlines

def readlines(self):
    """Return a list of everything produced by iterating over this object."""
    return [line for line in self]

class QueryDict(MultiValueDict):
"""
A specialized MultiValueDict that takes a query string when initialized.
Expand Down Expand Up @@ -198,7 +269,7 @@ def __deepcopy__(self, memo):
for key, value in dict.items(self):
dict.__setitem__(result, copy.deepcopy(key, memo), copy.deepcopy(value, memo))
return result

def setlist(self, key, list_):
self._assert_mutable()
key = str_to_unicode(key, self.encoding)
Expand Down Expand Up @@ -385,7 +456,7 @@ def set_cookie(self, key, value='', max_age=None, expires=None, path='/',
"""
Sets a cookie.
``expires`` can be a string in the correct format or a
``expires`` can be a string in the correct format or a
``datetime.datetime`` object in UTC. If ``expires`` is a datetime
object then ``max_age`` will be calculated.
"""
Expand All @@ -407,7 +478,7 @@ def set_cookie(self, key, value='', max_age=None, expires=None, path='/',
# IE requires expires, so set it if hasn't been already.
if not expires:
self.cookies[key]['expires'] = cookie_date(time.time() +
max_age)
max_age)
if path is not None:
self.cookies[key]['path'] = path
if domain is not None:
Expand Down
31 changes: 29 additions & 2 deletions docs/ref/request-response.txt
Expand Up @@ -189,8 +189,14 @@ All attributes except ``session`` should be considered read-only.

.. attribute:: HttpRequest.raw_post_data

The raw HTTP POST data. This is only useful for advanced processing. Use
``POST`` instead.
The raw HTTP POST data as a byte string. This is useful for processing
data in formats other than conventional HTML forms: binary images,
XML payloads, etc. For processing form data, use ``HttpRequest.POST``.

.. versionadded:: 1.3

You can also read from an HttpRequest using a file-like interface. See
:meth:`HttpRequest.read()`.

.. attribute:: HttpRequest.urlconf

Expand Down Expand Up @@ -249,6 +255,27 @@ Methods
If you write your own XMLHttpRequest call (on the browser side), you'll
have to set this header manually if you want ``is_ajax()`` to work.

.. method:: HttpRequest.read(size=None)
.. method:: HttpRequest.readline()
.. method:: HttpRequest.readlines()
.. method:: HttpRequest.xreadlines()
.. method:: HttpRequest.__iter__()

.. versionadded:: 1.3

Methods implementing a file-like interface for reading from an
HttpRequest instance. This makes it possible to consume an incoming
request in a streaming fashion. A common use-case would be to process a
big XML payload with an iterative parser without constructing a whole
XML tree in memory.

Given this standard interface, an HttpRequest instance can be
passed directly to an XML parser such as ElementTree::

import xml.etree.ElementTree as ET
for element in ET.iterparse(request):
process(element)


QueryDict objects
-----------------
Expand Down

0 comments on commit 269e921

Please sign in to comment.