Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Fixed #9886 -- Added a file-like interface to HttpRequest. Thanks to Ivan Sagalaev for the suggestion and patch.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@14394 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit 269e921756371bee6d35a967bc2ffe84d1ae39eb 1 parent 3086b55
@freakboy3742 freakboy3742 authored
View
30 django/core/handlers/modpython.py
@@ -42,6 +42,8 @@ def __init__(self, req):
# naughty, but also pretty harmless.
self.path_info = u'/'
self._post_parse_error = False
+ self._stream = self._req
+ self._read_started = False
def __repr__(self):
# Since this is called as part of error handling, we need to be very
@@ -81,26 +83,6 @@ def is_secure(self):
# mod_python < 3.2.10 doesn't have req.is_https().
return self._req.subprocess_env.get('HTTPS', '').lower() in ('on', '1')
- def _load_post_and_files(self):
- "Populates self._post and self._files"
- if self.method != 'POST':
- self._post, self._files = http.QueryDict('', encoding=self._encoding), datastructures.MultiValueDict()
- return
-
- if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
- self._raw_post_data = ''
- try:
- self._post, self._files = self.parse_file_upload(self.META, self._req)
- except:
- # See django.core.handlers.wsgi.WSGIHandler for an explanation
- # of what's going on here.
- self._post = http.QueryDict('')
- self._files = datastructures.MultiValueDict()
- self._post_parse_error = True
- raise
- else:
- self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
-
def _get_request(self):
if not hasattr(self, '_request'):
self._request = datastructures.MergeDict(self.POST, self.GET)
@@ -162,13 +144,6 @@ def _get_meta(self):
self._meta[key] = value
return self._meta
- def _get_raw_post_data(self):
- try:
- return self._raw_post_data
- except AttributeError:
- self._raw_post_data = self._req.read()
- return self._raw_post_data
-
def _get_method(self):
return self.META['REQUEST_METHOD'].upper()
@@ -178,7 +153,6 @@ def _get_method(self):
FILES = property(_get_files)
META = property(_get_meta)
REQUEST = property(_get_request)
- raw_post_data = property(_get_raw_post_data)
method = property(_get_method)
class ModPythonHandler(BaseHandler):
View
127 django/core/handlers/wsgi.py
@@ -5,6 +5,7 @@
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
+import socket
from django import http
from django.core import signals
@@ -62,20 +63,55 @@
505: 'HTTP VERSION NOT SUPPORTED',
}
-def safe_copyfileobj(fsrc, fdst, length=16*1024, size=0):
- """
- A version of shutil.copyfileobj that will not read more than 'size' bytes.
- This makes it safe from clients sending more than CONTENT_LENGTH bytes of
- data in the body.
- """
- if not size:
- return
- while size > 0:
- buf = fsrc.read(min(length, size))
- if not buf:
- break
- fdst.write(buf)
- size -= len(buf)
class LimitedStream(object):
    """
    Wrap a file-like object so that at most ``limit`` bytes can be read
    from it.

    Once ``limit`` bytes have been consumed, every read returns an empty
    string and the wrapped stream's ``read()`` is not called again.

    Parameters:
        stream   -- object exposing ``read(size)``.
        limit    -- maximum number of bytes that may be read from ``stream``.
        buf_size -- stored on the instance as ``self.buf_size``; nothing in
                    this class consults it. Kept so existing callers that
                    pass it keep working.
    """
    def __init__(self, stream, limit, buf_size=64 * 1024 * 1024):
        self.stream = stream
        self.remaining = limit
        # Data already pulled from ``stream`` but not yet handed to the
        # caller (left over after readline() returned a partial buffer).
        self.buffer = ''
        self.buf_size = buf_size

    def _read_limited(self, size=None):
        """
        Read up to ``size`` bytes from the wrapped stream without exceeding
        the remaining allowance. ``None`` or a negative ``size`` means
        "everything that is still allowed".
        """
        if size is None or size < 0 or size > self.remaining:
            size = self.remaining
        # Never forward a zero or negative size to the wrapped stream: a
        # negative size conventionally means "read until EOF", which would
        # defeat the limit.
        if size <= 0:
            return ''
        result = self.stream.read(size)
        self.remaining -= len(result)
        return result

    def read(self, size=None):
        """
        Return up to ``size`` bytes, serving buffered data first. ``None``
        or a negative ``size`` returns everything left within the limit.
        """
        if size is None or size < 0:
            result = self.buffer + self._read_limited()
            self.buffer = ''
        elif size < len(self.buffer):
            result = self.buffer[:size]
            self.buffer = self.buffer[size:]
        else:  # size >= len(self.buffer)
            result = self.buffer + self._read_limited(size - len(self.buffer))
            self.buffer = ''
        return result

    def readline(self, size=None):
        """
        Return the next line, including its trailing newline if present.

        If ``size`` is a positive number, at most ``size`` bytes are
        returned. ``None``, ``0`` or a negative ``size`` means no per-call
        cap (``0`` is treated this way to match the previous behaviour of
        this method).
        """
        if not size or size < 0:
            size = None
        # Keep reading only while BOTH hold: no complete line is buffered
        # yet, and the buffer is still shorter than the caller's cap.
        while '\n' not in self.buffer and \
                (size is None or len(self.buffer) < size):
            if size is None:
                chunk = self._read_limited()
            else:
                # len(self.buffer) < size here, so the request is positive.
                chunk = self._read_limited(size - len(self.buffer))
            if not chunk:
                break
            self.buffer += chunk
        # Cut after the first newline, or take the whole buffer if there is
        # none, then apply the caller's cap.
        end = self.buffer.find('\n') + 1
        if not end:
            end = len(self.buffer)
        if size is not None and end > size:
            end = size
        line = self.buffer[:end]
        self.buffer = self.buffer[end:]
        return line
class WSGIRequest(http.HttpRequest):
def __init__(self, environ):
@@ -98,6 +134,24 @@ def __init__(self, environ):
self.META['SCRIPT_NAME'] = script_name
self.method = environ['REQUEST_METHOD'].upper()
self._post_parse_error = False
+ if isinstance(self.environ['wsgi.input'], socket._fileobject):
+ # Under development server 'wsgi.input' is an instance of
+ # socket._fileobject which hangs indefinitely on reading bytes past
+ # available count. To prevent this it's wrapped in LimitedStream
+ # that doesn't read past Content-Length bytes.
+ #
+ # This is not done for other kinds of inputs (like flup's FastCGI
+ # streams) because they don't suffer from this problem and we can
+ # avoid using another wrapper with its own .read and .readline
+ # implementation.
+ try:
+ content_length = int(self.environ.get('CONTENT_LENGTH', 0))
+ except (ValueError, TypeError):
+ content_length = 0
+ self._stream = LimitedStream(self.environ['wsgi.input'], content_length)
+ else:
+ self._stream = self.environ['wsgi.input']
+ self._read_started = False
def __repr__(self):
# Since this is called as part of error handling, we need to be very
@@ -133,30 +187,6 @@ def is_secure(self):
return 'wsgi.url_scheme' in self.environ \
and self.environ['wsgi.url_scheme'] == 'https'
- def _load_post_and_files(self):
- # Populates self._post and self._files
- if self.method == 'POST':
- if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
- self._raw_post_data = ''
- try:
- self._post, self._files = self.parse_file_upload(self.META, self.environ['wsgi.input'])
- except:
- # An error occured while parsing POST data. Since when
- # formatting the error the request handler might access
- # self.POST, set self._post and self._file to prevent
- # attempts to parse POST data again.
- self._post = http.QueryDict('')
- self._files = datastructures.MultiValueDict()
- # Mark that an error occured. This allows self.__repr__ to
- # be explicit about it instead of simply representing an
- # empty POST
- self._post_parse_error = True
- raise
- else:
- self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
- else:
- self._post, self._files = http.QueryDict('', encoding=self._encoding), datastructures.MultiValueDict()
-
def _get_request(self):
if not hasattr(self, '_request'):
self._request = datastructures.MergeDict(self.POST, self.GET)
@@ -192,32 +222,11 @@ def _get_files(self):
self._load_post_and_files()
return self._files
- def _get_raw_post_data(self):
- try:
- return self._raw_post_data
- except AttributeError:
- buf = StringIO()
- try:
- # CONTENT_LENGTH might be absent if POST doesn't have content at all (lighttpd)
- content_length = int(self.environ.get('CONTENT_LENGTH', 0))
- except (ValueError, TypeError):
- # If CONTENT_LENGTH was empty string or not an integer, don't
- # error out. We've also seen None passed in here (against all
- # specs, but see ticket #8259), so we handle TypeError as well.
- content_length = 0
- if content_length > 0:
- safe_copyfileobj(self.environ['wsgi.input'], buf,
- size=content_length)
- self._raw_post_data = buf.getvalue()
- buf.close()
- return self._raw_post_data
-
GET = property(_get_get, _set_get)
POST = property(_get_post, _set_post)
COOKIES = property(_get_cookies, _set_cookies)
FILES = property(_get_files)
REQUEST = property(_get_request)
- raw_post_data = property(_get_raw_post_data)
class WSGIHandler(base.BaseHandler):
initLock = Lock()
View
77 django/http/__init__.py
@@ -7,6 +7,10 @@
from urllib import urlencode
from urlparse import urljoin
try:
+ from cStringIO import StringIO
+except ImportError:
+ from StringIO import StringIO
+try:
# The mod_python version is more efficient, so try importing it first.
from mod_python.util import parse_qsl
except ImportError:
@@ -132,6 +136,73 @@ def parse_file_upload(self, META, post_data):
parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding)
return parser.parse()
+ def _get_raw_post_data(self):
+ if not hasattr(self, '_raw_post_data'):
+ if self._read_started:
+ raise Exception("You cannot access raw_post_data after reading from request's data stream")
+ self._raw_post_data = self.read()
+ self._stream = StringIO(self._raw_post_data)
+ return self._raw_post_data
+ raw_post_data = property(_get_raw_post_data)
+
+ def _mark_post_parse_error(self):
+ self._post = QueryDict('')
+ self._files = MultiValueDict()
+ self._post_parse_error = True
+
+ def _load_post_and_files(self):
+ # Populates self._post and self._files
+ if self.method != 'POST':
+ self._post, self._files = QueryDict('', encoding=self._encoding), MultiValueDict()
+ return
+ if self._read_started:
+ self._mark_post_parse_error()
+ return
+
+ if self.META.get('CONTENT_TYPE', '').startswith('multipart'):
+ self._raw_post_data = ''
+ try:
+ self._post, self._files = self.parse_file_upload(self.META, self)
+ except:
+ # An error occurred while parsing POST data. Since when
+ # formatting the error the request handler might access
+ # self.POST, set self._post and self._files to prevent
+ # attempts to parse POST data again.
+ # Mark that an error occurred. This allows self.__repr__ to
+ # be explicit about it instead of simply representing an
+ # empty POST
+ self._mark_post_parse_error()
+ raise
+ else:
+ self._post, self._files = QueryDict(self.raw_post_data, encoding=self._encoding), MultiValueDict()
+
+ ## File-like and iterator interface.
+ ##
+ ## Expects self._stream to be set to an appropriate source of bytes by
+ ## a corresponding request subclass (WSGIRequest or ModPythonRequest).
+ ## Also when request data has already been read by request.POST or
+ ## request.raw_post_data, self._stream points to a StringIO instance
+ ## containing that data.
+
+ def read(self, *args, **kwargs):
+ self._read_started = True
+ return self._stream.read(*args, **kwargs)
+
+ def readline(self, *args, **kwargs):
+ self._read_started = True
+ return self._stream.readline(*args, **kwargs)
+
+ def xreadlines(self):
+ while True:
+ buf = self.readline()
+ if not buf:
+ break
+ yield buf
+ __iter__ = xreadlines
+
+ def readlines(self):
+ return list(iter(self))
+
class QueryDict(MultiValueDict):
"""
A specialized MultiValueDict that takes a query string when initialized.
@@ -198,7 +269,7 @@ def __deepcopy__(self, memo):
for key, value in dict.items(self):
dict.__setitem__(result, copy.deepcopy(key, memo), copy.deepcopy(value, memo))
return result
-
+
def setlist(self, key, list_):
self._assert_mutable()
key = str_to_unicode(key, self.encoding)
@@ -385,7 +456,7 @@ def set_cookie(self, key, value='', max_age=None, expires=None, path='/',
"""
Sets a cookie.
- ``expires`` can be a string in the correct format or a
+ ``expires`` can be a string in the correct format or a
``datetime.datetime`` object in UTC. If ``expires`` is a datetime
object then ``max_age`` will be calculated.
"""
@@ -407,7 +478,7 @@ def set_cookie(self, key, value='', max_age=None, expires=None, path='/',
# IE requires expires, so set it if hasn't been already.
if not expires:
self.cookies[key]['expires'] = cookie_date(time.time() +
- max_age)
+ max_age)
if path is not None:
self.cookies[key]['path'] = path
if domain is not None:
View
31 docs/ref/request-response.txt
@@ -189,8 +189,14 @@ All attributes except ``session`` should be considered read-only.
.. attribute:: HttpRequest.raw_post_data
- The raw HTTP POST data. This is only useful for advanced processing. Use
- ``POST`` instead.
+ The raw HTTP POST data as a byte string. This is useful for processing
+ data in formats other than conventional HTML forms: binary images,
+ XML payloads, etc. For processing form data use ``HttpRequest.POST``.
+
+ .. versionadded:: 1.3
+
+ You can also read from an HttpRequest using a file-like interface. See
+ :meth:`HttpRequest.read()`.
.. attribute:: HttpRequest.urlconf
@@ -249,6 +255,27 @@ Methods
If you write your own XMLHttpRequest call (on the browser side), you'll
have to set this header manually if you want ``is_ajax()`` to work.
+.. method:: HttpRequest.read(size=None)
+.. method:: HttpRequest.readline()
+.. method:: HttpRequest.readlines()
+.. method:: HttpRequest.xreadlines()
+.. method:: HttpRequest.__iter__()
+
+ .. versionadded:: 1.3
+
+ Methods implementing a file-like interface for reading from an
+ HttpRequest instance. This makes it possible to consume an incoming
+ request in a streaming fashion. A common use case would be to process a
+ big XML payload with an iterative parser without constructing a whole
+ XML tree in memory.
+
+ Given this standard interface, an HttpRequest instance can be
+ passed directly to an XML parser such as ElementTree::
+
+ import xml.etree.ElementTree as ET
+ for element in ET.iterparse(request):
+ process(element)
+
QueryDict objects
-----------------
View
66 tests/regressiontests/requests/tests.py
@@ -1,9 +1,10 @@
from datetime import datetime, timedelta
import time
+from StringIO import StringIO
import unittest
from django.http import HttpRequest, HttpResponse, parse_cookie
-from django.core.handlers.wsgi import WSGIRequest
+from django.core.handlers.wsgi import WSGIRequest, LimitedStream
from django.core.handlers.modpython import ModPythonRequest
from django.utils.http import cookie_date
@@ -17,11 +18,11 @@ def test_httprequest(self):
self.assertEqual(request.META.keys(), [])
def test_wsgirequest(self):
- request = WSGIRequest({'PATH_INFO': 'bogus', 'REQUEST_METHOD': 'bogus'})
+ request = WSGIRequest({'PATH_INFO': 'bogus', 'REQUEST_METHOD': 'bogus', 'wsgi.input': StringIO('')})
self.assertEqual(request.GET.keys(), [])
self.assertEqual(request.POST.keys(), [])
self.assertEqual(request.COOKIES.keys(), [])
- self.assertEqual(set(request.META.keys()), set(['PATH_INFO', 'REQUEST_METHOD', 'SCRIPT_NAME']))
+ self.assertEqual(set(request.META.keys()), set(['PATH_INFO', 'REQUEST_METHOD', 'SCRIPT_NAME', 'wsgi.input']))
self.assertEqual(request.META['PATH_INFO'], 'bogus')
self.assertEqual(request.META['REQUEST_METHOD'], 'bogus')
self.assertEqual(request.META['SCRIPT_NAME'], '')
@@ -88,3 +89,62 @@ def test_max_age_expiration(self):
max_age_cookie = response.cookies['max_age']
self.assertEqual(max_age_cookie['max-age'], 10)
self.assertEqual(max_age_cookie['expires'], cookie_date(time.time()+10))
+
+ def test_limited_stream(self):
+ # Read all of a limited stream
+ stream = LimitedStream(StringIO('test'), 2)
+ self.assertEqual(stream.read(), 'te')
+
+ # Read a number of characters greater than the stream has to offer
+ stream = LimitedStream(StringIO('test'), 2)
+ self.assertEqual(stream.read(5), 'te')
+
+ # Read sequentially from a stream
+ stream = LimitedStream(StringIO('12345678'), 8)
+ self.assertEqual(stream.read(5), '12345')
+ self.assertEqual(stream.read(5), '678')
+
+ # Read lines from a stream
+ stream = LimitedStream(StringIO('1234\n5678\nabcd\nefgh\nijkl'), 24)
+ # Read a full line, unconditionally
+ self.assertEqual(stream.readline(), '1234\n')
+ # Read a number of characters less than a line
+ self.assertEqual(stream.readline(2), '56')
+ # Read the rest of the partial line
+ self.assertEqual(stream.readline(), '78\n')
+ # Read a full line, with a character limit greater than the line length
+ self.assertEqual(stream.readline(6), 'abcd\n')
+ # Read the next line, deliberately terminated at the line end
+ self.assertEqual(stream.readline(4), 'efgh')
+ # Read the next line... just the line end
+ self.assertEqual(stream.readline(), '\n')
+ # Read everything else.
+ self.assertEqual(stream.readline(), 'ijkl')
+
+ def test_stream(self):
+ request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
+ self.assertEqual(request.read(), 'name=value')
+
+ def test_read_after_value(self):
+ """
+ Reading from request is allowed after accessing request contents as
+ POST or raw_post_data.
+ """
+ request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
+ self.assertEqual(request.POST, {u'name': [u'value']})
+ self.assertEqual(request.raw_post_data, 'name=value')
+ self.assertEqual(request.read(), 'name=value')
+
+ def test_value_after_read(self):
+ """
+ Construction of POST or raw_post_data is not allowed after reading
+ from request.
+ """
+ request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
+ self.assertEqual(request.read(2), 'na')
+ self.assertRaises(Exception, lambda: request.raw_post_data)
+ self.assertEqual(request.POST, {})
+
+ def test_read_by_lines(self):
+ request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
+ self.assertEqual(list(request), ['name=value'])
Please sign in to comment.
Something went wrong with that request. Please try again.