Skip to content

Commit

Permalink
Fixed #9886 -- Added a file-like interface to HttpRequest. Thanks to …
Browse files Browse the repository at this point in the history
…Ivan Sagalaev for the suggestion and patch.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@14394 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information
freakboy3742 committed Oct 29, 2010
1 parent 3086b55 commit 269e921
Show file tree
Hide file tree
Showing 5 changed files with 236 additions and 95 deletions.
30 changes: 2 additions & 28 deletions django/core/handlers/modpython.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def __init__(self, req):
# naughty, but also pretty harmless. # naughty, but also pretty harmless.
self.path_info = u'/' self.path_info = u'/'
self._post_parse_error = False self._post_parse_error = False
self._stream = self._req
self._read_started = False


def __repr__(self): def __repr__(self):
# Since this is called as part of error handling, we need to be very # Since this is called as part of error handling, we need to be very
Expand Down Expand Up @@ -81,26 +83,6 @@ def is_secure(self):
# mod_python < 3.2.10 doesn't have req.is_https(). # mod_python < 3.2.10 doesn't have req.is_https().
return self._req.subprocess_env.get('HTTPS', '').lower() in ('on', '1') return self._req.subprocess_env.get('HTTPS', '').lower() in ('on', '1')


def _load_post_and_files(self):
"Populates self._post and self._files"
# Only POST requests are parsed; any other method gets an empty QueryDict
# and an empty file mapping.
if self.method != 'POST':
self._post, self._files = http.QueryDict('', encoding=self._encoding), datastructures.MultiValueDict()
return

# Multipart bodies are handed to the upload parser directly from the
# mod_python request object; raw_post_data is set to '' instead of being
# buffered.
if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
self._raw_post_data = ''
try:
self._post, self._files = self.parse_file_upload(self.META, self._req)
except:
# See django.core.handlers.wsgi.WSGIHandler for an explanation
# of what's going on here.
# The bare except is followed by a re-raise, so nothing is swallowed.
self._post = http.QueryDict('')
self._files = datastructures.MultiValueDict()
self._post_parse_error = True
raise
else:
# Any other content type: build POST from the fully read request body.
self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()

def _get_request(self): def _get_request(self):
if not hasattr(self, '_request'): if not hasattr(self, '_request'):
self._request = datastructures.MergeDict(self.POST, self.GET) self._request = datastructures.MergeDict(self.POST, self.GET)
Expand Down Expand Up @@ -162,13 +144,6 @@ def _get_meta(self):
self._meta[key] = value self._meta[key] = value
return self._meta return self._meta


def _get_raw_post_data(self):
# Return the request body, reading it from the mod_python request object
# on first access and caching it in self._raw_post_data afterwards.
try:
return self._raw_post_data
except AttributeError:
# Not cached yet: read the body once and remember it.
self._raw_post_data = self._req.read()
return self._raw_post_data

def _get_method(self): def _get_method(self):
return self.META['REQUEST_METHOD'].upper() return self.META['REQUEST_METHOD'].upper()


Expand All @@ -178,7 +153,6 @@ def _get_method(self):
FILES = property(_get_files) FILES = property(_get_files)
META = property(_get_meta) META = property(_get_meta)
REQUEST = property(_get_request) REQUEST = property(_get_request)
raw_post_data = property(_get_raw_post_data)
method = property(_get_method) method = property(_get_method)


class ModPythonHandler(BaseHandler): class ModPythonHandler(BaseHandler):
Expand Down
127 changes: 68 additions & 59 deletions django/core/handlers/wsgi.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from cStringIO import StringIO from cStringIO import StringIO
except ImportError: except ImportError:
from StringIO import StringIO from StringIO import StringIO
import socket


from django import http from django import http
from django.core import signals from django.core import signals
Expand Down Expand Up @@ -62,20 +63,55 @@
505: 'HTTP VERSION NOT SUPPORTED', 505: 'HTTP VERSION NOT SUPPORTED',
} }


def safe_copyfileobj(fsrc, fdst, length=16*1024, size=0): class LimitedStream(object):
""" '''
A version of shutil.copyfileobj that will not read more than 'size' bytes. LimitedStream wraps another stream in order to not allow reading from it
This makes it safe from clients sending more than CONTENT_LENGTH bytes of past specified amount of bytes.
data in the body. '''
""" def __init__(self, stream, limit, buf_size=64 * 1024 * 1024):
if not size: self.stream = stream
return self.remaining = limit
while size > 0: self.buffer = ''
buf = fsrc.read(min(length, size)) self.buf_size = buf_size
if not buf:
break def _read_limited(self, size=None):
fdst.write(buf) if size is None or size > self.remaining:
size -= len(buf) size = self.remaining
if size == 0:
return ''
result = self.stream.read(size)
self.remaining -= len(result)
return result

def read(self, size=None):
    """
    Read up to ``size`` bytes, draining the internal buffer first.

    ``size`` of None or a negative number means "read everything that is
    still allowed by the limit", following the usual file-object
    convention. Data buffered by an earlier ``readline()`` call is always
    returned before anything new is pulled from the wrapped stream.
    """
    if size is not None and size < 0:
        # A negative size must not reach the slicing below: buffer[:size]
        # would drop data from the end of the buffer, or return '' and
        # look like a premature end of stream.
        size = None
    if size is None:
        result = self.buffer + self._read_limited()
        self.buffer = ''
    elif size < len(self.buffer):
        # The request is fully satisfied by already-buffered data.
        result = self.buffer[:size]
        self.buffer = self.buffer[size:]
    else:
        # Use up the buffer, then top up from the wrapped stream.
        result = self.buffer + self._read_limited(size - len(self.buffer))
        self.buffer = ''
    return result

def readline(self, size=None):
    """
    Read a single line, including its trailing newline if present.

    ``size`` caps the number of bytes returned; None or a negative value
    means no cap. Anything read past the end of the returned line is kept
    in ``self.buffer`` for the next ``read()``/``readline()`` call.
    """
    if size is not None and size < 0:
        size = None
    # Pull more data only while no newline is buffered AND the size cap
    # (if any) has not been reached. Joining these two tests with "or"
    # would keep reading from the wrapped stream even though a complete
    # line is already available.
    while '\n' not in self.buffer and \
            (size is None or len(self.buffer) < size):
        if size is None:
            chunk = self._read_limited()
        else:
            chunk = self._read_limited(size - len(self.buffer))
        if not chunk:
            # The wrapped stream or the limit is exhausted.
            break
        self.buffer += chunk
    # Let StringIO do the line splitting; whatever it leaves unread goes
    # back into the buffer.
    sio = StringIO(self.buffer)
    if size is None:
        line = sio.readline()
    else:
        line = sio.readline(size)
    self.buffer = sio.read()
    return line


class WSGIRequest(http.HttpRequest): class WSGIRequest(http.HttpRequest):
def __init__(self, environ): def __init__(self, environ):
Expand All @@ -98,6 +134,24 @@ def __init__(self, environ):
self.META['SCRIPT_NAME'] = script_name self.META['SCRIPT_NAME'] = script_name
self.method = environ['REQUEST_METHOD'].upper() self.method = environ['REQUEST_METHOD'].upper()
self._post_parse_error = False self._post_parse_error = False
if isinstance(self.environ['wsgi.input'], socket._fileobject):
# Under development server 'wsgi.input' is an instance of
# socket._fileobject which hangs indefinitely on reading bytes past
# available count. To prevent this it's wrapped in LimitedStream
# that doesn't read past Content-Length bytes.
#
# This is not done for other kinds of inputs (like flup's FastCGI
# streams) because they don't suffer from this problem and we can
# avoid using another wrapper with its own .read and .readline
# implementation.
try:
content_length = int(self.environ.get('CONTENT_LENGTH', 0))
except (ValueError, TypeError):
content_length = 0
self._stream = LimitedStream(self.environ['wsgi.input'], content_length)
else:
self._stream = self.environ['wsgi.input']
self._read_started = False


def __repr__(self): def __repr__(self):
# Since this is called as part of error handling, we need to be very # Since this is called as part of error handling, we need to be very
Expand Down Expand Up @@ -133,30 +187,6 @@ def is_secure(self):
return 'wsgi.url_scheme' in self.environ \ return 'wsgi.url_scheme' in self.environ \
and self.environ['wsgi.url_scheme'] == 'https' and self.environ['wsgi.url_scheme'] == 'https'


def _load_post_and_files(self):
# Populates self._post and self._files
# Multipart POST bodies are parsed straight from wsgi.input; other POST
# bodies are parsed from raw_post_data; non-POST requests get empty values.
if self.method == 'POST':
if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
self._raw_post_data = ''
try:
self._post, self._files = self.parse_file_upload(self.META, self.environ['wsgi.input'])
except:
# An error occurred while parsing POST data. Since the request
# handler might access self.POST when formatting the error,
# set self._post and self._files to prevent attempts to parse
# POST data again.
self._post = http.QueryDict('')
self._files = datastructures.MultiValueDict()
# Mark that an error occurred. This allows self.__repr__ to
# be explicit about it instead of simply representing an
# empty POST
self._post_parse_error = True
raise
else:
self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
else:
self._post, self._files = http.QueryDict('', encoding=self._encoding), datastructures.MultiValueDict()

def _get_request(self): def _get_request(self):
if not hasattr(self, '_request'): if not hasattr(self, '_request'):
self._request = datastructures.MergeDict(self.POST, self.GET) self._request = datastructures.MergeDict(self.POST, self.GET)
Expand Down Expand Up @@ -192,32 +222,11 @@ def _get_files(self):
self._load_post_and_files() self._load_post_and_files()
return self._files return self._files


def _get_raw_post_data(self):
# Return the request body, copying it from wsgi.input on first access and
# caching it in self._raw_post_data. safe_copyfileobj is asked to copy
# CONTENT_LENGTH bytes, and nothing is read when that value is
# missing, invalid or not positive.
try:
return self._raw_post_data
except AttributeError:
buf = StringIO()
try:
# CONTENT_LENGTH might be absent if POST doesn't have content at all (lighttpd)
content_length = int(self.environ.get('CONTENT_LENGTH', 0))
except (ValueError, TypeError):
# If CONTENT_LENGTH was empty string or not an integer, don't
# error out. We've also seen None passed in here (against all
# specs, but see ticket #8259), so we handle TypeError as well.
content_length = 0
if content_length > 0:
safe_copyfileobj(self.environ['wsgi.input'], buf,
size=content_length)
self._raw_post_data = buf.getvalue()
buf.close()
return self._raw_post_data

GET = property(_get_get, _set_get) GET = property(_get_get, _set_get)
POST = property(_get_post, _set_post) POST = property(_get_post, _set_post)
COOKIES = property(_get_cookies, _set_cookies) COOKIES = property(_get_cookies, _set_cookies)
FILES = property(_get_files) FILES = property(_get_files)
REQUEST = property(_get_request) REQUEST = property(_get_request)
raw_post_data = property(_get_raw_post_data)


class WSGIHandler(base.BaseHandler): class WSGIHandler(base.BaseHandler):
initLock = Lock() initLock = Lock()
Expand Down
77 changes: 74 additions & 3 deletions django/http/__init__.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
from pprint import pformat from pprint import pformat
from urllib import urlencode from urllib import urlencode
from urlparse import urljoin from urlparse import urljoin
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
try: try:
# The mod_python version is more efficient, so try importing it first. # The mod_python version is more efficient, so try importing it first.
from mod_python.util import parse_qsl from mod_python.util import parse_qsl
Expand Down Expand Up @@ -132,6 +136,73 @@ def parse_file_upload(self, META, post_data):
parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding) parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding)
return parser.parse() return parser.parse()


def _get_raw_post_data(self):
if not hasattr(self, '_raw_post_data'):
if self._read_started:
raise Exception("You cannot access raw_post_data after reading from request's data stream")
self._raw_post_data = self.read()
self._stream = StringIO(self._raw_post_data)
return self._raw_post_data
raw_post_data = property(_get_raw_post_data)

def _mark_post_parse_error(self):
    """
    Record that the POST body could not be parsed.

    POST and FILES are set to empty containers and the
    ``_post_parse_error`` flag is raised.
    """
    empty_post = QueryDict('')
    empty_files = MultiValueDict()
    self._post, self._files = empty_post, empty_files
    self._post_parse_error = True

def _load_post_and_files(self):
    """Populate ``self._post`` and ``self._files`` from the request body."""
    if self.method != 'POST':
        # Only POST requests are parsed; everything else gets empty values.
        self._post, self._files = QueryDict('', encoding=self._encoding), MultiValueDict()
        return
    if self._read_started:
        # The stream was already consumed through the file-like interface,
        # so the body can no longer be parsed.
        self._mark_post_parse_error()
        return

    content_type = self.META.get('CONTENT_TYPE', '')
    if not content_type.startswith('multipart'):
        # Non-multipart body: parse the fully read body as a query string.
        post = QueryDict(self.raw_post_data, encoding=self._encoding)
        self._post, self._files = post, MultiValueDict()
        return

    # Multipart body: the request itself is passed to the parser as the
    # data source, and raw_post_data is set to '' instead of being buffered.
    self._raw_post_data = ''
    try:
        parsed = self.parse_file_upload(self.META, self)
        self._post, self._files = parsed
    except:
        # An error occurred while parsing POST data. The request handler
        # might access self.POST while formatting the error, so set
        # self._post and self._files to prevent a second parse attempt,
        # and mark the error so that self.__repr__ can be explicit about
        # it instead of showing an empty POST. The exception is re-raised.
        self._mark_post_parse_error()
        raise

## File-like and iterator interface.
##
## Expects self._stream to be set to an appropriate source of bytes by
## a corresponding request subclass (WSGIRequest or ModPythonRequest).
## Also when request data has already been read by request.POST or
## request.raw_post_data, self._stream points to a StringIO instance
## containing that data.

def read(self, *args, **kwargs):
    """
    Read from the request's data stream, forwarding all arguments to the
    underlying stream's ``read()``. Marks the stream as consumed first.
    """
    self._read_started = True
    stream = self._stream
    return stream.read(*args, **kwargs)

def readline(self, *args, **kwargs):
    """
    Read one line from the request's data stream, forwarding all arguments
    to the underlying stream's ``readline()``. Marks the stream as consumed
    first.
    """
    self._read_started = True
    stream = self._stream
    return stream.readline(*args, **kwargs)

def xreadlines(self):
while True:
buf = self.readline()
if not buf:
break
yield buf
__iter__ = xreadlines

def readlines(self):
    """Return all remaining lines of the request body as a list."""
    return [line for line in self]

class QueryDict(MultiValueDict): class QueryDict(MultiValueDict):
""" """
A specialized MultiValueDict that takes a query string when initialized. A specialized MultiValueDict that takes a query string when initialized.
Expand Down Expand Up @@ -198,7 +269,7 @@ def __deepcopy__(self, memo):
for key, value in dict.items(self): for key, value in dict.items(self):
dict.__setitem__(result, copy.deepcopy(key, memo), copy.deepcopy(value, memo)) dict.__setitem__(result, copy.deepcopy(key, memo), copy.deepcopy(value, memo))
return result return result

def setlist(self, key, list_): def setlist(self, key, list_):
self._assert_mutable() self._assert_mutable()
key = str_to_unicode(key, self.encoding) key = str_to_unicode(key, self.encoding)
Expand Down Expand Up @@ -385,7 +456,7 @@ def set_cookie(self, key, value='', max_age=None, expires=None, path='/',
""" """
Sets a cookie. Sets a cookie.
``expires`` can be a string in the correct format or a ``expires`` can be a string in the correct format or a
``datetime.datetime`` object in UTC. If ``expires`` is a datetime ``datetime.datetime`` object in UTC. If ``expires`` is a datetime
object then ``max_age`` will be calculated. object then ``max_age`` will be calculated.
""" """
Expand All @@ -407,7 +478,7 @@ def set_cookie(self, key, value='', max_age=None, expires=None, path='/',
# IE requires expires, so set it if hasn't been already. # IE requires expires, so set it if hasn't been already.
if not expires: if not expires:
self.cookies[key]['expires'] = cookie_date(time.time() + self.cookies[key]['expires'] = cookie_date(time.time() +
max_age) max_age)
if path is not None: if path is not None:
self.cookies[key]['path'] = path self.cookies[key]['path'] = path
if domain is not None: if domain is not None:
Expand Down
31 changes: 29 additions & 2 deletions docs/ref/request-response.txt
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -189,8 +189,14 @@ All attributes except ``session`` should be considered read-only.


.. attribute:: HttpRequest.raw_post_data .. attribute:: HttpRequest.raw_post_data


The raw HTTP POST data. This is only useful for advanced processing. Use The raw HTTP POST data as a byte string. This is useful for processing
``POST`` instead. data in formats other than conventional HTML forms: binary images,
XML payload etc. For processing form data use ``HttpRequest.POST``.

.. versionadded:: 1.3

You can also read from an HttpRequest using a file-like interface. See
:meth:`HttpRequest.read()`.


.. attribute:: HttpRequest.urlconf .. attribute:: HttpRequest.urlconf


Expand Down Expand Up @@ -249,6 +255,27 @@ Methods
If you write your own XMLHttpRequest call (on the browser side), you'll If you write your own XMLHttpRequest call (on the browser side), you'll
have to set this header manually if you want ``is_ajax()`` to work. have to set this header manually if you want ``is_ajax()`` to work.


.. method:: HttpRequest.read(size=None)
.. method:: HttpRequest.readline()
.. method:: HttpRequest.readlines()
.. method:: HttpRequest.xreadlines()
.. method:: HttpRequest.__iter__()

.. versionadded:: 1.3

Methods implementing a file-like interface for reading from an
HttpRequest instance. This makes it possible to consume an incoming
request in a streaming fashion. A common use-case would be to process a
big XML payload with an iterative parser without constructing a whole
XML tree in memory.

Given this standard interface, an HttpRequest instance can be
passed directly to an XML parser such as ElementTree::

import xml.etree.ElementTree as ET
for element in ET.iterparse(request):
process(element)



QueryDict objects QueryDict objects
----------------- -----------------
Expand Down
Loading

0 comments on commit 269e921

Please sign in to comment.