Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refs #21442 -- Added content type parsing #17546

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
6 changes: 6 additions & 0 deletions django/core/handlers/asgi.py
Expand Up @@ -19,6 +19,7 @@
HttpResponseServerError,
QueryDict,
parse_cookie,
parsers,
)
from django.urls import set_script_prefix
from django.utils.functional import cached_property
Expand Down Expand Up @@ -111,6 +112,11 @@ def __init__(self, scope, body_file):
self._stream = body_file
# Other bits.
self.resolver_match = None
self._parsers = [
parsers.FormParser,
parsers.MultiPartParser,
parsers.JSONParser,
]

@cached_property
def GET(self):
Expand Down
7 changes: 6 additions & 1 deletion django/core/handlers/wsgi.py
Expand Up @@ -3,7 +3,7 @@
from django.conf import settings
from django.core import signals
from django.core.handlers import base
from django.http import HttpRequest, QueryDict, parse_cookie
from django.http import HttpRequest, QueryDict, parse_cookie, parsers
from django.urls import set_script_prefix
from django.utils.encoding import repercent_broken_unicode
from django.utils.functional import cached_property
Expand Down Expand Up @@ -78,6 +78,11 @@ def __init__(self, environ):
self._stream = LimitedStream(self.environ["wsgi.input"], content_length)
self._read_started = False
self.resolver_match = None
self._parsers = [
parsers.FormParser,
parsers.MultiPartParser,
parsers.JSONParser,
]

def _get_scheme(self):
return self.environ.get("wsgi.url_scheme")
Expand Down
48 changes: 33 additions & 15 deletions django/http/multipartparser.py
Expand Up @@ -54,7 +54,7 @@ class MultiPartParser:

boundary_re = _lazy_re_compile(r"[ -~]{0,200}[!-~]")

def __init__(self, META, input_data, upload_handlers, encoding=None):
def __init__(self, META, input_data, upload_handlers, encoding=None, parsers=None):
"""
Initialize the MultiPartParser object.

Expand Down Expand Up @@ -112,6 +112,7 @@ def __init__(self, META, input_data, upload_handlers, encoding=None):
self._encoding = encoding or settings.DEFAULT_CHARSET
self._content_length = content_length
self._upload_handlers = upload_handlers
self._parsers = parsers

def parse(self):
# Call the actual parse routine and close all open files in case of
Expand Down Expand Up @@ -236,21 +237,38 @@ def _parse(self):
data = field_stream.read(size=read_size)
num_bytes_read += len(data)

# Add two here to make the check consistent with the
# x-www-form-urlencoded check that includes '&='.
num_bytes_read += len(field_name) + 2
if (
settings.DATA_UPLOAD_MAX_MEMORY_SIZE is not None
and num_bytes_read > settings.DATA_UPLOAD_MAX_MEMORY_SIZE
):
raise RequestDataTooBig(
"Request body exceeded "
"settings.DATA_UPLOAD_MAX_MEMORY_SIZE."
try:
content_type = meta_data["content-type"][0].strip()
except KeyError:
content_type = None
selected_parser = None
if content_type:
for parser in self._parsers:
if parser.can_handle(content_type):
selected_parser = parser
break
if selected_parser:
# TODO maybe .parse() shouldn't return an empty MultiValueDict
# for files if it's not needed
self._post.appendlist(
field_name, selected_parser.parse(data)[0]
)
else:
# Add two here to make the check consistent with the
# x-www-form-urlencoded check that includes '&='.
num_bytes_read += len(field_name) + 2
if (
settings.DATA_UPLOAD_MAX_MEMORY_SIZE is not None
and num_bytes_read > settings.DATA_UPLOAD_MAX_MEMORY_SIZE
):
raise RequestDataTooBig(
"Request body exceeded "
"settings.DATA_UPLOAD_MAX_MEMORY_SIZE."
)

self._post.appendlist(
field_name, force_str(data, encoding, errors="replace")
)

self._post.appendlist(
field_name, force_str(data, encoding, errors="replace")
)
elif item_type == FILE:
# Avoid storing more than DATA_UPLOAD_MAX_NUMBER_FILES.
num_files += 1
Expand Down
90 changes: 90 additions & 0 deletions django/http/parsers.py
@@ -0,0 +1,90 @@
import json
from io import BytesIO

from django.core.exceptions import BadRequest
from django.http.multipartparser import MultiPartParser as _MultiPartParser
from django.utils.datastructures import ImmutableList, MultiValueDict


class BaseParser:
    """
    Base class for request body parsers.

    Subclasses set ``media_type`` to the content type they handle and
    override ``parse()`` to return a ``(data, files)`` two-tuple.
    """

    # Content type handled by the parser. ``None`` on the base class, which
    # therefore never claims to handle anything.
    media_type = None
    # Parsers made available for nested content (e.g. the parts of a
    # multipart body). ``None`` until a caller assigns them.
    parsers = None

    def __init__(self, request):
        self.request = request

    @classmethod
    def can_handle(cls, media_type):
        """
        Return ``True`` if ``media_type`` equals this parser's ``media_type``.

        A parser without a declared ``media_type`` never matches, so a
        request lacking a content type cannot select the abstract base.
        """
        return cls.media_type is not None and media_type == cls.media_type

    def parse(self, data):
        """
        Parse ``data`` and return a ``(data, files)`` two-tuple.

        Raise ``NotImplementedError`` on the base class: callers unpack the
        return value, so silently returning ``None`` would only surface later
        as an unrelated ``TypeError``.
        """
        raise NotImplementedError(
            "subclasses of BaseParser must provide a parse() method"
        )


class FormParser(BaseParser):
    """Parser for URL-encoded form bodies."""

    media_type = "application/x-www-form-urlencoded"

    def __init__(self, request):
        """
        Store ``request`` and reject any explicitly declared non-UTF-8
        encoding by raising ``BadRequest``.
        """
        super().__init__(request)
        # According to RFC 1866, the "application/x-www-form-urlencoded"
        # content type does not have a charset and should be always treated
        # as UTF-8.
        declared = self.request._encoding
        if declared is None or declared.lower() == "utf-8":
            return
        raise BadRequest(
            "HTTP requests with the 'application/x-www-form-urlencoded' "
            "content type must be UTF-8 encoded."
        )

    def parse(self, data):
        """
        Return ``data`` decoded as a UTF-8 ``QueryDict`` together with an
        empty ``MultiValueDict`` (this content type carries no files).
        """
        # Imported locally rather than at module level -- presumably to
        # avoid a circular import with django.http; confirm before moving.
        from django.http import QueryDict

        form_data = QueryDict(data, encoding="utf-8")
        return form_data, MultiValueDict()


class MultiPartParser(BaseParser):
    """Parser for ``multipart/form-data`` bodies."""

    media_type = "multipart/form-data"

    def parse(self, data):
        """
        Parse the multipart body of ``self.request`` and return the
        ``(post, files)`` pair produced by the low-level multipart parser.

        ``data`` is ignored: the body is taken from ``request._body`` when it
        has already been read, otherwise from the request object itself.
        """
        request = self.request
        # Use already read data when available.
        if hasattr(request, "_body"):
            body_source = BytesIO(request._body)
        else:
            body_source = request

        # TODO - POST and data can be called on the same request. This parser
        # can be called multiple times on the same request. While `_post`
        # `_data` are different _files is the same. Allow parsing them twice,
        # but don't change the handlers?
        files_not_loaded = not hasattr(request, "_files")
        if files_not_loaded:
            request.upload_handlers = ImmutableList(
                request.upload_handlers,
                warning=(
                    "You cannot alter upload handlers after the upload has been "
                    "processed."
                ),
            )

        multipart = _MultiPartParser(
            request.META,
            body_source,
            request.upload_handlers,
            request.encoding,
            self.parsers,
        )
        # TODO _post could also be _data
        parsed_post, parsed_files = multipart.parse()
        return parsed_post, parsed_files


class JSONParser(BaseParser):
    """Parser for ``application/json`` bodies."""

    media_type = "application/json"

    def parse(self, data):
        """
        Decode ``data`` as JSON and return it with an empty ``MultiValueDict``.

        Constants handed to ``parse_constant`` by ``json.loads`` are rejected
        with ``ValueError`` instead of being converted to floats.
        """

        def reject_constant(token):
            message = "Out of range float values are not JSON compliant: "
            raise ValueError(message + repr(token))

        decoded = json.loads(data, parse_constant=reject_constant)
        return decoded, MultiValueDict()
91 changes: 59 additions & 32 deletions django/http/request.py
Expand Up @@ -7,18 +7,14 @@
from django.conf import settings
from django.core import signing
from django.core.exceptions import (
BadRequest,
DisallowedHost,
ImproperlyConfigured,
RequestDataTooBig,
TooManyFieldsSent,
)
from django.core.files import uploadhandler
from django.http.multipartparser import (
MultiPartParser,
MultiPartParserError,
TooManyFilesSent,
)
from django.http import parsers
from django.http.multipartparser import MultiPartParser
from django.utils.datastructures import (
CaseInsensitiveMapping,
ImmutableList,
Expand Down Expand Up @@ -73,6 +69,11 @@ def __init__(self):
self.resolver_match = None
self.content_type = None
self.content_params = None
self._parsers = [
parsers.FormParser,
parsers.MultiPartParser,
parsers.JSONParser,
]

def __repr__(self):
if self.method is None or not self.get_full_path():
Expand Down Expand Up @@ -350,49 +351,52 @@ def _mark_post_parse_error(self):
self._post = QueryDict()
self._files = MultiValueDict()

def _load_post_and_files(self):
"""Populate self._post and self._files if the content-type is a form type"""
if self.method != "POST":
def _load_post_and_files(
self, data_attr="_post", parser_list=None, methods=("POST",)
):
if methods and self.method not in methods:
self._post, self._files = (
QueryDict(encoding=self._encoding),
MultiValueDict(),
)
return
if self._read_started and not hasattr(self, "_body"):
self._mark_post_parse_error()
setattr(self, data_attr, QueryDict())
self._files = MultiValueDict()
return

if self.content_type == "multipart/form-data":
if hasattr(self, "_body"):
# Use already read data
data = BytesIO(self._body)
else:
data = self
if parser_list is None:
parser_list = [parsers.FormParser, parsers.MultiPartParser]
selected_parser = None
for parser in parser_list:
if parser.can_handle(self.content_type):
selected_parser = parser
break

if selected_parser:
parser = selected_parser(self)
try:
self._post, self._files = self.parse_file_upload(self.META, data)
except (MultiPartParserError, TooManyFilesSent):
if self.content_type == "multipart/form-data":
parser.parsers = (parser(self) for parser in parser_list)
data, self._files = parser.parse(None)
else:
data, self._files = parser.parse(self.body)
setattr(self, data_attr, data)
except Exception as e:
# TODO 'application/x-www-form-urlencoded' didn't do this.
# An error occurred while parsing POST data. Since when
# formatting the error the request handler might access
# self.POST, set self._post and self._file to prevent
# attempts to parse POST data again.
self._mark_post_parse_error()
raise
elif self.content_type == "application/x-www-form-urlencoded":
# According to RFC 1866, the "application/x-www-form-urlencoded"
# content type does not have a charset and should be always treated
# as UTF-8.
if self._encoding is not None and self._encoding.lower() != "utf-8":
raise BadRequest(
"HTTP requests with the 'application/x-www-form-urlencoded' "
"content type must be UTF-8 encoded."
)
self._post = QueryDict(self.body, encoding="utf-8")
self._files = MultiValueDict()
data_attr = QueryDict()
self._files = MultiValueDict()
raise e
else:
self._post, self._files = (
data, self._files = (
QueryDict(encoding=self._encoding),
MultiValueDict(),
)
setattr(self, data_attr, data)

def close(self):
if hasattr(self, "_files"):
Expand Down Expand Up @@ -427,6 +431,29 @@ def __iter__(self):
def readlines(self):
return list(self)

@property
def parsers(self):
    """Return the parsers configured for this request's body."""
    return self._parsers

@parsers.setter
def parsers(self, parsers):
    """
    Replace the configured parsers.

    Raise ``AttributeError`` once ``_data`` or ``_files`` exists on the
    request, i.e. after its content has been processed.
    """
    already_processed = any(
        hasattr(self, loaded) for loaded in ("_data", "_files")
    )
    if already_processed:
        raise AttributeError(
            "You cannot change parsers after processing the request's content."
        )
    self._parsers = parsers

# TODO should this property be on [WSGI|ASGI]Request?
@property
def data(self):
    """
    Return the parsed request body, parsing it on first access.

    Parsing uses the request's configured parsers and is not restricted to
    any HTTP method; the result is cached on ``_data``.
    """
    if hasattr(self, "_data"):
        return self._data
    self._load_post_and_files("_data", self.parsers, methods=None)
    return self._data

@data.setter
def data(self, data):
    """Store ``data`` as the parsed body, bypassing the parsers."""
    self._data = data


class HttpHeaders(CaseInsensitiveMapping):
HTTP_PREFIX = "HTTP_"
Expand Down
1 change: 1 addition & 0 deletions docs/ref/index.txt
Expand Up @@ -21,6 +21,7 @@ API Reference
migration-operations
models/index
paginator
parsers
request-response
schema-editor
settings
Expand Down