From 9ec2cfc5dc64e1a11f31feed8f1f819c4fa1052d Mon Sep 17 00:00:00 2001 From: Yeray Diaz Diaz Date: Sun, 20 Oct 2019 13:25:00 +0100 Subject: [PATCH] Multipart files tweaks (#482) * Allow filenames as None in multipart encoding * Allow str file contents in multipart encode * Some formatting changes on `advanced.md` * Document multipart file encoding in the advanced docs * Update docs/advanced.md Co-Authored-By: Florimond Manca --- docs/advanced.md | 81 +++++++++++++++++++++++++++++++++-------- httpx/multipart.py | 29 +++++++-------- tests/test_multipart.py | 33 +++++++++++++++++ 3 files changed, 113 insertions(+), 30 deletions(-) diff --git a/docs/advanced.md b/docs/advanced.md index 8a69d7b287..f0d402ba63 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -241,17 +241,22 @@ with httpx.Client(proxies=proxy) as client: information at `Client` initialization. ## Timeout fine-tuning -HTTPX offers various request timeout management options. Three types of timeouts are available: **connect** timeouts, -**write** timeouts and **read** timeouts. -* The **connect timeout** specifies the maximum amount of time to wait until a connection to the requested host is established. -If HTTPX is unable to connect within this time frame, a `ConnectTimeout` exception is raised. -* The **write timeout** specifies the maximum duration to wait for a chunk of data to be sent (for example, a chunk of the request body). -If HTTPX is unable to send data within this time frame, a `WriteTimeout` exception is raised. -* The **read timeout** specifies the maximum duration to wait for a chunk of data to be received (for example, a chunk of the response body). -If HTTPX is unable to receive data within this time frame, a `ReadTimeout` exception is raised. +HTTPX offers various request timeout management options. Three types of timeouts +are available: **connect** timeouts, **write** timeouts and **read** timeouts. 
+ +* The **connect timeout** specifies the maximum amount of time to wait until +a connection to the requested host is established. If HTTPX is unable to connect +within this time frame, a `ConnectTimeout` exception is raised. +* The **write timeout** specifies the maximum duration to wait for a chunk of +data to be sent (for example, a chunk of the request body). If HTTPX is unable +to send data within this time frame, a `WriteTimeout` exception is raised. +* The **read timeout** specifies the maximum duration to wait for a chunk of +data to be received (for example, a chunk of the response body). If HTTPX is +unable to receive data within this time frame, a `ReadTimeout` exception is raised. ### Setting timeouts + You can set timeouts on two levels: - For a given request: @@ -274,13 +279,13 @@ with httpx.Client(timeout=5) as client: Besides, you can pass timeouts in two forms: -- A number, which sets the read, write and connect timeouts to the same value, as in the examples above. +- A number, which sets the read, write and connect timeouts to the same value, as in the examples above. - A `TimeoutConfig` instance, which allows to define the read, write and connect timeouts independently: ```python timeout = httpx.TimeoutConfig( - connect_timeout=5, - read_timeout=10, + connect_timeout=5, + read_timeout=10, write_timeout=15 ) @@ -288,10 +293,12 @@ resp = httpx.get('http://example.com/api/v1/example', timeout=timeout) ``` ### Default timeouts + By default all types of timeouts are set to 5 second. - + ### Disabling timeouts -To disable timeouts, you can pass `None` as a timeout parameter. + +To disable timeouts, you can pass `None` as a timeout parameter. Note that currently this is not supported by the top-level API. 
```python @@ -305,9 +312,53 @@ with httpx.Client(timeout=None) as client: timeout = httpx.TimeoutConfig( - connect_timeout=5, - read_timeout=None, + connect_timeout=5, + read_timeout=None, write_timeout=5 ) httpx.get(url, timeout=timeout) # Does not timeout, returns after 10s ``` + +## Multipart file encoding + +As mentioned in the [quickstart](/quickstart#sending-multipart-file-uploads), +multipart file encoding is available by passing a dictionary with the +name of the payloads as keys and a tuple of elements as values. + +```python +>>> files = {'upload-file': ('report.xls', open('report.xls', 'rb'), 'application/vnd.ms-excel')} +>>> r = httpx.post("https://httpbin.org/post", files=files) +>>> print(r.text) +{ + ... + "files": { + "upload-file": "<... binary content ...>" + }, + ... +} +``` + +More specifically, this tuple must have at least two elements and a maximum of three: +- The first one is an optional file name which can be set to `None`. +- The second may be a file-like object or a string which will be automatically +encoded in UTF-8. +- An optional third element can be included with the +[MIME type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_Types) +of the file being uploaded. If not specified, HTTPX will attempt to guess the MIME type +based on the file name specified as the first element of the tuple; if that +is set to `None` or it cannot be inferred from it, HTTPX will default to +`application/octet-stream`. + +```python +>>> files = {'upload-file': (None, 'text content', 'text/plain')} +>>> r = httpx.post("https://httpbin.org/post", files=files) +>>> print(r.text) +{ + ... + "files": {}, + "form": { + "upload-file": "text content" + }, + ... 
+} +``` diff --git a/httpx/multipart.py b/httpx/multipart.py index 14cf26a9aa..d850f79dcf 100644 --- a/httpx/multipart.py +++ b/httpx/multipart.py @@ -59,26 +59,25 @@ def __init__( ) def guess_content_type(self) -> str: - return mimetypes.guess_type(self.filename)[0] or "application/octet-stream" + if self.filename: + return mimetypes.guess_type(self.filename)[0] or "application/octet-stream" + else: + return "application/octet-stream" def render_headers(self) -> bytes: - name = _format_param("name", self.name) - filename = _format_param("filename", self.filename) + parts = [b"Content-Disposition: form-data; ", _format_param("name", self.name)] + if self.filename: + filename = _format_param("filename", self.filename) + parts.extend([b"; ", filename]) content_type = self.content_type.encode() - return b"".join( - [ - b"Content-Disposition: form-data; ", - name, - b"; ", - filename, - b"\r\nContent-Type: ", - content_type, - b"\r\n\r\n", - ] - ) + parts.extend([b"\r\nContent-Type: ", content_type, b"\r\n\r\n"]) + return b"".join(parts) def render_data(self) -> bytes: - content = self.file.read() + if isinstance(self.file, str): + content = self.file + else: + content = self.file.read() return content.encode("utf-8") if isinstance(content, str) else content diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 052cb90d42..41e69f7ab9 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -127,6 +127,39 @@ def test_multipart_encode(): ) +def test_multipart_encode_files_allows_filenames_as_none(): + files = {"file": (None, io.BytesIO(b""))} + with mock.patch("os.urandom", return_value=os.urandom(16)): + boundary = binascii.hexlify(os.urandom(16)).decode("ascii") + + body, content_type = multipart.multipart_encode(data={}, files=files) + + assert content_type == f"multipart/form-data; boundary={boundary}" + assert body == ( + '--{0}\r\nContent-Disposition: form-data; name="file"\r\n' + "Content-Type: application/octet-stream\r\n\r\n\r\n" + 
"--{0}--\r\n" + "".format(boundary).encode("ascii") + ) + + +def test_multipart_encode_files_allows_str_content(): + files = {"file": ("test.txt", "", "text/plain")} + with mock.patch("os.urandom", return_value=os.urandom(16)): + boundary = binascii.hexlify(os.urandom(16)).decode("ascii") + + body, content_type = multipart.multipart_encode(data={}, files=files) + + assert content_type == f"multipart/form-data; boundary={boundary}" + assert body == ( + '--{0}\r\nContent-Disposition: form-data; name="file"; ' + 'filename="test.txt"\r\n' + "Content-Type: text/plain\r\n\r\n\r\n" + "--{0}--\r\n" + "".format(boundary).encode("ascii") + ) + + class TestHeaderParamHTML5Formatting: def test_unicode(self): param = multipart._format_param("filename", "n\u00e4me")