diff --git a/lib/plug/adapters/cowboy/conn.ex b/lib/plug/adapters/cowboy/conn.ex index 122a9be6..a023b158 100644 --- a/lib/plug/adapters/cowboy/conn.ex +++ b/lib/plug/adapters/cowboy/conn.ex @@ -62,28 +62,6 @@ defmodule Plug.Adapters.Cowboy.Conn do :cowboy_req.body(req, opts) end - def parse_req_multipart(req, opts, callback) do - # We need to remove the length from the list - # otherwise cowboy will attempt to load the - # whole length at once. - {limit, opts} = Keyword.pop(opts, :length, 8_000_000) - - # We need to construct the header opts using defaults here, - # since once opts are passed cowboy defaults are not applied anymore. - {headers_opts, opts} = Keyword.pop(opts, :headers, []) - headers_opts = headers_opts ++ [length: 64_000, read_length: 64_000, read_timeout: 5000] - - {:ok, limit, acc, req} = parse_multipart(:cowboy_req.part(req, headers_opts), limit, opts, headers_opts, [], callback) - - params = Enum.reduce(acc, %{}, &Plug.Conn.Query.decode_pair/2) - - if limit > 0 do - {:ok, params, req} - else - {:more, params, req} - end - end - ## Helpers defp scheme(:tcp), do: :http @@ -93,74 +71,4 @@ defmodule Plug.Adapters.Cowboy.Conn do segments = :binary.split(path, "/", [:global]) for segment <- segments, segment != "", do: segment end - - ## Multipart - - defp parse_multipart({:ok, headers, req}, limit, opts, headers_opts, acc, callback) when limit >= 0 do - case callback.(headers) do - {:binary, name} -> - {:ok, limit, body, req} = - parse_multipart_body(:cowboy_req.part_body(req, opts), limit, opts, "") - - Plug.Conn.Utils.validate_utf8!(body, Plug.Parsers.BadEncodingError, "multipart body") - parse_multipart(:cowboy_req.part(req, headers_opts), limit, opts, headers_opts, - [{name, body}|acc], callback) - - {:file, name, path, %Plug.Upload{} = uploaded} -> - {:ok, file} = File.open(path, [:write, :binary, :delayed_write, :raw]) - - {:ok, limit, req} = - parse_multipart_file(:cowboy_req.part_body(req, opts), limit, opts, file) - - :ok = File.close(file) 
- parse_multipart(:cowboy_req.part(req, headers_opts), limit, opts, headers_opts, - [{name, uploaded}|acc], callback) - - :skip -> - parse_multipart(:cowboy_req.part(req, headers_opts), limit, opts, headers_opts, - acc, callback) - end - end - - defp parse_multipart({:ok, _headers, req}, limit, _opts, _headers_opts, acc, _callback) do - {:ok, limit, acc, req} - end - - defp parse_multipart({:done, req}, limit, _opts, _headers_opts, acc, _callback) do - {:ok, limit, acc, req} - end - - defp parse_multipart_body({:more, tail, req}, limit, opts, body) when limit >= byte_size(tail) do - parse_multipart_body(:cowboy_req.part_body(req, opts), limit - byte_size(tail), opts, body <> tail) - end - - defp parse_multipart_body({:more, tail, req}, limit, _opts, body) do - {:ok, limit - byte_size(tail), body, req} - end - - defp parse_multipart_body({:ok, tail, req}, limit, _opts, body) when limit >= byte_size(tail) do - {:ok, limit - byte_size(tail), body <> tail, req} - end - - defp parse_multipart_body({:ok, tail, req}, limit, _opts, body) do - {:ok, limit - byte_size(tail), body, req} - end - - defp parse_multipart_file({:more, tail, req}, limit, opts, file) when limit >= byte_size(tail) do - IO.binwrite(file, tail) - parse_multipart_file(:cowboy_req.part_body(req, opts), limit - byte_size(tail), opts, file) - end - - defp parse_multipart_file({:more, tail, req}, limit, _opts, _file) do - {:ok, limit - byte_size(tail), req} - end - - defp parse_multipart_file({:ok, tail, req}, limit, _opts, file) when limit >= byte_size(tail) do - IO.binwrite(file, tail) - {:ok, limit - byte_size(tail), req} - end - - defp parse_multipart_file({:ok, tail, req}, limit, _opts, _file) do - {:ok, limit - byte_size(tail), req} - end end diff --git a/lib/plug/adapters/test/conn.ex b/lib/plug/adapters/test/conn.ex index ae0418d4..d23a38eb 100644 --- a/lib/plug/adapters/test/conn.ex +++ b/lib/plug/adapters/test/conn.ex @@ -86,18 +86,6 @@ defmodule Plug.Adapters.Test.Conn do {tag, data, %{state | 
req_body: rest}} end - def parse_req_multipart(%{params: params} = state, _opts, _callback) do - {:ok, params, state} - end - - def parse_req_multipart(%{req_body: multipart} = state, opts, callback) do - boundary = Keyword.get(opts, :boundary) - params = parse_multipart(:cow_multipart.parse_headers(multipart, boundary), boundary, [], callback) - |> Enum.reduce(%{}, &Plug.Conn.Query.decode_pair/2) - - {:ok, params, state} - end - ## Private helpers defp body_or_params(nil, _query, headers), @@ -112,10 +100,11 @@ defmodule Plug.Adapters.Test.Conn do end defp body_or_params(params, query, headers) when is_map(params) do - content_type = List.keyfind(headers, "content-type", 0, {"content-type", "multipart/mixed; charset: utf-8"}) + content_type = List.keyfind(headers, "content-type", 0, + {"content-type", "multipart/mixed; boundary=plug_conn_test"}) headers = List.keystore(headers, "content-type", 0, content_type) params = Map.merge(Plug.Conn.Query.decode(query), stringify_params(params)) - {"", params, headers} + {"--plug_conn_test--", params, headers} end defp stringify_params([{_, _}|_] = params), @@ -146,27 +135,4 @@ defmodule Plug.Adapters.Test.Conn do 0 -> :ok end end - - defp parse_multipart({:ok, headers, body}, boundary, acc, callback) do - {:done, content, rest} = :cow_multipart.parse_body(body, boundary) - - case callback.(headers)do - {:file, name, path, %Plug.Upload{} = uploaded} -> - {:ok, file} = File.open(path, [:write, :binary, :delayed_write, :raw]) - IO.binwrite(file, content) - File.close(file) - - parse_multipart(:cow_multipart.parse_headers(rest, boundary), boundary, [{name, uploaded}|acc], callback) - - {:binary, name} -> - parse_multipart(:cow_multipart.parse_headers(rest, boundary), boundary, [{name, content}|acc], callback) - - :skip -> - parse_multipart(:cow_multipart.parse_headers(rest, boundary), boundary, acc, callback) - end - end - - defp parse_multipart({:done, _rest}, _boundary, acc, _callback) do - acc - end end diff --git 
a/lib/plug/conn.ex b/lib/plug/conn.ex index f4339910..a1acef93 100644 --- a/lib/plug/conn.ex +++ b/lib/plug/conn.ex @@ -774,12 +774,12 @@ defmodule Plug.Conn do ## Options - * `:length` - sets the maximum number of bytes to read from the body for each - chunk, defaults to 8_000_000 bytes - * `:read_length` - sets the amount of bytes to read at one time from the - underlying socket to fill the chunk, defaults to 1_000_000 bytes - * `:read_timeout` - sets the timeout for each socket read, defaults to - 15_000 ms + * `:length` - sets the maximum number of bytes to read from the body for + each chunk, defaults to 8_000_000 bytes + * `:read_length` - sets the amount of bytes to read at one time from the + underlying socket to fill the chunk, defaults to 1_000_000 bytes + * `:read_timeout` - sets the timeout for each socket read, defaults to + 15_000ms The values above are not meant to be exact. For example, setting the length to 8_000_000 may end up reading some hundred bytes more from @@ -804,6 +804,122 @@ defmodule Plug.Conn do end end + @doc """ + Reads the headers of a multipart request. + + It returns `{:ok, headers, conn}` with the headers or + `{:done, conn}` if there are no more parts. + + Once `read_part_headers/2` is invoked, a developer may call + `read_part_body/2` to read the body associated to the headers. + If `read_part_headers/2` is called instead, the body is automatically + skipped until the next part headers. 
+ + ## Options + + * `:length` - sets the maximum number of bytes to read from the body for + each chunk, defaults to 64_000 bytes + * `:read_length` - sets the amount of bytes to read at one time from the + underlying socket to fill the chunk, defaults to 64_000 bytes + * `:read_timeout` - sets the timeout for each socket read, defaults to + 5_000ms + + """ + @spec read_part_headers(t, Keyword.t) :: {:ok, headers, t} | {:done, t} + def read_part_headers(%Conn{adapter: {adapter, state}} = conn, opts \\ []) do + opts = opts ++ [length: 64_000, read_length: 64_000, read_timeout: 5000] + case init_multipart(conn) do + {boundary, buffer} -> + {data, state} = read_multipart_from_buffer_or_adapter(buffer, adapter, state, opts) + read_part_headers(conn, data, boundary, adapter, state, opts) + :done -> + {:done, conn} + end + end + + defp read_part_headers(conn, data, boundary, adapter, state, opts) do + case :plug_multipart.parse_headers(data, boundary) do + {:ok, headers, rest} -> + {:ok, headers, store_multipart(conn, {boundary, rest}, adapter, state)} + :more -> + {_, next, state} = next_multipart(adapter, state, opts) + read_part_headers(conn, data <> next, boundary, adapter, state, opts) + {:more, rest} -> + {_, next, state} = next_multipart(adapter, state, opts) + read_part_headers(conn, rest <> next, boundary, adapter, state, opts) + {:done, _} -> + {:done, store_multipart(conn, :done, adapter, state)} + end + end + + @doc """ + Reads the body of a multipart request. + + Returns `{:ok, body, conn}` if the whole body has been read, + `{:more, binary, conn}` otherwise. + + It accepts the same options as `read_body/2`. 
+ """ + @spec read_part_body(t, Keyword.t) :: {:ok, binary, t} | {:more, binary, t} + def read_part_body(%{adapter: {adapter, state}} = conn, opts) do + case init_multipart(conn) do + {boundary, buffer} -> + length = Keyword.get(opts, :length, 8_000_000) + {data, state} = read_multipart_from_buffer_or_adapter(buffer, adapter, state, opts) + read_part_body(conn, data, "", length, boundary, adapter, state, opts) + :done -> + {:done, conn} + end + end + + defp read_part_body(conn, data, acc, length, boundary, adapter, state, _opts) when byte_size(acc) > length do + {:more, acc, store_multipart(conn, {boundary, data}, adapter, state)} + end + defp read_part_body(conn, data, acc, length, boundary, adapter, state, opts) do + case :plug_multipart.parse_body(data, boundary) do + {:ok, body} -> + {_, next, state} = next_multipart(adapter, state, opts) + read_part_body(conn, next, acc <> body, length, boundary, adapter, state, opts) + {:ok, body, rest} -> + {_, next, state} = next_multipart(adapter, state, opts) + read_part_body(conn, rest <> next, acc <> body, length, boundary, adapter, state, opts) + :done -> + {:ok, acc, store_multipart(conn, {boundary, ""}, adapter, state)} + {:done, body} -> + {:ok, acc <> body, store_multipart(conn, {boundary, ""}, adapter, state)} + {:done, body, rest} -> + {:ok, acc <> body, store_multipart(conn, {boundary, rest}, adapter, state)} + end + end + + defp init_multipart(%{private: %{plug_multipart: plug_multipart}}) do + plug_multipart + end + defp init_multipart(%{req_headers: req_headers}) do + {_, content_type} = List.keyfind(req_headers, "content-type", 0) + {:ok, "multipart", _, %{"boundary" => boundary}} = Plug.Conn.Utils.content_type(content_type) + {boundary, ""} + end + + defp next_multipart(adapter, state, opts) do + case adapter.read_req_body(state, opts) do + {:ok, "", _} -> raise "invalid multipart, body terminated too soon" + valid -> valid + end + end + + defp store_multipart(conn, multipart, adapter, state) do + 
%{put_in(conn.private[:plug_multipart], multipart) | adapter: {adapter, state}} + end + + defp read_multipart_from_buffer_or_adapter("", adapter, state, opts) do + {_, data, state} = adapter.read_req_body(state, opts) + {data, state} + end + defp read_multipart_from_buffer_or_adapter(buffer, _adapter, state, _opts) do + {buffer, state} + end + @doc """ Fetches cookies from the request headers. """ diff --git a/lib/plug/conn/adapter.ex b/lib/plug/conn/adapter.ex index dbd4a346..79b333fc 100644 --- a/lib/plug/conn/adapter.ex +++ b/lib/plug/conn/adapter.ex @@ -72,26 +72,4 @@ defmodule Plug.Conn.Adapter do {:ok, data :: binary, payload} | {:more, data :: binary, payload} | {:error, term} - - @doc """ - Parses a multipart request. - - This function receives the payload, the body limit and a callback. - When parsing each multipart segment, the parser should invoke the - given fallback passing the headers for that segment, before consuming - the body. The callback will return one of the following values: - - * `{:binary, name}` - the current segment must be treated as a regular - binary value with the given `name` - * `{:file, name, file, upload}` - the current segment is a file upload with `name` - and contents should be written to the given `file` - * `:skip` - this multipart segment should be skipped - - This function may return a `:ok` or `:more` tuple. The first one is - returned when there is no more multipart data to be processed. - - For the supported options, please read `Plug.Conn.read_body/2` docs. 
- """ - @callback parse_req_multipart(payload, options :: Keyword.t, fun) :: - {:ok, Conn.params, payload} | {:more, Conn.params, payload} end diff --git a/lib/plug/parsers.ex b/lib/plug/parsers.ex index 3fd69c07..989d2ed4 100644 --- a/lib/plug/parsers.ex +++ b/lib/plug/parsers.ex @@ -104,13 +104,13 @@ defmodule Plug.Parsers do Plug ships with the following parsers: - * `Plug.Parsers.URLENCODED` - parses `application/x-www-form-urlencoded` - requests (can be used as `:urlencoded` as well in the `:parsers` option) - * `Plug.Parsers.MULTIPART` - parses `multipart/form-data` and - `multipart/mixed` requests (can be used as `:multipart` as well in the - `:parsers` option) - * `Plug.Parsers.JSON` - parses `application/json` requests with the given - `:json_decoder` (can be used as `:json` as well in the `:parsers` option) + * `Plug.Parsers.URLENCODED` - parses `application/x-www-form-urlencoded` + requests (can be used as `:urlencoded` as well in the `:parsers` option) + * `Plug.Parsers.MULTIPART` - parses `multipart/form-data` and + `multipart/mixed` requests (can be used as `:multipart` as well in the + `:parsers` option) + * `Plug.Parsers.JSON` - parses `application/json` requests with the given + `:json_decoder` (can be used as `:json` as well in the `:parsers` option) ## File handling diff --git a/lib/plug/parsers/multipart.ex b/lib/plug/parsers/multipart.ex index ffa9684a..c0b1a7dd 100644 --- a/lib/plug/parsers/multipart.ex +++ b/lib/plug/parsers/multipart.ex @@ -10,25 +10,14 @@ defmodule Plug.Parsers.MULTIPART do @behaviour Plug.Parsers - def parse(conn, "multipart", subtype, headers, opts) when subtype in ["form-data", "mixed"] do - {adapter, state} = conn.adapter - + def parse(conn, "multipart", subtype, _headers, opts) when subtype in ["form-data", "mixed"] do try do - adapter.parse_req_multipart(state, add_boundary(opts, Map.get(headers, "boundary")), &handle_headers/1) + parse_multipart(conn, opts) rescue e in Plug.UploadError -> # Do not ignore upload 
errors reraise e, System.stacktrace e -> # All others are wrapped reraise Plug.Parsers.ParseError.exception(exception: e), System.stacktrace - else - {:ok, params, state} -> - {:ok, params, %{conn | adapter: {adapter, state}}} - {:more, _params, state} -> - {:error, :too_large, %{conn | adapter: {adapter, state}}} - {:error, :timeout} -> - raise Plug.TimeoutError - {:error, _} -> - raise Plug.BadRequestError end end @@ -36,35 +25,114 @@ defmodule Plug.Parsers.MULTIPART do {:next, conn} end - def handle_headers(headers) do - case List.keyfind(headers, "content-disposition", 0) do - {_, disposition} -> handle_disposition(disposition, headers) - nil -> :skip + ## Multipart + + defp parse_multipart(conn, opts) do + # Remove the length from options as it would attempt + # to eagerly read the body on the limit value. + {limit, opts} = Keyword.pop(opts, :length, 8_000_000) + + # The header options are handled individually. + {headers_opts, opts} = Keyword.pop(opts, :headers, []) + + {:ok, limit, acc, conn} = + parse_multipart(Plug.Conn.read_part_headers(conn, headers_opts), limit, opts, headers_opts, []) + + if limit > 0 do + {:ok, Enum.reduce(acc, %{}, &Plug.Conn.Query.decode_pair/2), conn} + else + {:error, :too_large, conn} end end - defp handle_disposition(disposition, headers) do - case :binary.split(disposition, ";") do - [_, params] -> - params = Plug.Conn.Utils.params(params) - if name = Map.get(params, "name") do - handle_disposition_params(name, params, headers) - else - :skip - end - [_] -> - :skip + defp parse_multipart({:ok, headers, conn}, limit, opts, headers_opts, acc) when limit >= 0 do + {conn, limit, acc} = parse_multipart_headers(headers, conn, limit, opts, acc) + parse_multipart(Plug.Conn.read_part_headers(conn, headers_opts), limit, opts, headers_opts, acc) + end + + defp parse_multipart({:ok, _headers, conn}, limit, _opts, _headers_opts, acc) do + {:ok, limit, acc, conn} + end + + defp parse_multipart({:done, conn}, limit, _opts, _headers_opts, acc) 
do + {:ok, limit, acc, conn} + end + + defp parse_multipart_headers(headers, conn, limit, opts, acc) do + case multipart_type(headers) do + {:binary, name} -> + {:ok, limit, body, conn} = parse_multipart_body(Plug.Conn.read_part_body(conn, opts), limit, opts, "") + Plug.Conn.Utils.validate_utf8!(body, Plug.Parsers.BadEncodingError, "multipart body") + {conn, limit, [{name, body} | acc]} + + {:file, name, path, %Plug.Upload{} = uploaded} -> + {:ok, file} = File.open(path, [:write, :binary, :delayed_write, :raw]) + {:ok, limit, conn} = parse_multipart_file(Plug.Conn.read_part_body(conn, opts), limit, opts, file) + :ok = File.close(file) + {conn, limit, [{name, uploaded} | acc]} + + :skip -> + {conn, limit, acc} + end + end + + defp parse_multipart_body({:more, tail, conn}, limit, opts, body) when limit >= byte_size(tail) do + parse_multipart_body(Plug.Conn.read_part_body(conn, opts), limit - byte_size(tail), opts, body <> tail) + end + + defp parse_multipart_body({:more, tail, conn}, limit, _opts, body) do + {:ok, limit - byte_size(tail), body, conn} + end + + defp parse_multipart_body({:ok, tail, conn}, limit, _opts, body) when limit >= byte_size(tail) do + {:ok, limit - byte_size(tail), body <> tail, conn} + end + + defp parse_multipart_body({:ok, tail, conn}, limit, _opts, body) do + {:ok, limit - byte_size(tail), body, conn} + end + + defp parse_multipart_file({:more, tail, conn}, limit, opts, file) when limit >= byte_size(tail) do + IO.binwrite(file, tail) + parse_multipart_file(Plug.Conn.read_part_body(conn, opts), limit - byte_size(tail), opts, file) + end + + defp parse_multipart_file({:more, tail, conn}, limit, _opts, _file) do + {:ok, limit - byte_size(tail), conn} + end + + defp parse_multipart_file({:ok, tail, conn}, limit, _opts, file) when limit >= byte_size(tail) do + IO.binwrite(file, tail) + {:ok, limit - byte_size(tail), conn} + end + + defp parse_multipart_file({:ok, tail, conn}, limit, _opts, _file) do + {:ok, limit - byte_size(tail), conn} + end 
+ + ## Helpers + + defp multipart_type(headers) do + with {_, disposition} <- List.keyfind(headers, "content-disposition", 0), + [_, params] <- :binary.split(disposition, ";"), + %{"name" => name} = params <- Plug.Conn.Utils.params(params) do + handle_disposition(params, name, headers) + else + _ -> :skip end end - defp handle_disposition_params(name, params, headers) do - case Map.get(params, "filename") do - nil -> {:binary, name} - "" -> :skip - filename -> + defp handle_disposition(params, name, headers) do + case Map.fetch(params, "filename") do + {:ok, ""} -> + :skip + {:ok, filename} -> path = Plug.Upload.random_file!("multipart") - {:file, name, path, %Plug.Upload{filename: filename, path: path, - content_type: get_header(headers, "content-type")}} + content_type = get_header(headers, "content-type") + upload = %Plug.Upload{filename: filename, path: path, content_type: content_type} + {:file, name, path, upload} + :error -> + {:binary, name} end end @@ -74,9 +142,4 @@ defmodule Plug.Parsers.MULTIPART do nil -> nil end end - - defp add_boundary(opts, nil), do: opts - defp add_boundary(opts, boundary) do - opts ++ [{:boundary, boundary}] - end end diff --git a/src/plug_multipart.erl b/src/plug_multipart.erl new file mode 100644 index 00000000..4af79137 --- /dev/null +++ b/src/plug_multipart.erl @@ -0,0 +1,471 @@ +%% Copyright (c) 2014-2015, Loïc Hoguin +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +-module(plug_multipart). + +%% Parsing. +-export([parse_headers/2]). +-export([parse_body/2]). + +%% Building. +-export([boundary/0]). +-export([first_part/2]). +-export([part/2]). +-export([close/1]). + +%% Headers. +-export([form_data/1]). +-export([parse_content_disposition/1]). +-export([parse_content_transfer_encoding/1]). +-export([parse_content_type/1]). + +-type headers() :: [{iodata(), iodata()}]. +-export_type([headers/0]). + +-define(LC(C), case C of + $A -> $a; + $B -> $b; + $C -> $c; + $D -> $d; + $E -> $e; + $F -> $f; + $G -> $g; + $H -> $h; + $I -> $i; + $J -> $j; + $K -> $k; + $L -> $l; + $M -> $m; + $N -> $n; + $O -> $o; + $P -> $p; + $Q -> $q; + $R -> $r; + $S -> $s; + $T -> $t; + $U -> $u; + $V -> $v; + $W -> $w; + $X -> $x; + $Y -> $y; + $Z -> $z; + _ -> C +end). + +%% LOWER(Bin) +%% +%% Lowercase the entire binary string in a binary comprehension. + +-define(LOWER(Bin), << << ?LC(C) >> || << C >> <= Bin >>). + +%% LOWERCASE(Function, Rest, Acc, ...) +%% +%% To be included at the end of a case block. +%% Defined for up to 10 extra arguments. 
+ +-define(LOWER(Function, Rest, Acc), case C of + $A -> Function(Rest, << Acc/binary, $a >>); + $B -> Function(Rest, << Acc/binary, $b >>); + $C -> Function(Rest, << Acc/binary, $c >>); + $D -> Function(Rest, << Acc/binary, $d >>); + $E -> Function(Rest, << Acc/binary, $e >>); + $F -> Function(Rest, << Acc/binary, $f >>); + $G -> Function(Rest, << Acc/binary, $g >>); + $H -> Function(Rest, << Acc/binary, $h >>); + $I -> Function(Rest, << Acc/binary, $i >>); + $J -> Function(Rest, << Acc/binary, $j >>); + $K -> Function(Rest, << Acc/binary, $k >>); + $L -> Function(Rest, << Acc/binary, $l >>); + $M -> Function(Rest, << Acc/binary, $m >>); + $N -> Function(Rest, << Acc/binary, $n >>); + $O -> Function(Rest, << Acc/binary, $o >>); + $P -> Function(Rest, << Acc/binary, $p >>); + $Q -> Function(Rest, << Acc/binary, $q >>); + $R -> Function(Rest, << Acc/binary, $r >>); + $S -> Function(Rest, << Acc/binary, $s >>); + $T -> Function(Rest, << Acc/binary, $t >>); + $U -> Function(Rest, << Acc/binary, $u >>); + $V -> Function(Rest, << Acc/binary, $v >>); + $W -> Function(Rest, << Acc/binary, $w >>); + $X -> Function(Rest, << Acc/binary, $x >>); + $Y -> Function(Rest, << Acc/binary, $y >>); + $Z -> Function(Rest, << Acc/binary, $z >>); + C -> Function(Rest, << Acc/binary, C >>) +end). 
+ +-define(LOWER(Function, Rest, A0, Acc), case C of + $A -> Function(Rest, A0, << Acc/binary, $a >>); + $B -> Function(Rest, A0, << Acc/binary, $b >>); + $C -> Function(Rest, A0, << Acc/binary, $c >>); + $D -> Function(Rest, A0, << Acc/binary, $d >>); + $E -> Function(Rest, A0, << Acc/binary, $e >>); + $F -> Function(Rest, A0, << Acc/binary, $f >>); + $G -> Function(Rest, A0, << Acc/binary, $g >>); + $H -> Function(Rest, A0, << Acc/binary, $h >>); + $I -> Function(Rest, A0, << Acc/binary, $i >>); + $J -> Function(Rest, A0, << Acc/binary, $j >>); + $K -> Function(Rest, A0, << Acc/binary, $k >>); + $L -> Function(Rest, A0, << Acc/binary, $l >>); + $M -> Function(Rest, A0, << Acc/binary, $m >>); + $N -> Function(Rest, A0, << Acc/binary, $n >>); + $O -> Function(Rest, A0, << Acc/binary, $o >>); + $P -> Function(Rest, A0, << Acc/binary, $p >>); + $Q -> Function(Rest, A0, << Acc/binary, $q >>); + $R -> Function(Rest, A0, << Acc/binary, $r >>); + $S -> Function(Rest, A0, << Acc/binary, $s >>); + $T -> Function(Rest, A0, << Acc/binary, $t >>); + $U -> Function(Rest, A0, << Acc/binary, $u >>); + $V -> Function(Rest, A0, << Acc/binary, $v >>); + $W -> Function(Rest, A0, << Acc/binary, $w >>); + $X -> Function(Rest, A0, << Acc/binary, $x >>); + $Y -> Function(Rest, A0, << Acc/binary, $y >>); + $Z -> Function(Rest, A0, << Acc/binary, $z >>); + C -> Function(Rest, A0, << Acc/binary, C >>) +end). + +%% Parsing. +%% +%% The multipart format is defined in RFC 2045. + +%% @doc Parse the headers for the next multipart part. +%% +%% This function skips any preamble before the boundary. +%% The preamble may be retrieved using parse_body/2. +%% +%% This function will accept input of any size, it is +%% up to the caller to limit it if needed. + +-spec parse_headers(binary(), binary()) + -> more | {more, binary()} + | {ok, headers(), binary()} + | {done, binary()}. 
+%% If the stream starts with the boundary we can make a few assumptions +%% and quickly figure out if we got the complete list of headers. +parse_headers(<< "--", Stream/bits >>, Boundary) -> + BoundarySize = byte_size(Boundary), + case Stream of + %% Last boundary. Return the epilogue. + << Boundary:BoundarySize/binary, "--", Stream2/bits >> -> + {done, Stream2}; + << Boundary:BoundarySize/binary, Stream2/bits >> -> + %% We have all the headers only if there is a \r\n\r\n + %% somewhere in the data after the boundary. + case binary:match(Stream2, <<"\r\n\r\n">>) of + nomatch -> + more; + _ -> + before_parse_headers(Stream2) + end; + %% If there isn't enough to represent Boundary \r\n\r\n + %% then we definitely don't have all the headers. + _ when byte_size(Stream) < byte_size(Boundary) + 4 -> + more; + %% Otherwise we have preamble data to skip. + %% We still got rid of the first two misleading bytes. + _ -> + skip_preamble(Stream, Boundary) + end; +%% Otherwise we have preamble data to skip. +parse_headers(Stream, Boundary) -> + skip_preamble(Stream, Boundary). + +%% We need to find the boundary and a \r\n\r\n after that. +%% Since the boundary isn't at the start, it must be right +%% after a \r\n too. +skip_preamble(Stream, Boundary) -> + case binary:match(Stream, <<"\r\n--", Boundary/bits >>) of + %% No boundary, need more data. + nomatch -> + %% We can safely skip the size of the stream + %% minus the last 3 bytes which may be a partial boundary. + SkipSize = byte_size(Stream) - 3, + case SkipSize > 0 of + false -> + more; + true -> + << _:SkipSize/binary, Stream2/bits >> = Stream, + {more, Stream2} + end; + {Start, Length} -> + Start2 = Start + Length, + << _:Start2/binary, Stream2/bits >> = Stream, + case Stream2 of + %% Last boundary. Return the epilogue. + << "--", Stream3/bits >> -> + {done, Stream3}; + _ -> + case binary:match(Stream, <<"\r\n\r\n">>) of + %% We don't have the full headers. 
+ nomatch -> + {more, Stream2}; + _ -> + before_parse_headers(Stream2) + end + end + end. + +before_parse_headers(<< "\r\n\r\n", Stream/bits >>) -> + %% This indicates that there are no headers, so we can abort immediately. + {ok, [], Stream}; +before_parse_headers(<< "\r\n", Stream/bits >>) -> + %% There is a line break right after the boundary, skip it. + parse_hd_name(Stream, [], <<>>). + +parse_hd_name(<< C, Rest/bits >>, H, SoFar) -> + case C of + $: -> parse_hd_before_value(Rest, H, SoFar); + $\s -> parse_hd_name_ws(Rest, H, SoFar); + $\t -> parse_hd_name_ws(Rest, H, SoFar); + _ -> ?LOWER(parse_hd_name, Rest, H, SoFar) + end. + +parse_hd_name_ws(<< C, Rest/bits >>, H, Name) -> + case C of + $\s -> parse_hd_name_ws(Rest, H, Name); + $\t -> parse_hd_name_ws(Rest, H, Name); + $: -> parse_hd_before_value(Rest, H, Name) + end. + +parse_hd_before_value(<< $\s, Rest/bits >>, H, N) -> + parse_hd_before_value(Rest, H, N); +parse_hd_before_value(<< $\t, Rest/bits >>, H, N) -> + parse_hd_before_value(Rest, H, N); +parse_hd_before_value(Buffer, H, N) -> + parse_hd_value(Buffer, H, N, <<>>). + +parse_hd_value(<< $\r, Rest/bits >>, Headers, Name, SoFar) -> + case Rest of + << "\n\r\n", Rest2/bits >> -> + {ok, [{Name, SoFar}|Headers], Rest2}; + << $\n, C, Rest2/bits >> when C =:= $\s; C =:= $\t -> + parse_hd_value(Rest2, Headers, Name, SoFar); + << $\n, Rest2/bits >> -> + parse_hd_name(Rest2, [{Name, SoFar}|Headers], <<>>) + end; +parse_hd_value(<< C, Rest/bits >>, H, N, SoFar) -> + parse_hd_value(Rest, H, N, << SoFar/binary, C >>). + +%% @doc Parse the body of the current multipart part. +%% +%% The body is everything until the next boundary. + +-spec parse_body(binary(), binary()) + -> {ok, binary()} | {ok, binary(), binary()} + | done | {done, binary()} | {done, binary(), binary()}. 
+parse_body(Stream, Boundary) -> + BoundarySize = byte_size(Boundary), + case Stream of + << "--", Boundary:BoundarySize/binary, _/bits >> -> + done; + _ -> + case binary:match(Stream, << "\r\n--", Boundary/bits >>) of + %% No boundary, check for a possible partial at the end. + %% Return more or less of the body depending on the result. + nomatch -> + StreamSize = byte_size(Stream), + From = StreamSize - BoundarySize - 3, + MatchOpts = if + %% Binary too small to contain boundary, check it fully. + From < 0 -> []; + %% Optimize, only check the end of the binary. + true -> [{scope, {From, StreamSize - From}}] + end, + case binary:match(Stream, <<"\r">>, MatchOpts) of + nomatch -> + {ok, Stream}; + {Pos, _} -> + case Stream of + << Body:Pos/binary >> -> + {ok, Body}; + << Body:Pos/binary, Rest/bits >> -> + {ok, Body, Rest} + end + end; + %% Boundary found, this is the last chunk of the body. + {Pos, _} -> + case Stream of + << Body:Pos/binary, "\r\n" >> -> + {done, Body}; + << Body:Pos/binary, "\r\n", Rest/bits >> -> + {done, Body, Rest}; + << Body:Pos/binary, Rest/bits >> -> + {done, Body, Rest} + end + end + end. + +%% Building. + +%% @doc Generate a new random boundary. +%% +%% The boundary generated has a low probability of ever appearing +%% in the data. + +-spec boundary() -> binary(). +boundary() -> + base64:encode(crypto:strong_rand_bytes(48)). + +%% @doc Return the first part's head. +%% +%% This works exactly like the part/2 function except there is +%% no leading \r\n. It's not required to use this function, +%% just makes the output a little smaller and prettier. + +-spec first_part(binary(), headers()) -> iodata(). +first_part(Boundary, Headers) -> + [<<"--">>, Boundary, <<"\r\n">>, headers_to_iolist(Headers, [])]. + +%% @doc Return a part's head. + +-spec part(binary(), headers()) -> iodata(). +part(Boundary, Headers) -> + [<<"\r\n--">>, Boundary, <<"\r\n">>, headers_to_iolist(Headers, [])]. 
+ +headers_to_iolist([], Acc) -> + lists:reverse([<<"\r\n">>|Acc]); +headers_to_iolist([{N, V}|Tail], Acc) -> + %% We don't want to create a sublist so we list the + %% values in reverse order so that it gets reversed properly. + headers_to_iolist(Tail, [<<"\r\n">>, V, <<": ">>, N|Acc]). + +%% @doc Return the closing delimiter of the multipart message. + +-spec close(binary()) -> iodata(). +close(Boundary) -> + [<<"\r\n--">>, Boundary, <<"--">>]. + +%% Headers. + +%% @doc Convenience function for extracting information from headers +%% when parsing a multipart/form-data stream. + +-spec form_data(headers()) + -> {data, binary()} + | {file, binary(), binary(), binary(), binary()}. +form_data(Headers) -> + {_, DispositionBin} = lists:keyfind(<<"content-disposition">>, 1, Headers), + {<<"form-data">>, Params} = parse_content_disposition(DispositionBin), + {_, FieldName} = lists:keyfind(<<"name">>, 1, Params), + case lists:keyfind(<<"filename">>, 1, Params) of + false -> + {data, FieldName}; + {_, Filename} -> + Type = case lists:keyfind(<<"content-type">>, 1, Headers) of + false -> <<"text/plain">>; + {_, T} -> T + end, + %% @todo Turns out this is unnecessary per RFC7578 4.7. + TransferEncoding = case lists:keyfind( + <<"content-transfer-encoding">>, 1, Headers) of + false -> <<"7bit">>; + {_, TE} -> TE + end, + {file, FieldName, Filename, Type, TransferEncoding} + end. + +%% @doc Parse an RFC 2183 content-disposition value. + +-spec parse_content_disposition(binary()) + -> {binary(), [{binary(), binary()}]}. +parse_content_disposition(Bin) -> + parse_cd_type(Bin, <<>>). + +parse_cd_type(<<>>, Acc) -> + {Acc, []}; +parse_cd_type(<< C, Rest/bits >>, Acc) -> + case C of + $; -> {Acc, parse_before_param(Rest, [])}; + $\s -> {Acc, parse_before_param(Rest, [])}; + $\t -> {Acc, parse_before_param(Rest, [])}; + _ -> ?LOWER(parse_cd_type, Rest, Acc) + end. + +%% @doc Parse an RFC 2045 content-transfer-encoding header. 

-spec parse_content_transfer_encoding(binary()) -> binary().
parse_content_transfer_encoding(Bin) ->
	%% NOTE(review): ?LOWER/1 is defined outside this chunk;
	%% presumably it lowercases the whole binary -- confirm.
	?LOWER(Bin).

%% @doc Parse an RFC 2045 content-type header.
%%
%% Returns the type, the subtype and the list of parameters.

-spec parse_content_type(binary())
	-> {binary(), binary(), [{binary(), binary()}]}.
parse_content_type(Bin) ->
	parse_ct_type(Bin, <<>>).

%% Accumulate the type up to the "/" separator. There is no clause
%% for an empty binary, so input without a "/" fails to match.
%% NOTE(review): in every ?LOWER call below the byte is kept bound
%% as C on the assumption that the macro (defined outside this
%% chunk) refers to it by that name -- confirm before renaming.
parse_ct_type(<< $/, Rest/bits >>, Acc) ->
	parse_ct_subtype(Rest, Acc, <<>>);
parse_ct_type(<< C, Rest/bits >>, Acc) ->
	?LOWER(parse_ct_type, Rest, Acc).

%% Accumulate the subtype until a separator or the end of the input.
%% Reaching the end of the input is only accepted once the subtype
%% is non-empty.
parse_ct_subtype(<<>>, Type, Subtype) when Subtype =/= <<>> ->
	{Type, Subtype, []};
parse_ct_subtype(<< C, Rest/bits >>, Type, Acc)
		when C =:= $; orelse C =:= $\s orelse C =:= $\t ->
	{Type, Acc, parse_before_param(Rest, [])};
parse_ct_subtype(<< C, Rest/bits >>, Type, Acc) ->
	?LOWER(parse_ct_subtype, Rest, Type, Acc).

%% @doc Parse RFC 2045 parameters.

%% Skip separators in front of a parameter; the first other byte
%% starts the parameter name.
parse_before_param(<<>>, Params) ->
	lists:reverse(Params);
parse_before_param(<< C, Rest/bits >>, Params)
		when C =:= $; orelse C =:= $\s orelse C =:= $\t ->
	parse_before_param(Rest, Params);
parse_before_param(<< C, Rest/bits >>, Params) ->
	?LOWER(parse_param_name, Rest, Params, <<>>).

%% Accumulate the parameter name up to "=". A name that runs to the
%% end of the input is recorded with an empty value.
parse_param_name(<<>>, Params, Acc) ->
	lists:reverse([{Acc, <<>>}|Params]);
parse_param_name(<< $=, Rest/bits >>, Params, Acc) ->
	parse_param_value(Rest, Params, Acc);
parse_param_name(<< C, Rest/bits >>, Params, Acc) ->
	?LOWER(parse_param_name, Rest, Params, Acc).

%% Look at the first byte of the value: a double quote opens a
%% quoted value, a separator means the value is empty, and any
%% other byte becomes the first byte of an unquoted value.
parse_param_value(<<>>, Params, Name) ->
	lists:reverse([{Name, <<>>}|Params]);
parse_param_value(<< $", Rest/bits >>, Params, Name) ->
	parse_param_quoted_value(Rest, Params, Name, <<>>);
parse_param_value(<< Sep, Rest/bits >>, Params, Name)
		when Sep =:= $; orelse Sep =:= $\s orelse Sep =:= $\t ->
	parse_before_param(Rest, [{Name, <<>>}|Params]);
parse_param_value(<< First, Rest/bits >>, Params, Name) ->
	parse_param_value(Rest, Params, Name, << First >>).
+ +parse_param_value(<<>>, Params, Name, Acc) -> + lists:reverse([{Name, Acc}|Params]); +parse_param_value(<< C, Rest/bits >>, Params, Name, Acc) -> + case C of + $; -> parse_before_param(Rest, [{Name, Acc}|Params]); + $\s -> parse_before_param(Rest, [{Name, Acc}|Params]); + $\t -> parse_before_param(Rest, [{Name, Acc}|Params]); + C -> parse_param_value(Rest, Params, Name, << Acc/binary, C >>) + end. + +%% We expect a final $" so no need to test for <<>>. +parse_param_quoted_value(<< $\\, C, Rest/bits >>, Params, Name, Acc) -> + parse_param_quoted_value(Rest, Params, Name, << Acc/binary, C >>); +parse_param_quoted_value(<< $", Rest/bits >>, Params, Name, Acc) -> + parse_before_param(Rest, [{Name, Acc}|Params]); +parse_param_quoted_value(<< C, Rest/bits >>, Params, Name, Acc) + when C =/= $\r -> + parse_param_quoted_value(Rest, Params, Name, << Acc/binary, C >>). \ No newline at end of file diff --git a/test/plug/adapters/cowboy/conn_test.exs b/test/plug/adapters/cowboy/conn_test.exs index e9e7a438..96a1eec7 100644 --- a/test/plug/adapters/cowboy/conn_test.exs +++ b/test/plug/adapters/cowboy/conn_test.exs @@ -82,15 +82,15 @@ defmodule Plug.Adapters.Cowboy.ConnTest do test "request_path" do assert {200, _, "/return_request_path/foo"} = - request :get, "/return_request_path/foo?barbat" + request :get, "/return_request_path/foo?barbat" assert {200, _, "/return_request_path/foo/bar"} = - request :get, "/return_request_path/foo/bar?bar=bat" + request :get, "/return_request_path/foo/bar?bar=bat" assert {200, _, "/return_request_path/foo/bar/"} = - request :get, "/return_request_path/foo/bar/?bar=bat" + request :get, "/return_request_path/foo/bar/?bar=bat" assert {200, _, "/return_request_path/foo//bar"} = - request :get, "/return_request_path/foo//bar" + request :get, "/return_request_path/foo//bar" assert {200, _, "//return_request_path//foo//bar//"} = - request :get, "//return_request_path//foo//bar//" + request :get, "//return_request_path//foo//bar//" end def 
headers(conn) do @@ -321,12 +321,25 @@ defmodule Plug.Adapters.Cowboy.ConnTest do """ headers = - [{"Content-Type", "multipart/form-data"}, + [{"Content-Type", "multipart/form-data; boundary=----w58EW1cEpjzydSCq"}, + {"Content-Length", byte_size(multipart)}] + + assert {500, _, body} = request :post, "/multipart", headers, multipart + assert body =~ "malformed request, a RuntimeError exception was raised with message \"invalid multipart" + + multipart = """ + ------w58EW1cEpjzydSCq\r + Content-Disposition: form-data; name=\"name\"\r + \r + hello + """ + + headers = + [{"Content-Type", "multipart/form-data; boundary=----w58EW1cEpjzydSCq"}, {"Content-Length", byte_size(multipart)}] assert {500, _, body} = request :post, "/multipart", headers, multipart - assert body =~ "malformed request, a MatchError exception was raised with message " <> - ~s("no match of right hand side value: false") + assert body =~ "malformed request, a RuntimeError exception was raised with message \"invalid multipart" end def https(conn) do diff --git a/test/plug/adapters/test/conn_test.exs b/test/plug/adapters/test/conn_test.exs index 63a93b8a..ad8d3a44 100644 --- a/test/plug/adapters/test/conn_test.exs +++ b/test/plug/adapters/test/conn_test.exs @@ -42,7 +42,7 @@ defmodule Plug.Adapters.Test.ConnTest do test "custom params sets content-type to multipart/mixed when content-type is not set" do conn = conn(:get, "/", foo: "bar") - assert conn.req_headers == [{"content-type", "multipart/mixed; charset: utf-8"}] + assert conn.req_headers == [{"content-type", "multipart/mixed; boundary=plug_conn_test"}] end test "custom params does not change content-type when set" do @@ -53,55 +53,6 @@ defmodule Plug.Adapters.Test.ConnTest do assert conn.req_headers == [{"content-type", "application/vnd.api+json"}] end - test "parse_req_multipart/4" do - multipart = """ - ------w58EW1cEpjzydSCq\r - Content-Disposition: form-data; name=\"name\"\r - \r - hello\r - ------w58EW1cEpjzydSCq\r - Content-Disposition: 
form-data; name=\"pic\"; filename=\"foo.txt\"\r - Content-Type: text/plain\r - \r - hello - - \r - ------w58EW1cEpjzydSCq\r - Content-Disposition: form-data; name=\"empty\"; filename=\"\"\r - Content-Type: application/octet-stream\r - \r - \r - ------w58EW1cEpjzydSCq\r - Content-Disposition: form-data; name="status[]"\r - \r - choice1\r - ------w58EW1cEpjzydSCq\r - Content-Disposition: form-data; name="status[]"\r - \r - choice2\r - ------w58EW1cEpjzydSCq\r - Content-Disposition: form-data; name=\"commit\"\r - \r - Create User\r - ------w58EW1cEpjzydSCq--\r - """ - - conn = conn(:post, "/") - - {adapter, _state} = conn.adapter - - assert {:ok, params, _} = adapter.parse_req_multipart(%{req_body: multipart}, [{:boundary, "----w58EW1cEpjzydSCq"}], &Plug.Parsers.MULTIPART.handle_headers/1) - - assert params["name"] == "hello" - assert params["status"] == ["choice1", "choice2"] - assert params["empty"] == nil - - assert %Plug.Upload{} = file = params["pic"] - assert File.read!(file.path) == "hello\n\n" - assert file.content_type == "text/plain" - assert file.filename == "foo.txt" - end - test "use existing conn.host if exists" do conn_with_host = conn(:get, "http://www.elixir-lang.org/") assert conn_with_host.host == "www.elixir-lang.org" diff --git a/test/plug/parsers_test.exs b/test/plug/parsers_test.exs index 4d79b417..11202ef9 100644 --- a/test/plug/parsers_test.exs +++ b/test/plug/parsers_test.exs @@ -72,7 +72,7 @@ defmodule Plug.ParsersTest do assert conn.params["foo"] == "baz" end - test "parses multipart bodies" do + test "parses multipart bodies with test params" do conn = parse(conn(:post, "/?foo=bar")) assert conn.params == %{"foo" => "bar"} @@ -80,6 +80,58 @@ defmodule Plug.ParsersTest do assert conn.params == %{"foo" => "baz"} end + test "parses multipart bodies with test body" do + multipart = """ + ------w58EW1cEpjzydSCq\r + Content-Disposition: form-data; name=\"name\"\r + \r + hello\r + ------w58EW1cEpjzydSCq\r + Content-Disposition: form-data; 
name=\"pic\"; filename=\"foo.txt\"\r + Content-Type: text/plain\r + \r + hello + + \r + ------w58EW1cEpjzydSCq\r + Content-Disposition: form-data\r + \r + skipped\r + ------w58EW1cEpjzydSCq\r + Content-Disposition: form-data; name=\"empty\"; filename=\"\"\r + Content-Type: application/octet-stream\r + \r + \r + ------w58EW1cEpjzydSCq\r + Content-Disposition: form-data; name="status[]"\r + \r + choice1\r + ------w58EW1cEpjzydSCq\r + Content-Disposition: form-data; name="status[]"\r + \r + choice2\r + ------w58EW1cEpjzydSCq\r + Content-Disposition: form-data; name=\"commit\"\r + \r + Create User\r + ------w58EW1cEpjzydSCq--\r + """ + + %{params: params} = + conn(:post, "/", multipart) + |> put_req_header("content-type", "multipart/mixed; boundary=----w58EW1cEpjzydSCq") + |> parse() + + assert params["name"] == "hello" + assert params["status"] == ["choice1", "choice2"] + assert params["empty"] == nil + + assert %Plug.Upload{} = file = params["pic"] + assert File.read!(file.path) == "hello\n\n" + assert file.content_type == "text/plain" + assert file.filename == "foo.txt" + end + test "raises on invalid url encoded" do assert_raise Plug.Parsers.BadEncodingError, "invalid UTF-8 on urlencoded body, got byte 139", fn ->