Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Merge pull request #30 from zotonic/zotonic-subject-encoding

Escape the Subject: header using RFC2047
  • Loading branch information...
commit 7b74bf1ab3da04f671692e9f5f6641070d50db70 2 parents 40dea60 + 5573728
@mworrell mworrell authored
View
3  .gitignore
@@ -6,4 +6,5 @@ coverage/*
build
*.xcodeproj
.eunit/
-ebin/gen_smtp.app
+ebin/gen_smtp.app
+src/smtp_rfc822_parse.erl
View
69 src/mimemail.erl
@@ -668,7 +668,7 @@ encode_headers(Headers) ->
encode_headers([], EncodedHeaders) ->
EncodedHeaders;
encode_headers([{Key, Value}|T] = _Headers, EncodedHeaders) ->
- encode_headers(T, encode_folded_header(list_to_binary([Key,": ",Value]),
+ encode_headers(T, encode_folded_header(list_to_binary([Key,": ",encode_header_value(Key, Value)]),
EncodedHeaders)).
encode_folded_header(Header, HeaderLines) ->
@@ -688,6 +688,16 @@ encode_folded_header(Header, HeaderLines) ->
encode_folded_header(TabbedRemainder, [])
end.
+%% Prepare a header value for the wire. For address headers the value is
+%% parsed into {Name, Email} pairs, only the display names are RFC 2047
+%% encoded, and the pairs are combined again; every other header value is
+%% RFC 2047 encoded as a whole.
+encode_header_value(H, Value)
+  when H =:= <<"To">> orelse H =:= <<"Cc">> orelse H =:= <<"Bcc">> orelse
+       H =:= <<"Reply-To">> orelse H =:= <<"From">> ->
+    {ok, Parsed} = smtp_util:parse_rfc822_addresses(Value),
+    Encoded = lists:map(fun({Name, Email}) ->
+                                {rfc2047_utf8_encode(Name), Email}
+                        end, Parsed),
+    smtp_util:combine_rfc822_addresses(Encoded);
+encode_header_value(_Other, Value) ->
+    rfc2047_utf8_encode(Value).
+
encode_component(_Type, _SubType, Headers, Params, Body) ->
if
is_list(Body) -> % is this a multipart component?
@@ -825,6 +835,40 @@ fix_encoding(Encoding) when Encoding == <<"utf8">>; Encoding == <<"UTF8">> ->
fix_encoding(Encoding) ->
Encoding.
+
+%% @doc Encode a binary or list according to RFC 2047 ("Q" encoding).
+%% Input is assumed to be in UTF-8 encoding (a binary or a list of bytes).
+%% `undefined' and empty input are returned unchanged, and text that only
+%% contains printable ASCII is returned unescaped (as a list). Anything else
+%% is returned as one or more `=?UTF-8?Q?...?=' encoded-words separated by
+%% CRLF SPACE.
+rfc2047_utf8_encode(undefined) -> undefined;
+rfc2047_utf8_encode(B) when is_binary(B) ->
+    rfc2047_utf8_encode(binary_to_list(B));
+rfc2047_utf8_encode([]) ->
+    [];
+rfc2047_utf8_encode(Text) ->
+    rfc2047_utf8_encode(Text, Text).
+
+%% Don't escape when all characters are ASCII printable.
+%% First argument: the part still to be checked; second: the complete text.
+rfc2047_utf8_encode([], Text) ->
+    Text;
+rfc2047_utf8_encode([H|T], Text) when H >= 32 andalso H =< 126 ->
+    rfc2047_utf8_encode(T, Text);
+rfc2047_utf8_encode(_, Text) ->
+    "=?UTF-8?Q?" ++ rfc2047_utf8_encode(Text, [], 0) ++ "?=".
+
+%% Q-encode the bytes into the reversed accumulator. WordLen is the number
+%% of payload characters already emitted for the current encoded-word.
+rfc2047_utf8_encode([], Acc, _WordLen) ->
+    lists:reverse(Acc);
+rfc2047_utf8_encode([C|_] = T, Acc, WordLen)
+  when WordLen >= 61; WordLen >= 52, C band 16#C0 =/= 16#80 ->
+    %% Start a new encoded-word. An encoded-word may be at most 75 chars,
+    %% of which "=?UTF-8?Q?" and "?=" take 12, leaving 63 for the payload.
+    %% RFC 2047 forbids splitting a multi-byte character over two words, so
+    %% the break is only made in front of a byte that is not a UTF-8
+    %% continuation byte (10xxxxxx); breaking at 52 leaves room for one
+    %% 4-byte character (12 chars). The unconditional break at 61 only
+    %% triggers on input that is not valid UTF-8 and keeps the word bounded.
+    rfc2047_utf8_encode(T, lists:reverse("?=\r\n =?UTF-8?Q?", Acc), 0);
+rfc2047_utf8_encode([C|T], Acc, WordLen)
+  when C > 32, C < 127, C /= $?, C /= $_, C /= $=, C /= $. ->
+    %% Printable ASCII that has no special meaning is copied as-is
+    rfc2047_utf8_encode(T, [C|Acc], WordLen+1);
+rfc2047_utf8_encode([C|T], Acc, WordLen) ->
+    %% Everything else becomes =XX (pushed in reverse order)
+    rfc2047_utf8_encode(T, [hex(C rem 16), hex(C div 16), $= | Acc], WordLen+3).
+
+%% Map a nibble (0..15) to its uppercase hexadecimal digit.
+hex(N) when N >= 10 -> N + $A - 10;
+hex(N) -> N + $0.
+
+
-ifdef(TEST).
parse_with_comments_test_() ->
@@ -1466,6 +1510,12 @@ rfc2047_decode_test_() ->
fun() ->
?assertEqual(<<"Jacek Złydach <jacek.zlydach@erlang-solutions.com>, chak de planet óóóó <jz@erlang-solutions.com>, Jacek Złydach <jacek.zlydach@erlang-solutions.com>, chak de planet óóóó <jz@erlang-solutions.com>">>, decode_header(<<"=?UTF-8?B?SmFjZWsgWsWCeWRhY2g=?= <jacek.zlydach@erlang-solutions.com>, =?UTF-8?B?Y2hhayBkZSBwbGFuZXQgw7PDs8Ozw7M=?= <jz@erlang-solutions.com>, =?UTF-8?B?SmFjZWsgWsWCeWRhY2g=?= <jacek.zlydach@erlang-solutions.com>, =?UTF-8?B?Y2hhayBkZSBwbGFuZXQgw7PDs8Ozw7M=?= <jz@erlang-solutions.com>">>, "utf-8"))
end
+ },
+ {"decode something I encoded myself",
+ fun() ->
+ A = <<"Jacek Złydach <jacek.zlydach@erlang-solutions.com>">>,
+ ?assertEqual(A, decode_header(list_to_binary(rfc2047_utf8_encode(A)), "utf-8"))
+ end
}
].
@@ -1488,6 +1538,23 @@ encoding_test_() ->
?assertEqual(Result, encode(Email))
end
},
+ {"Email with UTF-8 characters",
+ fun() ->
+ Email = {<<"text">>, <<"plain">>, [
+ {<<"Subject">>, <<"Fræderik Hølljen">>},
+ {<<"From">>, <<"Fræderik Hølljen <me@example.com>">>},
+ {<<"To">>, <<"you@example.com">>},
+ {<<"Message-ID">>, <<"<abcd@example.com>">>},
+ {<<"MIME-Version">>, <<"1.0">>},
+ {<<"Date">>, <<"Sun, 01 Nov 2009 14:44:47 +0200">>}],
+ [{<<"content-type-params">>,
+ [{<<"charset">>,<<"US-ASCII">>}],
+ {<<"disposition">>,<<"inline">>}}],
+ <<"This is a plain message">>},
+ Result = <<"Subject: =?UTF-8?Q?Fr=C3=A6derik=20H=C3=B8lljen?=\r\nFrom: =?UTF-8?Q?Fr=C3=A6derik=20H=C3=B8lljen?= <me@example.com>\r\nTo: you@example.com\r\nMessage-ID: <abcd@example.com>\r\nMIME-Version: 1.0\r\nDate: Sun, 01 Nov 2009 14:44:47 +0200\r\n\r\nThis is a plain message">>,
+ ?assertEqual(Result, encode(Email))
+ end
+ },
{"multipart/alternative email",
fun() ->
Email = {<<"multipart">>, <<"alternative">>, [
View
29 src/smtp_rfc822_parse.yrl
@@ -0,0 +1,29 @@
+Nonterminals
+ addresses
+ address
+ name
+ names
+ email.
+
+Terminals
+ string
+ ',' '<' '>'.
+
+Rootsymbol
+ addresses. % the parse result is a list of {Name, Email} tuples
+
+Endsymbol
+ '$end'.
+
+addresses -> address : ['$1']. % a single address gives a one-element list
+addresses -> address ',' addresses : ['$1' | '$3']. % comma separated, right recursive
+addresses -> '$empty' : []. % no tokens at all gives an empty list
+
+address -> email : {undefined, '$1'}. % bare e-mail address, no display name
+address -> '<' email '>' : {undefined, '$2'}. % bracketed e-mail address, no display name
+address -> names '<' email '>' : {lists:flatten('$1'), '$3'}. % name words flattened into one string
+
+email -> string : element(3, '$1'). % the third element of a string token is its text
+names -> name : '$1'.
+names -> name names : ['$1', " " | '$2']. % consecutive words are joined by a single space
+name -> string : element(3, '$1').
View
68 src/smtp_util.erl
@@ -26,7 +26,9 @@
-export([
mxlookup/1, guess_FQDN/0, compute_cram_digest/2, get_cram_string/1,
trim_crlf/1, rfc5322_timestamp/0, zone/0, generate_message_id/0,
- generate_message_boundary/0]).
+ parse_rfc822_addresses/1,
+ combine_rfc822_addresses/1,
+ generate_message_boundary/0]).
%% @doc returns a sorted list of mx servers for `Domain', lowest distance first
mxlookup(Domain) ->
@@ -111,4 +113,68 @@ generate_message_boundary() ->
["_=", [io_lib:format("~2.36.0b", [X]) || <<X>> <= erlang:md5(term_to_binary([erlang:now(), FQDN]))], "=_"].
+-define(is_whitespace(Ch), (Ch =< 32)).
+%% @doc Build a single header value (a binary) from a list of
+%% `{Name, Email}' tuples; the addresses are separated by ", ". When `Name'
+%% is `undefined' only the e-mail address is emitted, otherwise the result
+%% is `Name <Email>'. An empty list gives an empty binary.
+combine_rfc822_addresses([]) ->
+    <<>>;
+combine_rfc822_addresses(Addresses) ->
+    %% The accumulator is in reverse order, so its first two elements are
+    %% the ", " separator that follows the last address: drop them.
+    [_,_|Acc] = combine_rfc822_addresses(Addresses, []),
+    iolist_to_binary(lists:reverse(Acc)).
+
+combine_rfc822_addresses([], Acc) ->
+    Acc;
+combine_rfc822_addresses([{undefined, Email}|Rest], Acc) ->
+    combine_rfc822_addresses(Rest, [32, $,, Email|Acc]);
+combine_rfc822_addresses([{Name, Email}|Rest], Acc) ->
+    combine_rfc822_addresses(Rest, [32, $,, $>, Email, $<, 32, opt_quoted(Name)|Acc]).
+
+%% Put the display name between double quotes when it contains a character
+%% that would otherwise change the structure of the address list (a double
+%% quote, comma, angle bracket or backslash). Inside the quotes every double
+%% quote and backslash is escaped with a backslash.
+opt_quoted(N) ->
+    case re:run(N, "[\",<>\\\\]", [{capture, none}]) of
+        nomatch -> N;
+        match ->
+            [$", re:replace(N, "([\"\\\\])", "\\\\\\1", [global]), $"]
+    end.
+
+%% @doc Parse an address list given as a binary or a string. The input is
+%% tokenized by scan_rfc822/2, terminated with an end token and handed to
+%% smtp_rfc822_parse:parse/1, whose result is returned unchanged.
+parse_rfc822_addresses(Bin) when is_binary(Bin) ->
+    parse_rfc822_addresses(binary_to_list(Bin));
+parse_rfc822_addresses(Str) when is_list(Str) ->
+    %% The scanner returns its tokens in reverse order
+    ReversedTokens = scan_rfc822(Str, []),
+    Tokens = lists:reverse(ReversedTokens) ++ [{'$end', 0}],
+    smtp_rfc822_parse:parse(Tokens).
+
+%% Tokenize an address list for smtp_rfc822_parse. The tokens are
+%% accumulated in reverse order: {string, 0, Text} for words, quoted strings
+%% and the text between angle brackets, and {',', 0}, {'<', 0}, {'>', 0} for
+%% the separators. Whitespace between tokens is skipped.
+scan_rfc822([], Acc) ->
+    Acc;
+scan_rfc822([Ch|R], Acc) when ?is_whitespace(Ch) ->
+    scan_rfc822(R, Acc);
+scan_rfc822([$"|R], Acc) ->
+    {Token, Rest} = scan_rfc822_scan_endquote(R, [], false),
+    scan_rfc822(Rest, [{string, 0, Token}|Acc]);
+scan_rfc822([$,|Rest], Acc) ->
+    scan_rfc822(Rest, [{',', 0}|Acc]);
+scan_rfc822([$<|Rest], Acc) ->
+    {Token, R} = scan_rfc822_scan_endpointybracket(Rest),
+    scan_rfc822(R, [{'>', 0}, {string, 0, Token}, {'<', 0}|Acc]);
+scan_rfc822([$>|Rest], Acc) ->
+    %% A '>' without a preceding '<'. Emit it as a token so the parser
+    %% reports a syntax error; without this clause the scanner would produce
+    %% empty string tokens forever without consuming the '>'.
+    scan_rfc822(Rest, [{'>', 0}|Acc]);
+scan_rfc822(String, Acc) ->
+    %% An unquoted word runs up to the next whitespace or separator. The
+    %% clauses above guarantee that the first character is part of the word,
+    %% so the token is never empty and the scanner always makes progress.
+    {Token, Rest} = lists:splitwith(fun is_rfc822_word_char/1, String),
+    scan_rfc822(Rest, [{string, 0, Token}|Acc]).
+
+%% True for characters that can be part of an unquoted word.
+is_rfc822_word_char(Ch) ->
+    not (?is_whitespace(Ch) orelse Ch =:= $< orelse Ch =:= $> orelse Ch =:= $,).
+
+%% Split the text that follows a '<' at the first '>'. Returns {Token, Rest}
+%% where Token is the text before the '>' and Rest is everything after it.
+%% When there is no '>' the whole string is the token and Rest is empty.
+%% Line breaks get no special treatment, so text after a newline is kept.
+scan_rfc822_scan_endpointybracket(String) ->
+    case lists:splitwith(fun(Ch) -> Ch =/= $> end, String) of
+        {Token, [$>|Rest]} ->
+            {Token, Rest};
+        {Token, []} ->
+            {Token, []}
+    end.
+
+%% Scan the text that follows an opening double quote. Returns {Token, Rest}
+%% where Token is the unescaped content of the quoted string and Rest is the
+%% input after the closing quote. The third argument tells whether the
+%% previous character was an unescaped backslash.
+scan_rfc822_scan_endquote([], Acc, _InEscape) ->
+    %% No closing quote: take everything up to the end of the input
+    {lists:reverse(Acc), []};
+scan_rfc822_scan_endquote([$\\|R], Acc, false) ->
+    %% Start of an escape sequence; the backslash itself is not kept
+    scan_rfc822_scan_endquote(R, Acc, true);
+scan_rfc822_scan_endquote([$"|Rest], Acc, false) ->
+    %% Done!
+    {lists:reverse(Acc), Rest};
+scan_rfc822_scan_endquote([Ch|Rest], Acc, _InEscape) ->
+    %% A regular character, or an escaped one (including \" and \\)
+    scan_rfc822_scan_endquote(Rest, [Ch|Acc], false).
View
76 test/gen_smtp_util_test.erl
@@ -0,0 +1,76 @@
+-module(gen_smtp_util_test).
+
+-compile(export_all).
+
+-include_lib("eunit/include/eunit.hrl").
+
+%% Two words without an address between angle brackets do not match the
+%% address grammar, so parsing must return an error tuple.
+test_test() ->
+    ?assertMatch({error, _}, smtp_util:parse_rfc822_addresses("foo bar")).
+
+%% Table driven tests for smtp_util:parse_rfc822_addresses/1. Every case is
+%% an {ExpectedAddresses, Input} pair; the cases of one group are checked in
+%% order inside a single test fun.
+parse_rfc822_addresses_test_() ->
+    Check = fun(Cases) ->
+        fun() ->
+            lists:foreach(
+                fun({Expected, Input}) ->
+                    ?assertEqual({ok, Expected}, smtp_util:parse_rfc822_addresses(Input))
+                end,
+                Cases)
+        end
+    end,
+    [
+     {"Empty address list",
+      Check([
+          {[], <<>>},
+          {[], <<" ">>},
+          {[], <<" \r\n\t ">>},
+          {[], <<"\n">>}
+      ])},
+     {"Single addresses",
+      Check([
+          {[{undefined, "john@doe.com"}], <<"john@doe.com">>},
+          {[{"Fræderik Hølljen", "me@example.com"}], <<"Fræderik Hølljen <me@example.com>">>},
+          {[{undefined, "john@doe.com"}], <<"<john@doe.com>">>},
+          {[{"John", "john@doe.com"}], <<"John <john@doe.com>">>},
+          {[{"John Doe", "john@doe.com"}], <<"John Doe <john@doe.com>">>},
+          {[{"John Doe", "john@doe.com"}], <<"\"John Doe\" <john@doe.com>">>},
+          {[{"John \"Mighty\" Doe", "john@doe.com"}], <<"\"John \\\"Mighty\\\" Doe\" <john@doe.com>">>}
+      ])},
+     {"Multiple addresses",
+      Check([
+          {[{undefined, "a@a.com"}, {undefined, "b@b.com"}], <<"a@a.com,b@b.com">>},
+          {[{undefined, "a,a@a.com"}, {undefined, "b@b.com"}], <<"<a,a@a.com>,b@b.com">>},
+          {[{"Jan", "a,a@a.com"}, {undefined, "b@b.com"}], <<"Jan <a,a@a.com>,b@b.com">>},
+          {[{"Jan", "a,a@a.com"}, {"Berend Botje", "b@b.com"}], <<"Jan <a,a@a.com>,\"Berend Botje\" <b@b.com>">>}
+      ])}
+    ].
+
+%% Table driven tests for smtp_util:combine_rfc822_addresses/1. Every case
+%% is an {ExpectedBinary, Addresses} pair; the cases of one group are checked
+%% in order inside a single test fun.
+combine_rfc822_addresses_test_() ->
+    Check = fun(Cases) ->
+        fun() ->
+            lists:foreach(
+                fun({Expected, Addresses}) ->
+                    ?assertEqual(Expected, smtp_util:combine_rfc822_addresses(Addresses))
+                end,
+                Cases)
+        end
+    end,
+    [
+     {"One address",
+      Check([
+          {<<"john@doe.com">>, [{undefined, "john@doe.com"}]},
+          {<<"John <john@doe.com>">>, [{"John", "john@doe.com"}]},
+          {<<"\"John \\\"Foo\" <john@doe.com>">>, [{"John \"Foo", "john@doe.com"}]}
+      ])},
+     {"Multiple addresses",
+      Check([
+          {<<"john@doe.com, foo@bar.com">>, [{undefined, "john@doe.com"}, {undefined, "foo@bar.com"}]},
+          {<<"John <john@doe.com>, foo@bar.com">>, [{"John", "john@doe.com"}, {undefined, "foo@bar.com"}]}
+      ])}
+    ].
+
+%% Parsing an address list and combining the result again must give back
+%% the original binary.
+rfc822_addresses_roundtrip_test() ->
+    Original = <<"Jan <a,a@a.com>, Berend Botje <b@b.com>">>,
+    {ok, Addresses} = smtp_util:parse_rfc822_addresses(Original),
+    Rebuilt = smtp_util:combine_rfc822_addresses(Addresses),
+    ?assertEqual(Original, Rebuilt),
+    ok.
+
+
Please sign in to comment.
Something went wrong with that request. Please try again.