Permalink
Browse files

Add parser for RFC 822-style address lists; encode all header values …

…with RFC2047.
  • Loading branch information...
1 parent b9f1196 commit 749b140dd9f75dd327ab7ee63af72fa78e313ade @arjan arjan committed Nov 5, 2012
Showing with 187 additions and 9 deletions.
  1. +2 −1 .gitignore
  2. +13 −7 src/mimemail.erl
  3. +29 −0 src/smtp_rfc822_parse.yrl
  4. +67 −1 src/smtp_util.erl
  5. +76 −0 test/gen_smtp_util_test.erl
View
@@ -6,4 +6,5 @@ coverage/*
build
*.xcodeproj
.eunit/
-ebin/gen_smtp.app
+ebin/gen_smtp.app
+src/smtp_rfc822_parse.erl
View
@@ -688,10 +688,15 @@ encode_folded_header(Header, HeaderLines) ->
encode_folded_header(TabbedRemainder, [])
end.
-encode_header_value(<<"Subject">>, Value) ->
- rfc2047_utf8_encode(Value);
+%% Address-list headers (To, Cc, Bcc, Reply-To, From): the value is split
+%% into {Name, Email} pairs, only the display names are RFC 2047 encoded,
+%% and the pairs are joined back into one header value. The header name is
+%% compared exactly, so other spellings fall through to the generic clause.
+%% The `{ok, Addresses}' match raises badmatch when the parser does not
+%% return an ok tuple.
+encode_header_value(H, Value) when H =:= <<"To">>; H =:= <<"Cc">>; H =:= <<"Bcc">>;
+ H =:= <<"Reply-To">>; H =:= <<"From">> ->
+ {ok, Addresses} = smtp_util:parse_rfc822_addresses(Value),
+ {Names, Emails} = lists:unzip(Addresses),
+ NewNames = lists:map(fun rfc2047_utf8_encode/1, Names),
+ smtp_util:combine_rfc822_addresses(lists:zip(NewNames, Emails));
+
+%% Every other header: the whole value is handed to rfc2047_utf8_encode/1
+%% (before this change only Subject was encoded and the rest passed through).
encode_header_value(_, Value) ->
- Value.
+ rfc2047_utf8_encode(Value).
encode_component(_Type, _SubType, Headers, Params, Body) ->
if
@@ -833,6 +838,7 @@ fix_encoding(Encoding) ->
%% @doc Encode a binary or list according to RFC 2047. Input is
%% assumed to be in UTF-8 encoding.
+rfc2047_utf8_encode(undefined) -> undefined;
rfc2047_utf8_encode(B) when is_binary(B) ->
rfc2047_utf8_encode(binary_to_list(B));
rfc2047_utf8_encode([]) ->
@@ -843,7 +849,7 @@ rfc2047_utf8_encode(Text) ->
%% Don't escape when all characters are ASCII printable
+%% The first argument is the part of the text still to be inspected, the
+%% second is the complete text. When every character lies in 32..126 the
+%% text is returned untouched; the first character outside that range makes
+%% the whole text an encoded word: "=?UTF-8?Q?" ++ Encoded ++ "?=".
rfc2047_utf8_encode([], Text) ->
Text;
-rfc2047_utf8_encode([H|T], Text) when H >= 32 andalso H =< 126 andalso H /= $= ->
+%% The new guard no longer treats `=' as a reason to encode.
+rfc2047_utf8_encode([H|T], Text) when H >= 32 andalso H =< 126 ->
rfc2047_utf8_encode(T, Text);
rfc2047_utf8_encode(_, Text) ->
"=?UTF-8?Q?" ++ rfc2047_utf8_encode(Text, [], 0) ++ "?=".
@@ -1526,11 +1532,11 @@ encoding_test_() ->
?assertEqual(Result, encode(Email))
end
},
- {"Email with UTF-8 characters in subject",
+ {"Email with UTF-8 characters",
fun() ->
Email = {<<"text">>, <<"plain">>, [
{<<"Subject">>, <<"Fræderik Hølljen">>},
- {<<"From">>, <<"me@example.com">>},
+ {<<"From">>, <<"Fræderik Hølljen <me@example.com>">>},
{<<"To">>, <<"you@example.com">>},
{<<"Message-ID">>, <<"<abcd@example.com>">>},
{<<"MIME-Version">>, <<"1.0">>},
@@ -1539,7 +1545,7 @@ encoding_test_() ->
[{<<"charset">>,<<"US-ASCII">>}],
{<<"disposition">>,<<"inline">>}}],
<<"This is a plain message">>},
- Result = <<"Subject: =?UTF-8?Q?Fr=C3=A6derik=20H=C3=B8lljen?=\r\nFrom: me@example.com\r\nTo: you@example.com\r\nMessage-ID: <abcd@example.com>\r\nMIME-Version: 1.0\r\nDate: Sun, 01 Nov 2009 14:44:47 +0200\r\n\r\nThis is a plain message">>,
+ Result = <<"Subject: =?UTF-8?Q?Fr=C3=A6derik=20H=C3=B8lljen?=\r\nFrom: =?UTF-8?Q?Fr=C3=A6derik=20H=C3=B8lljen?= <me@example.com>\r\nTo: you@example.com\r\nMessage-ID: <abcd@example.com>\r\nMIME-Version: 1.0\r\nDate: Sun, 01 Nov 2009 14:44:47 +0200\r\n\r\nThis is a plain message">>,
?assertEqual(Result, encode(Email))
end
},
View
@@ -0,0 +1,29 @@
+%% Yecc grammar for a comma-separated list of mail addresses.
+%% The token stream contains `string' tokens ({string, Line, Chars}) and
+%% the punctuation tokens ',', '<' and '>'. The result of a successful
+%% parse is a list of {Name, Email} tuples; Name is `undefined' when the
+%% address carries no display name.
+Nonterminals
+ addresses
+ address
+ name
+ names
+ email.
+
+Terminals
+ string
+ ',' '<' '>'.
+
+Rootsymbol
+ addresses.
+
+Endsymbol
+ '$end'.
+
+%% A list is empty, a single address, or an address followed by ',' and
+%% another list (so a trailing comma is accepted).
+addresses -> address : ['$1'].
+addresses -> address ',' addresses : ['$1' | '$3'].
+addresses -> '$empty' : [].
+
+%% Three address shapes: bare email, <email>, and one or more names
+%% followed by <email>. The names are flattened into one string.
+address -> email : {undefined, '$1'}.
+address -> '<' email '>' : {undefined, '$2'}.
+address -> names '<' email '>' : {lists:flatten('$1'), '$3'}.
+
+%% element(3, Token) picks the characters out of {string, Line, Chars}.
+%% Consecutive names are joined with a single space.
+email -> string : element(3, '$1').
+names -> name : '$1'.
+names -> name names : ['$1', " " | '$2'].
+name -> string : element(3, '$1').
View
@@ -26,7 +26,9 @@
-export([
mxlookup/1, guess_FQDN/0, compute_cram_digest/2, get_cram_string/1,
trim_crlf/1, rfc5322_timestamp/0, zone/0, generate_message_id/0,
- generate_message_boundary/0]).
+ parse_rfc822_addresses/1,
+ combine_rfc822_addresses/1,
+ generate_message_boundary/0]).
%% @doc returns a sorted list of mx servers for `Domain', lowest distance first
mxlookup(Domain) ->
@@ -111,4 +113,68 @@ generate_message_boundary() ->
["_=", [io_lib:format("~2.36.0b", [X]) || <<X>> <= erlang:md5(term_to_binary([erlang:now(), FQDN]))], "=_"].
+%% Guard-safe test: every character code up to and including 32 (space)
+%% counts as whitespace, which covers tab, CR and LF as well.
+-define(is_whitespace(Ch), (Ch =< 32)).
+%% @doc Render a list of `{Name, Email}' tuples as one header value, with
+%% the addresses separated by ", ". `Name' may be `undefined', in which
+%% case only the email is written. Returns a binary.
+%%
+%% An empty list yields an empty binary. Without the dedicated clause the
+%% `[_, _ | Acc]' match below raised badmatch for `[]', which is what
+%% parse_rfc822_addresses/1 returns for an empty header value.
+combine_rfc822_addresses([]) ->
+    <<>>;
+combine_rfc822_addresses(Addresses) ->
+    %% The arity-2 worker builds the output reversed and pushes a ", "
+    %% separator after every address; drop the one after the last address.
+    [_, _ | Acc] = combine_rfc822_addresses(Addresses, []),
+    iolist_to_binary(lists:reverse(Acc)).
+
+%% Worker for combine_rfc822_addresses/1. Pushes the pieces of every
+%% address onto the accumulator in reverse order, each followed by the
+%% separator characters `,' and space (32). The caller reverses the result.
+combine_rfc822_addresses(Addresses, Acc0) ->
+    Push = fun({undefined, Email}, Acc) ->
+                   [32, $,, Email | Acc];
+              ({Name, Email}, Acc) ->
+                   [32, $,, $>, Email, $<, 32, opt_quoted(Name) | Acc]
+           end,
+    lists:foldl(Push, Acc0, Addresses).
+
+%% @doc Return the display name `N' ready to be written in front of
+%% `<email>'. A name without special characters is returned unchanged.
+%% A name containing any RFC 5322 "special" -- ( ) < > [ ] : ; @ \ , . or
+%% a double quote -- is wrapped in double quotes, with embedded double
+%% quotes and backslashes escaped by a backslash.
+%%
+%% Previously only names containing a double quote were quoted, so a name
+%% such as `Doe, John' was written bare and read back as two addresses.
+%% RFC 2047 encoded words contain none of the specials and stay unquoted.
+opt_quoted(N) ->
+    case re:run(N, "[()<>\\[\\]:;@\\\\,.\"]", [{capture, none}]) of
+        nomatch ->
+            N;
+        match ->
+            %% "\\\\&" is a literal backslash followed by the matched char.
+            [$", re:replace(N, "[\"\\\\]", "\\\\&", [global]), $"]
+    end.
+
+%% @doc Parse an address-list header value, given as a binary or a string.
+%% The input is tokenised with scan_rfc822/2 and the tokens, terminated by
+%% an end marker, are handed to the generated smtp_rfc822_parse parser,
+%% whose result is returned as is.
+parse_rfc822_addresses(B) when is_binary(B) ->
+    parse_rfc822_addresses(binary_to_list(B));
+
+parse_rfc822_addresses(S) when is_list(S) ->
+    ReversedTokens = scan_rfc822(S, []),
+    %% The scanner returns tokens newest-first; reverse and append '$end'.
+    Tokens = lists:reverse(ReversedTokens, [{'$end', 0}]),
+    smtp_rfc822_parse:parse(Tokens).
+
+%% Tokeniser for address lists. Walks the input and returns the tokens in
+%% reverse order: {string, 0, Chars} for words, quoted strings and the text
+%% between angle brackets, and {',', 0}, {'<', 0}, {'>', 0} for punctuation.
+%% Whitespace (see ?is_whitespace) separates words and is dropped.
+%%
+%% Words are split with lists:splitwith/2 rather than a regular expression.
+%% The regex used before had two problems: `.' does not match a newline, so
+%% everything after a line break in a folded header was silently dropped;
+%% and a `>' without a preceding `<' produced an empty token while leaving
+%% the input unchanged, so the scanner never terminated.
+scan_rfc822([], Acc) ->
+    Acc;
+scan_rfc822([Ch|R], Acc) when ?is_whitespace(Ch) ->
+    scan_rfc822(R, Acc);
+scan_rfc822([$"|R], Acc) ->
+    {Token, Rest} = scan_rfc822_scan_endquote(R, [], false),
+    scan_rfc822(Rest, [{string, 0, Token}|Acc]);
+scan_rfc822([$,|Rest], Acc) ->
+    scan_rfc822(Rest, [{',', 0}|Acc]);
+scan_rfc822([$<|Rest], Acc) ->
+    {Token, R} = scan_rfc822_scan_endpointybracket(Rest),
+    scan_rfc822(R, [{'>', 0}, {string, 0, Token}, {'<', 0}|Acc]);
+scan_rfc822([$>|Rest], Acc) ->
+    %% Unbalanced '>': emit it as a token so the parser reports the error.
+    scan_rfc822(Rest, [{'>', 0}|Acc]);
+scan_rfc822(String, Acc) ->
+    %% The first character is a word character here (all delimiters were
+    %% handled above), so Token is never empty and the scan always advances.
+    IsWordChar = fun(Ch) ->
+                         not (?is_whitespace(Ch) orelse Ch =:= $<
+                              orelse Ch =:= $> orelse Ch =:= $,)
+                 end,
+    {Token, Rest} = lists:splitwith(IsWordChar, String),
+    scan_rfc822(Rest, [{string, 0, Token}|Acc]).
+
+%% Split the text following a `<' at the first `>'. Returns {Inside, Rest},
+%% where Rest is everything after that `>'. When there is no `>' at all the
+%% whole input is returned as Inside and Rest is empty.
+%%
+%% Uses lists:splitwith/2 instead of the regex "(.*?)>(.*)": `.' does not
+%% match a newline, so the regex lost text before or after a line break.
+scan_rfc822_scan_endpointybracket(String) ->
+    case lists:splitwith(fun(Ch) -> Ch =/= $> end, String) of
+        {Token, [$>|Rest]} ->
+            {Token, Rest};
+        {Token, []} ->
+            {Token, []}
+    end.
+
+%% Read the body of a quoted string; the opening quote has already been
+%% consumed. Returns {Chars, Rest}, where Rest is the input after the
+%% closing quote. `Acc' collects the characters in reverse and `InEscape'
+%% is true directly after a backslash.
+%%
+%% A backslash makes the next character literal, whatever it is, so both
+%% \" and \\ are kept (an escaped backslash used to be dropped). When the
+%% input ends before the closing quote, what was read so far is returned
+%% with an empty Rest, the same leniency the angle-bracket scanner shows
+%% for a missing `>' (this case used to crash with function_clause).
+scan_rfc822_scan_endquote([], Acc, _InEscape) ->
+    {lists:reverse(Acc), []};
+scan_rfc822_scan_endquote([Ch|R], Acc, true) ->
+    %% Escaped character: take it literally.
+    scan_rfc822_scan_endquote(R, [Ch|Acc], false);
+scan_rfc822_scan_endquote([$\\|R], Acc, false) ->
+    %% Start of an escape; the backslash itself is not part of the value.
+    scan_rfc822_scan_endquote(R, Acc, true);
+scan_rfc822_scan_endquote([$"|Rest], Acc, false) ->
+    %% Closing quote.
+    {lists:reverse(Acc), Rest};
+scan_rfc822_scan_endquote([Ch|Rest], Acc, false) ->
+    scan_rfc822_scan_endquote(Rest, [Ch|Acc], false).
@@ -0,0 +1,76 @@
+-module(gen_smtp_util_test).
+
+-compile(export_all).
+
+-include_lib("eunit/include/eunit.hrl").
+
+%% Two bare words are neither an email nor a name followed by `<email>',
+%% so the grammar rejects the input. The test used to call the parser
+%% without checking the result, which could only fail on a crash.
+test_test() ->
+    ?assertMatch({error, _}, smtp_util:parse_rfc822_addresses("foo bar")).
+
+%% Test generator for smtp_util:parse_rfc822_addresses/1: blank input,
+%% the single-address shapes (bare, bracketed, named, quoted) and
+%% comma-separated lists, including a comma inside angle brackets.
+parse_rfc822_addresses_test_() ->
+    Parse = fun smtp_util:parse_rfc822_addresses/1,
+    Empty =
+        fun() ->
+                ?assertEqual({ok, []}, Parse(<<>>)),
+                ?assertEqual({ok, []}, Parse(<<" ">>)),
+                ?assertEqual({ok, []}, Parse(<<" \r\n\t ">>)),
+                ?assertEqual({ok, []}, Parse(<<"
+">>))
+        end,
+    Single =
+        fun() ->
+                ?assertEqual({ok, [{undefined, "john@doe.com"}]},
+                             Parse(<<"john@doe.com">>)),
+                ?assertEqual({ok, [{"Fræderik Hølljen", "me@example.com"}]},
+                             Parse(<<"Fræderik Hølljen <me@example.com>">>)),
+                ?assertEqual({ok, [{undefined, "john@doe.com"}]},
+                             Parse(<<"<john@doe.com>">>)),
+                ?assertEqual({ok, [{"John", "john@doe.com"}]},
+                             Parse(<<"John <john@doe.com>">>)),
+                ?assertEqual({ok, [{"John Doe", "john@doe.com"}]},
+                             Parse(<<"John Doe <john@doe.com>">>)),
+                ?assertEqual({ok, [{"John Doe", "john@doe.com"}]},
+                             Parse(<<"\"John Doe\" <john@doe.com>">>)),
+                ?assertEqual({ok, [{"John \"Mighty\" Doe", "john@doe.com"}]},
+                             Parse(<<"\"John \\\"Mighty\\\" Doe\" <john@doe.com>">>))
+        end,
+    Multiple =
+        fun() ->
+                ?assertEqual({ok, [{undefined, "a@a.com"}, {undefined, "b@b.com"}]},
+                             Parse(<<"a@a.com,b@b.com">>)),
+                ?assertEqual({ok, [{undefined, "a,a@a.com"}, {undefined, "b@b.com"}]},
+                             Parse(<<"<a,a@a.com>,b@b.com">>)),
+                ?assertEqual({ok, [{"Jan", "a,a@a.com"}, {undefined, "b@b.com"}]},
+                             Parse(<<"Jan <a,a@a.com>,b@b.com">>)),
+                ?assertEqual({ok, [{"Jan", "a,a@a.com"}, {"Berend Botje", "b@b.com"}]},
+                             Parse(<<"Jan <a,a@a.com>,\"Berend Botje\" <b@b.com>">>))
+        end,
+    [
+     {"Empty address list", Empty},
+     {"Single addresses", Single},
+     {"Multiple addresses", Multiple}
+    ].
+
+%% Test generator for smtp_util:combine_rfc822_addresses/1: a single
+%% address with and without a name, a name that needs quoting, and lists
+%% joined with ", ".
+combine_rfc822_addresses_test_() ->
+    Combine = fun smtp_util:combine_rfc822_addresses/1,
+    One =
+        fun() ->
+                ?assertEqual(<<"john@doe.com">>,
+                             Combine([{undefined, "john@doe.com"}])),
+                ?assertEqual(<<"John <john@doe.com>">>,
+                             Combine([{"John", "john@doe.com"}])),
+                ?assertEqual(<<"\"John \\\"Foo\" <john@doe.com>">>,
+                             Combine([{"John \"Foo", "john@doe.com"}]))
+        end,
+    Multiple =
+        fun() ->
+                ?assertEqual(<<"john@doe.com, foo@bar.com">>,
+                             Combine([{undefined, "john@doe.com"},
+                                      {undefined, "foo@bar.com"}])),
+                ?assertEqual(<<"John <john@doe.com>, foo@bar.com">>,
+                             Combine([{"John", "john@doe.com"},
+                                      {undefined, "foo@bar.com"}]))
+        end,
+    [
+     {"One address", One},
+     {"Multiple addresses", Multiple}
+    ].
+
+%% Parsing a header value and combining the result again must give back
+%% the original text for a list that is already in canonical form.
+rfc822_addresses_roundtrip_test() ->
+    Original = <<"Jan <a,a@a.com>, Berend Botje <b@b.com>">>,
+    {ok, Pairs} = smtp_util:parse_rfc822_addresses(Original),
+    Rebuilt = smtp_util:combine_rfc822_addresses(Pairs),
+    ?assertEqual(Original, Rebuilt),
+    ok.
+
+

0 comments on commit 749b140

Please sign in to comment.