Skip to content
Permalink
Browse files

Fix pathological backtracking in cfws, quoted strings

This reprises 0ed9339, more broadly.  Backtracking is only
necessary when parsing of a token could be ambiguous -- and the BNF
for email addresses is many things, but thankfully is essentially never
ambiguous.

Alter `cfws` to mean "one or more comments, or just space" -- since
comments include space, this is no loss in generality over
`(comment|space)*`.  Using this phrasing prevents backtracking into
the `cfws` token, which is key because of the nested nature of the
comments regex therein.  Changing the `atom` definition is the only
one strictly necessary to resolve this particular case, but greediness
is applied throughout, for safety.
  • Loading branch information...
alexmv authored and sunnavy committed Dec 3, 2018
1 parent 6a42da3 commit a22e6b233443fe3ad1a408e50ecbd7237674817d
Showing with 14 additions and 14 deletions.
  1. +14 −14 lib/Email/Address/List.pm
@@ -201,49 +201,49 @@ $RE{'text'} = qr/[^\x0A\x0D]/;
# NOTE: this span is a rendered diff without +/- markers. Wherever two
# consecutive statements assign the same key, the first is the line the
# commit removed and the second is the line that replaced it.

# A backslash followed by one 'text' character.
$RE{'quoted_pair'} = qr/\\$RE{'text'}/;

# One character that is neither whitespace nor in the $RE{'CTL'} /
# $RE{'special'} sets (both are defined outside this view).
$RE{'atext'} = qr/[^$RE{'CTL'}$RE{'special'}\s]/;
# Run of characters other than parentheses and backslash, matched without
# giving characters back on backtracking. Removed form: atomic group.
$RE{'ctext'} = qr/(?>[^()\\]+)/;
# Replacement: the same run written with a possessive quantifier.
$RE{'ctext'} = qr/[^()\\]++/;
# One character other than backslash or double quote.
$RE{'qtext'} = qr/[^\\"]/;
# One character other than '[', ']' or backslash.
$RE{'dtext'} = qr/[^\[\]\\]/;

# Both start as empty strings; each loop pass rebuilds 'ccontent' from the
# previous 'comment' and then 'comment' from that 'ccontent', so the
# pattern gains one level of parenthesis nesting per pass, for
# $COMMENT_NEST_LEVEL passes.
($RE{'ccontent'}, $RE{'comment'}) = (q{})x2;
for (1 .. $COMMENT_NEST_LEVEL) {
# On the first pass $RE{'comment'} is still empty, so the last alternative
# is empty.
$RE{'ccontent'} = qr/$RE{'ctext'}|$RE{'quoted_pair'}|$RE{'comment'}/;
# Removed form: optional whitespace around a parenthesised body, using
# ordinary (backtracking) quantifiers.
$RE{'comment'} = qr/\s*\((?:\s*$RE{'ccontent'})*\s*\)\s*/;
# Replacement: same shape, but every quantifier is possessive and the
# whole comment is wrapped in an atomic group.
$RE{'comment'} = qr/(?>\s*+\((?:\s*+$RE{'ccontent'})*+\s*+\)\s*+)/;
}
# Removed form: exactly one comment, or one or more whitespace characters.
$RE{'cfws'} = qr/$RE{'comment'}|\s+/;
# Replacement: one or more comments, or zero or more whitespace characters,
# both possessive. This form can match the empty string, so users of
# 'cfws' no longer need to append '*'.
$RE{'cfws'} = qr/$RE{'comment'}++|\s*+/;

# NOTE: this span is a rendered diff without +/- markers. Wherever two
# consecutive statements assign the same key, the first is the line the
# commit removed and the second is the line that replaced it.

# One unit of quoted-string content: a 'qtext' character or a quoted pair.
$RE{'qcontent'} = qr/$RE{'qtext'}|$RE{'quoted_pair'}/;
# Removed form: double-quoted content with 'cfws' repeated ('*') on each side.
$RE{'quoted-string'} = qr/$RE{'cfws'}*"$RE{'qcontent'}*"$RE{'cfws'}*/;
# Replacement: a single 'cfws' on each side and a possessive content loop.
$RE{'quoted-string'} = qr/$RE{'cfws'}"$RE{'qcontent'}*+"$RE{'cfws'}/;

# Removed form: a possessive run of 'atext' with 'cfws*' on each side.
$RE{'atom'} = qr/$RE{'cfws'}*$RE{'atext'}++$RE{'cfws'}*/;
# Replacement: same, with the '*' after each 'cfws' dropped.
$RE{'atom'} = qr/$RE{'cfws'}$RE{'atext'}++$RE{'cfws'}/;

# A word is an atom or a quoted string; /x makes the spaces around '|'
# insignificant.
$RE{'word'} = qr/$RE{'atom'} | $RE{'quoted-string'}/x;
# One or more words.
$RE{'phrase'} = qr/$RE{'word'}+/x;
# 'display-name' is the same pattern as 'phrase'.
$RE{'display-name'} = $RE{'phrase'};

# Removed forms: dot-separated runs of 'atext', and that text wrapped in
# 'cfws*'.
$RE{'dot_atom_text'} = qr/$RE{'atext'}+(?:\.$RE{'atext'}+)*/;
$RE{'dot_atom'} = qr/$RE{'cfws'}*$RE{'dot_atom_text'}$RE{'cfws'}*/;
# Replacements: the 'atext' runs become possessive and 'cfws' loses its '*'.
$RE{'dot_atom_text'} = qr/$RE{'atext'}++(?:\.$RE{'atext'}++)*/;
$RE{'dot_atom'} = qr/$RE{'cfws'}$RE{'dot_atom_text'}$RE{'cfws'}/;
# The part before '@': a dot-atom or a quoted string.
$RE{'local-part'} = qr/$RE{'dot_atom'}|$RE{'quoted-string'}/;

# One unit of domain-literal content: a 'dtext' character or a quoted pair.
$RE{'dcontent'} = qr/$RE{'dtext'}|$RE{'quoted_pair'}/;
# Removed form: bracketed content with 'cfws*' on each side.
$RE{'domain_literal'} = qr/$RE{'cfws'}*\[(?:\s*$RE{'dcontent'})*\s*\]$RE{'cfws'}*/;
# Replacement: only the '*' after each 'cfws' is dropped; the quantifiers
# inside the brackets are left as ordinary backtracking ones.
$RE{'domain_literal'} = qr/$RE{'cfws'}\[(?:\s*$RE{'dcontent'})*\s*\]$RE{'cfws'}/;
# The part after '@': a dot-atom or a bracketed domain literal.
$RE{'domain'} = qr/$RE{'dot_atom'}|$RE{'domain_literal'}/;

# NOTE: this span is a rendered diff without +/- markers. Wherever two
# consecutive lines differ only in their quantifiers, the first is the line
# the commit removed and the second is the line that replaced it.

# local-part, a literal '@', then domain.
$RE{'addr-spec'} = qr/$RE{'local-part'}\@$RE{'domain'}/;
# Removed form: '<addr-spec>' with 'cfws*' on each side (spaces are
# insignificant under /x).
$RE{'angle-addr'} = qr/$RE{'cfws'}* < $RE{'addr-spec'} > $RE{'cfws'}*/x;
# Replacement: same, with the '*' after each 'cfws' dropped.
$RE{'angle-addr'} = qr/$RE{'cfws'} < $RE{'addr-spec'} > $RE{'cfws'}/x;

# Optional display name followed by an angle-bracketed address.
$RE{'name-addr'} = qr/$RE{'display-name'}?$RE{'angle-addr'}/;
# Either address form, followed by any number of comments. This line has no
# paired replacement in the diff, so it keeps the non-possessive '*'.
$RE{'mailbox'} = qr/(?:$RE{'name-addr'}|$RE{'addr-spec'})$RE{'comment'}*/;

# Capturing variants: %CRE patterns wrap the interesting parts in capture
# groups. Here local-part and domain are captured separately.
$CRE{'addr-spec'} = qr/($RE{'local-part'})\@($RE{'domain'})/;
# Capturing mailbox. Inside the literal below, the diff shows two pairs of
# old/new lines: the two '($RE{'display-name'})?...' lines (the second drops
# the '*' after 'cfws') and the two closing ')($RE{'comment'}...)' lines
# (the second makes the trailing comment loop possessive). Only the second
# line of each pair belongs to the post-commit pattern.
$CRE{'mailbox'} = qr/
(?:
($RE{'display-name'})?($RE{'cfws'}*)<$CRE{'addr-spec'}>($RE{'cfws'}*)
($RE{'display-name'})?($RE{'cfws'})<$CRE{'addr-spec'}>($RE{'cfws'})
|$CRE{'addr-spec'}
)($RE{'comment'}*)
)($RE{'comment'}*+)
/x;

# Removed form: an atom, a bare dot, or a non-empty double-quoted run,
# with 'cfws*' on each side.
$RE{'dword'} = qr/$RE{'cfws'}* (?: $RE{'atom'} | \. | "$RE{'qcontent'}+" ) $RE{'cfws'}*/x;
# Replacement: 'cfws' loses its '*' and the quoted run becomes possessive.
$RE{'dword'} = qr/$RE{'cfws'} (?: $RE{'atom'} | \. | "$RE{'qcontent'}++" ) $RE{'cfws'}/x;
# A word followed by any number of 'dword's.
# NOTE(review): the 'obs-' prefix presumably marks RFC "obsolete" syntax -- confirm.
$RE{'obs-phrase'} = qr/$RE{'word'} $RE{'dword'}*/x;
# 'obs-display-name' is the same pattern as 'obs-phrase'.
$RE{'obs-display-name'} = $RE{'obs-phrase'};
$RE{'obs-route'} = qr/
@@ -259,9 +259,9 @@ $CRE{'obs-addr-spec'} = qr/($RE{'obs-local-part'})\@($RE{'obs-domain'})/;
# Capturing pattern for the 'obs-' mailbox form: an optional
# 'obs-display-name', then '<', an optional 'obs-route', the captured
# 'obs-addr-spec', and '>'; or a bare 'obs-addr-spec'; followed by trailing
# comments.
# NOTE: the literal below is a rendered diff without +/- markers. The two
# '($RE{'cfws'}...)< ... >' lines and the two closing ')($RE{'comment'}...)'
# lines are old/new pairs: the first of each pair was removed, and the
# second (no '*' after 'cfws'; possessive '*+' on the comment loop) is
# what the commit added.
$CRE{'obs-mailbox'} = qr/
(?:
($RE{'obs-display-name'})?
($RE{'cfws'}*)< $RE{'obs-route'}? $CRE{'obs-addr-spec'} >($RE{'cfws'}*)
($RE{'cfws'})< $RE{'obs-route'}? $CRE{'obs-addr-spec'} >($RE{'cfws'})
|$CRE{'obs-addr-spec'}
)($RE{'comment'}*)
)($RE{'comment'}*+)
/x;

sub parse {

0 comments on commit a22e6b2

Please sign in to comment.
You can’t perform that action at this time.