Permalink
Browse files

uri_escape now percent-escapes non-ASCII characters as UTF-8 (unless no_utf8 is set), and a likely bug in UTF-8 decoding is fixed

  • Loading branch information...
ronaldxs committed Sep 2, 2011
1 parent 5f70339 commit 6b17b93f8fb3cbfac437676b480c53ff2e3cab1b
Showing with 22 additions and 25 deletions.
  1. +19 −18 lib/URI/Escape.pm
  2. +3 −7 t/escape.t
View
@@ -16,24 +16,25 @@ package URI::Escape {
# commented line below used to work ...
# token artifact_unreserved {<[!*'()] +IETF::RFC_Grammar::URI::unreserved>};
- sub uri_escape($s is copy) is export {
+ sub uri_escape($s is copy, Bool :$no_utf8 = False) is export {
my $rc;
- while $s {
- # regexes kludged for many broken things in rakudo
- if my $not_escape = $s ~~ /^<[!*'()\-._~A..Za..z0..9]>+/ {
- $rc ~= $not_escape;
- $s.=substr($not_escape.chars);
- }
- if my $escape = $s ~~ /^<- [!*'()\-._~A..Za..z0..9]>+/ {
- $rc ~= ($escape.comb().map: {
- %escapes{ $_ } ||
- die 'Can\'t escape \\' ~ sprintf(
- 'x{%04X}, try uri_escape_utf8() some day instead',
- ord($_))
- }).join;
- $s.=substr($escape.chars);
- }
+ my $last_pos = 0;
+
+ while my $escape = $s ~~ m:c/<- [!*'()\-._~A..Za..z0..9]>+/ {
+ $rc ~= $s.substr($last_pos, $/.from - $last_pos);
+ $rc ~= ($escape.comb().map: {
+ ( $no_utf8 || ! 0x80 +& ord($_) ) ?? %escapes{ $_ } !!
+ do {
+ my $buf = $_.encode;
+ for (0 ..^ $buf.elems) {
+ sprintf "%%%02X", $buf[ $_ ]
+ }
+ }
+ }).join;
+ $last_pos = $/.to;
}
+ # $s.defined test needed because of bug fixed in nom
+ if $s.defined and $s.chars > $last_pos { $rc ~= $s.substr($last_pos) }
return $rc;
}
@@ -84,9 +85,9 @@ package URI::Escape {
return @octets[0], 1
}
- my $len = 2;
+ my $len = 1;
- while 0x80 +> $len +& @octets[0] and ++$len <= 6 {}
+ while 0x80 +> ++$len +& @octets[0] and $len < 6 {}
my $max_shift = 6 * ($len -1);
my $code_point = reduce {
View
@@ -13,18 +13,14 @@ ok(1,'We use URI::Escape and we are still alive');
is uri_escape('abcDEF?$%@h&m'), 'abcDEF%3F%24%25%40h%26m',
'basic ascii escape test';
-is uri_escape('|abcå'), '%7Cabc%E5', 'basic latin-1 escape test';
-
-ok not defined uri_escape(Str), 'undef returns undef';
+is uri_escape(no_utf8 => True, '|abcå'), '%7Cabc%E5', 'basic latin-1 escape test';
+is uri_escape('|abcå'), '%7Cabc%C3%A5', 'basic utf-8 escape test';
is uri_unescape(no_utf8 => True, '%7C%25abc%E5'), '|%abcå', 'basic latin-1 unescape test';
is uri_unescape('%7C%25abc%C3%A5'), '|%abcå', 'basic utf8 unescape test';
is uri_unescape("%40A%42", "CDE", "F%47++H"), ['@AB', 'CDE', 'FG H'],
'unescape list';
-
-eval 'print uri_escape("abc" ~ chr(300))';
-ok ~$! ~~ /^'Can\'t escape \x{012C}, try uri_escape_utf8() some day instead'/,
- 'verify unicode limitation'
+ok not defined uri_escape(Str), 'undef returns undef';
# vim:ft=perl6

0 comments on commit 6b17b93

Please sign in to comment.