Permalink
Browse files

Added URI::Escape and tests for same. Fixed spacing in grammar.

  • Loading branch information...
1 parent fc55135 commit 81d0afcc5138c1fbf199610cb65a0ea74b11f1fe U-ron-PC\ron committed May 12, 2009
Showing with 115 additions and 2 deletions.
  1. +0 −2 lib/IETF/RFC_Grammar/URI.pm
  2. +64 −0 lib/URI/Escape.pm
  3. +51 −0 t/escape.t
View
2 lib/IETF/RFC_Grammar/URI.pm
@@ -54,12 +54,10 @@ grammar IETF::RFC_Grammar::URI is IETF::RFC_Grammar::IPv6 {
token path_segments { <.segment> [ '/' <.segment> ] * };
-
token segment { <.pchar>* [ ';' <.param>]* };
token param { <.pchar>* };
token pchar { <[:@&=+$,] +unreserved> | <.escaped> };
-
token query { <.uric>* };
token fragment { <.uric>* };
View
64 lib/URI/Escape.pm
@@ -0,0 +1,64 @@
+use v6;
+
+package URI::Escape {
+
+ use IETF::RFC_Grammar::URI;
+
+ our %escapes;
+
+ for 0 .. 255 -> $c { # map broken in module / package ?
+ %escapes{ chr($c) } = sprintf "%%%02X", $c
+ }
+
+ sub uri_escape($s is copy) is export {
+ my $rc;
+ while $s {
+ if my $not_escape = $s ~~ /^<IETF::RFC_Grammar::URI::unreserved>+/ {
+ $rc ~= $not_escape;
+ $s.=substr($not_escape.chars);
+ }
+ if my $escape = $s ~~ /^<- IETF::RFC_Grammar::URI::unreserved>+/ {
+ $rc ~= ($escape.comb().map: {
+ %escapes{ chr(ord($_)) } || # chr(ord()) ??? @#^^!! it works
+ die 'Can\'t escape \\' ~ sprintf('x{%04X}, try uri_escape_utf8() instead',
+ ord($_))
+ }).join;
+ $s.=substr($escape.chars);
+ }
+ }
+
+ return $rc;
+ }
+
+ sub uri_unescape(*@to_unesc) is export {
+ my @rc;
+ for @to_unesc -> $s is copy {
+ my $rc;
+ while my $next_unescape = $s ~~ /.*? '%' (<.xdigit> <.xdigit>)/ {
+ $rc ~= $next_unescape.substr(0, -3) ~
+ chr( :16($next_unescape[0]) );
+ $s.=substr($next_unescape.chars);
+ }
+ @rc.push( $rc ~ $s );
+ }
+ return @rc;
+ }
+
+}
+
+=begin pod
+
+=head NAME
+
+URI::Escape - Escape and unescape unsafe characters
+
+=head SYNOPSYS
+
+ use URI::Escape;
+
+ my $escaped = uri_escape("10% is enough\n");
+ my $un_escaped = uri_unescape('10%25%20is%20enough%0A');
+
+=end pod
+
+# vim:ft=perl6
View
51 t/escape.t
@@ -0,0 +1,51 @@
+use v6;
+
+# Copied, with minor translation to Perl6, from the escape.t file
+# in the CPAN URI distribution
+
+use Test;
+plan 7;
+
+use URI::Escape;
+
+ok(1,'We use URI::Escape and we are still alive');
+
+is uri_escape('abcDEF?$%@h&m'), 'abcDEF%3F%24%25%40h%26m',
+ 'basic ascii escape test';
+
+is uri_escape('|abcå'), '%7Cabc%E5', 'basic latin-1 escape test';
+
+ok not defined uri_escape(undef), 'undef returns undef';
+
+is uri_unescape('%7Cabc%E5'), '|abcå', 'basic latin-1 unescape test';
+
+is uri_unescape("%40A%42", "CDE", "F%47H"), <@AB CDE FGH>,
+ 'unescape list';
+
+eval 'print uri_escape("abc" ~ chr(300))';
+ok ~$! ~~ /^'Can\'t escape \\x{012C}, try uri_escape_utf8\(\) instead'/,
+ 'verify unicode limitation'
+
+=begin
+
+# todo tests
+
+print "not " unless uri_escape("abc", "b-d") eq "a%62%63";
+print "ok 2\n";
+
+use URI::Escape :escapes;
+
+print "not" unless $escapes{"%"} eq "%25";
+print "ok 6\n";
+
+use URI::Escape qw(uri_escape_utf8);
+
+print "not " unless uri_escape_utf8("|abcå") eq "%7Cabc%C3%A5";
+print "ok 7\n";
+
+print "not " unless uri_escape_utf8(chr(0xFFF)) eq "%E0%BF%BF";
+print "ok 9\n";
+
+=end
+
+# vim:ft=perl6

0 comments on commit 81d0afc

Please sign in to comment.