Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

Simplify escape and unescape #7

Merged
merged 2 commits into from

2 participants

@moritz

No functional changes, but much shorter (and IMHO easier to understand) code.

@masak masak merged commit 975949f into ihrd:master
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Aug 26, 2012
  1. @moritz

    simplify uri_escape

    moritz authored
  2. @moritz

    simplify uri_unescape

    moritz authored
This page is out of date. Refresh to see the latest.
Showing with 12 additions and 56 deletions.
  1. +12 −56 lib/URI/Escape.pm
View
68 lib/URI/Escape.pm
@@ -17,18 +17,12 @@ package URI::Escape {
sub uri_escape($s, Bool :$no_utf8 = False) is export {
return $s unless defined $s;
- $s.subst(:g, rx/<- [!*'()\-._~A..Za..z0..9]>+/,
- -> $escape {
- ($escape.Str.comb.map: {
- ( $no_utf8 || ! 0x80 +& ord($_) ) ?? %escapes{ $_ } !!
- do {
- my $buf = $_.encode;
- for (0 ..^ $buf.elems) {
- sprintf '%%%02X', $buf[ $_ ]
- }
- }
- }).join;
- });
+ $s.subst(:g, rx/<- [!*'()\-._~A..Za..z0..9]>/,
+ {
+ ( $no_utf8 || ! 0x80 +& ord(.Str) ) ?? %escapes{ .Str } !!
+ %escapes{.Str.encode.list>>.chr}.join;
+ }
+ );
}
# todo - automatic invalid UTF-8 detection
@@ -36,58 +30,20 @@ package URI::Escape {
# find first sequence of %[89ABCDEF]<.xdigit>
# use algorithm from url to determine if it's valid UTF-8
sub uri_unescape(*@to_unesc, Bool :$no_utf8 = False) is export {
- my @rc;
- for @to_unesc -> $s is copy {
- my $rc = '';
- my $last_pos = 0;
- while $s ~~ m:c/[ '%' (<.xdigit><.xdigit>)]+/ {
- $rc ~= $s.substr($last_pos, $/.from - $last_pos);
-
- # should be a better way with list context
- my @encoded_octets = map { :16( ~.value ) }, $/.caps;
- # common case optimization
- while @encoded_octets and ($no_utf8 or @encoded_octets[0] < 0x80) {
- $rc ~= chr(shift @encoded_octets);
- }
- # if any utf8 ...
- while @encoded_octets {
- my ($code_point, $utf8_len) = utf8_octets_2_codepoint(
- @encoded_octets
- );
- @encoded_octets.splice(0, $utf8_len);
- $rc ~= chr($code_point);
- }
- $last_pos = $/.to;
- }
- $rc ~= $s.substr($last_pos);
- $rc .= trans('+' => ' ');
- @rc.push($rc);
+ my @rc = @to_unesc.map: {
+ .trans('+' => ' ')\
+ .subst(:g, / '%' (<.xdigit> ** 2 ) /, -> $/ {
+ :16(~$0).chr;
+ })
}
+ @rc.=map(*.encode('latin-1').decode('UTF-8')) unless $no_utf8;
return do given @rc.elems { # this might be simplified some day
when 0 { Nil }
when 1 { @rc[0] }
default { @rc }
}
}
-
- # Stole parts from Masak November::CGI and parts from Parrot's UTF-8 decode
- sub utf8_octets_2_codepoint(@octets) {
- if @octets[ 0 ] < 0x80 { # completeness
- return @octets[0], 1
- }
-
- my $len = 1;
-
- while 0x80 +> ++$len +& @octets[0] and $len < 6 {}
-
- my $max_shift = 6 * ($len -1);
- my $code_point = reduce {
- $^a + @octets[ $^b ] +& 0x3F +< ($max_shift - 6 * $^b)
- }, 0x7F +> $len +& @octets[0] +< $max_shift, 1 ..^ $len;
-
- return $code_point, $len;
- }
}
=begin pod
Something went wrong with that request. Please try again.