Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Simplify escape and unescape #7

Merged
merged 2 commits almost 2 years ago

2 participants

Moritz Lenz Carl Mäsak
Moritz Lenz

No functional changes, but much shorter (and IMHO easier to understand) code.

Carl Mäsak masak merged commit 975949f
Carl Mäsak masak closed this
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Showing 2 unique commits by 1 author.

Aug 26, 2012
Moritz Lenz moritz simplify uri_escape 926cdec
Moritz Lenz moritz simplify uri_unescape 012e825
This page is out of date. Refresh to see the latest.

Showing 1 changed file with 12 additions and 56 deletions. Show diff stats Hide diff stats

  1. +12 -56 lib/URI/Escape.pm
68 lib/URI/Escape.pm
@@ -17,18 +17,12 @@ package URI::Escape {
17 17
18 18 sub uri_escape($s, Bool :$no_utf8 = False) is export {
19 19 return $s unless defined $s;
20   - $s.subst(:g, rx/<- [!*'()\-._~A..Za..z0..9]>+/,
21   - -> $escape {
22   - ($escape.Str.comb.map: {
23   - ( $no_utf8 || ! 0x80 +& ord($_) ) ?? %escapes{ $_ } !!
24   - do {
25   - my $buf = $_.encode;
26   - for (0 ..^ $buf.elems) {
27   - sprintf '%%%02X', $buf[ $_ ]
28   - }
29   - }
30   - }).join;
31   - });
  20 + $s.subst(:g, rx/<- [!*'()\-._~A..Za..z0..9]>/,
  21 + {
  22 + ( $no_utf8 || ! 0x80 +& ord(.Str) ) ?? %escapes{ .Str } !!
  23 + %escapes{.Str.encode.list>>.chr}.join;
  24 + }
  25 + );
32 26 }
33 27
34 28 # todo - automatic invalid UTF-8 detection
@@ -36,58 +30,20 @@ package URI::Escape {
36 30 # find first sequence of %[89ABCDEF]<.xdigit>
37 31 # use algorithm from url to determine if it's valid UTF-8
38 32 sub uri_unescape(*@to_unesc, Bool :$no_utf8 = False) is export {
39   - my @rc;
40   - for @to_unesc -> $s is copy {
41   - my $rc = '';
42   - my $last_pos = 0;
43 33
44   - while $s ~~ m:c/[ '%' (<.xdigit><.xdigit>)]+/ {
45   - $rc ~= $s.substr($last_pos, $/.from - $last_pos);
46   -
47   - # should be a better way with list context
48   - my @encoded_octets = map { :16( ~.value ) }, $/.caps;
49   - # common case optimization
50   - while @encoded_octets and ($no_utf8 or @encoded_octets[0] < 0x80) {
51   - $rc ~= chr(shift @encoded_octets);
52   - }
53   - # if any utf8 ...
54   - while @encoded_octets {
55   - my ($code_point, $utf8_len) = utf8_octets_2_codepoint(
56   - @encoded_octets
57   - );
58   - @encoded_octets.splice(0, $utf8_len);
59   - $rc ~= chr($code_point);
60   - }
61   - $last_pos = $/.to;
62   - }
63   - $rc ~= $s.substr($last_pos);
64   - $rc .= trans('+' => ' ');
65   - @rc.push($rc);
  34 + my @rc = @to_unesc.map: {
  35 + .trans('+' => ' ')\
  36 + .subst(:g, / '%' (<.xdigit> ** 2 ) /, -> $/ {
  37 + :16(~$0).chr;
  38 + })
66 39 }
  40 + @rc.=map(*.encode('latin-1').decode('UTF-8')) unless $no_utf8;
67 41 return do given @rc.elems { # this might be simplified some day
68 42 when 0 { Nil }
69 43 when 1 { @rc[0] }
70 44 default { @rc }
71 45 }
72 46 }
73   -
74   - # Stole parts from Masak November::CGI and parts from Parrot's UTF-8 decode
75   - sub utf8_octets_2_codepoint(@octets) {
76   - if @octets[ 0 ] < 0x80 { # completeness
77   - return @octets[0], 1
78   - }
79   -
80   - my $len = 1;
81   -
82   - while 0x80 +> ++$len +& @octets[0] and $len < 6 {}
83   -
84   - my $max_shift = 6 * ($len -1);
85   - my $code_point = reduce {
86   - $^a + @octets[ $^b ] +& 0x3F +< ($max_shift - 6 * $^b)
87   - }, 0x7F +> $len +& @octets[0] +< $max_shift, 1 ..^ $len;
88   -
89   - return $code_point, $len;
90   - }
91 47 }
92 48
93 49 =begin pod

Tip: You can add notes to lines in a file. Hover to the left of a line to make a note.

Something went wrong with that request. Please try again.