Permalink
Browse files

Finish grammar migration to RFC 3986 and do full parsing with that grammar.

Clean out reliance on old regex based grammar.
  • Loading branch information...
1 parent ed11472 commit 9e4deeb52ccefd5c3f1147f59d1b11b1ac4c0674 U-ron-PC\ron committed May 20, 2009
Showing with 59 additions and 72 deletions.
  1. +15 −17 lib/IETF/RFC_Grammar/URI.pm
  2. +42 −27 lib/URI.pm
  3. +0 −27 lib/URI/Grammar.pm
  4. +2 −1 t/01.t
@@ -6,29 +6,30 @@ use v6;
use IETF::RFC_Grammar::IPv6;
grammar IETF::RFC_Grammar::URI is IETF::RFC_Grammar::IPv6 {
+ token TOP { <URI_reference> };
token URI_reference { <URI> | <relative_ref> };
- token absolute_URI { <scheme> ':' <hier_part> [ '?' query ]? };
+ token absolute_URI { <scheme> ':' <.hier_part> [ '?' query ]? };
token relative_ref {
<relative_part> [ '?' <query> ]? [ '#' <fragment> ]?
};
token relative_part {
- '//' <authority> <.path_abempty> |
- <.path_absolute> |
- <.path_noscheme> |
- <.path_empty>
+ '//' <authority> <path_abempty> |
+ <path_absolute> |
+ <path_noscheme> |
+ <path_empty>
};
token URI {
<scheme> ':' <hier_part> [ '?' <query> ]? [ '#' <fragment> ]?
};
token hier_part {
- '//' <authority> <.path_abempty> |
- <.path_absolute> |
- <.path_rootless> |
- <.path_empty>
+ '//' <authority> <path_abempty> |
+ <path_absolute> |
+ <path_rootless> |
+ <path_empty>
};
token scheme { <.uri_alpha> <[\-+.] +uri_alpha +digit>* };
@@ -46,10 +47,10 @@ grammar IETF::RFC_Grammar::URI is IETF::RFC_Grammar::IPv6 {
};
token reg_name { [ <+unreserved +sub_delims> | <.pct_encoded> ]* };
- token path_abempty { [ '/' <.segment> ]* };
- token path_absolute { '/' [ <.segment_nz> [ '/' <.segment> ]* ]? };
- token path_noscheme { <.segment_nz_nc> [ '/' <.segment> ]* };
- token path_rootless { <.segment_nz> [ '/' <.segment> ]* };
+ token path_abempty { [ '/' <segment> ]* };
+ token path_absolute { '/' [ <segment_nz> [ '/' <segment> ]* ]? };
+ token path_noscheme { <segment_nz_nc> [ '/' <segment> ]* };
+ token path_rootless { <segment_nz> [ '/' <segment> ]* };
token path_empty { <.pchar> ** 0 }; # yes - zero characters
token segment { <.pchar>* };
@@ -72,10 +73,7 @@ grammar IETF::RFC_Grammar::URI is IETF::RFC_Grammar::IPv6 {
token sub_delims { <[;!$&'()*+,=]> };
token uri_alphanum { <+uri_alpha +digit> };
- token uri_alpha { <+lowalpha +upalpha> };
-
- token lowalpha { <[a..z]> };
- token upalpha { <[A..Z]> };
+ token uri_alpha { <[A..Za..z]> };
}
# vim:ft=perl6
View
@@ -1,70 +1,85 @@
class URI;
-# RAKUDO: Match object does not do assignment properly :(
-#my Match $.parts; dies in init with 'Type mismatch in assignment';
-# workaround:
has $.uri;
+has $.path;
+has Bool $.is_absolute is ro;
+has $.scheme;
+has $.authority;
+has $.query;
+has $.frag;
has @.chunks;
method init ($str) {
- use URI::Grammar;
+ use IETF::RFC_Grammar::URI;
# clear string before parsing
my $c_str = $str;
$c_str .= subst(/^ \s* ['<' | '"'] /, '');
$c_str .= subst(/ ['>' | '"'] \s* $/, '');
- URI::Grammar.parse($c_str);
+ IETF::RFC_Grammar::URI.parse($c_str);
unless $/ { die "Could not parse URI: $str" }
-
+
+ $!uri = $!path = $!is_absolute = $!scheme = $!authority = $!query =
+ $!frag = undef;
+ @!chunks = undef;
+
$!uri = $/;
- @!chunks = $/<path><chunk> // ('');
+
+ my $comp_container = $/<URI_reference><URI> // $/<URI_reference><relative_ref>;
+ $!scheme = $comp_container<scheme>;
+ $!query = $comp_container<query>;
+ $!frag = $comp_container<fragment>;
+ $comp_container = $comp_container<hier_part> // $comp_container<relative_part>;
+
+ $!authority = $comp_container<authority>;
+ $!path = $comp_container<path_abempty> //
+ $comp_container<path_absolute> ;
+ $!is_absolute = ?($!path // $.scheme);
+
+ $!path //= $comp_container<path_noscheme> //
+ $comp_container<path_rootless> ;
+
+ @!chunks = $!path<segment> // ('');
+ if my $first_chunk = $!path<segment_nz_nc> // $!path<segment_nz> {
+ unshift @!chunks, $first_chunk;
+ }
+ @!chunks ||= ('');
}
method scheme {
- my $s = $.uri<scheme> // '';
- # RAKUDO: return 1 if use ~ below die because can`t do lc on Math after
- return ~$s.lc;
+ return ~$!scheme.lc;
}
method authority {
- my $a = $.uri<authority> // '';
- # RAKUDO: return 1 if use ~ below die because can`t do lc on Math after
- return ~$a.lc;
+ return ~$!authority.lc;
}
method host {
- #RAKUDO: $.uri<authority>[0]<host> return full <authority> now
- my $h = ~$.uri<authority>[0]<host>;
- return $h.lc // '';
+ return ($!authority<host> // '').lc;
}
method port {
- # TODO: send rakudobug
- # RAKUDO: $.uri<authority><port> return full <authority> now
- # workaround:
- item $.uri<authority>[0]<port> // '';
+ item $!authority<port> // '';
}
method path {
- my $p = ~$.uri<path> // '';
- return $p.lc;
+ return ~($!path // '').lc;
}
method absolute {
- return ?($.uri<path><slash> // $.scheme);
+ return $!is_absolute;
}
method relative {
- return !($.uri<path><slash> // $.scheme);
+ return ! $.absolute;
}
method query {
- item $.uri<query> // '';
+ item ~($!query // '');
}
method frag {
- my $f = $.uri<fragment> // '';
- return ~$f.lc;
+ return ~($!frag // '').lc;
}
method fragment { $.frag }
View
@@ -1,27 +0,0 @@
-use v6;
-
-use IETF::RFC_Grammar::URI;
-
-grammar URI::Grammar is IETF::RFC_Grammar::URI {
-
- token TOP {
- ^ [ <scheme> ':' ]?
- [ '//' <authority> ]? <path> [ '?' <query> ]?
- [ '#' <fragment> ]? $
- };
-
- token path { <slash>? [ <chunk>** '/'? ]* };
- token slash { '/' };
-
-#
-# following hangs rakudo (RT #37745 afaik)
-# token chunk { <[a..z]>* }; say 'ok' if 'index/' ~~ /[ <chunk> '/'?]*/
-#
-# so can't just use rfc segment but use small hack of requiring
-# at least one char. Use of ** seperator above makes behavior
-# consistent with rfc.
-#
- token chunk { <.pchar> <.segment> }
-}
-
-# vim:ft=perl6
View
3 t/01.t
@@ -8,7 +8,8 @@ ok(1,'We use URI and we are still alive');
my $u = URI.new;
$u.init('http://example.com:80/about/us?foo#bar');
-is($u.scheme, 'http', 'scheme');
+is($u.scheme, 'http', 'scheme');
+
is($u.host, 'example.com', 'host');
is($u.port, '80', 'port');
is($u.path, '/about/us', 'path');

0 comments on commit 9e4deeb

Please sign in to comment.