Permalink
Browse files

Moved from grammar based on RFC 2396 to grammar based on RFC 3986.

  • Loading branch information...
1 parent 81d0afc commit 5807b7e76aca614b68f8649b08ed91fc327e0a23 U-ron-PC\ron committed May 14, 2009
Showing with 80 additions and 66 deletions.
  1. +25 −13 lib/IETF/RFC_Grammar/IPv6.pm
  2. +51 −50 lib/IETF/RFC_Grammar/URI.pm
  3. +4 −2 lib/URI/Escape.pm
  4. +0 −1 lib/URI/Grammar.pm
@@ -1,24 +1,36 @@
use v6;
# Taken/Copied with relatively minor translation to Perl6
-# from rfc 2373 (http://www.faqs.org/rfcs/rfc2373.html)
+# from RFC 3986 (http://www.ietf.org/rfc/rfc3986.txt)
grammar IETF::RFC_Grammar::IPv6 {
- token IPv6address { <.hexpart> [ ':' IPv4address ]? };
- token IPv4address {
- <.digit> ** 1..3 '.'
- <.digit> ** 1..3 '.'
- <.digit> ** 1..3 '.'
- <.digit> ** 1..3
+ # IPv6address per RFC 3986 section 3.2.2: h16 groups separated by ':' with
+ # at most one '::' elision; the tail may be an <ls32> (two h16 groups or a
+ # dotted IPv4address). Alternatives are ordered by decreasing number of
+ # groups after the '::'.
+ # The optional prefix before '::' is written  h16 [ ':' h16 ] ** 0..N
+ # instead of  [ h16 ':' ] ** 0..N h16 : tokens do not backtrack, so the
+ # latter form lets the repeated group consume the first ':' of '::' and the
+ # trailing h16 can then never match (e.g. "1::2:3:4:5:6" was rejected).
+ token IPv6address {
+ [ <.h16> ':' ] ** 6 <.ls32> |
+ '::' [ <.h16> ':' ] ** 5 <.ls32> |
+ [ <.h16> ]? '::' [ <.h16> ':' ] ** 4 <.ls32> |
+ [ <.h16> [ ':' <.h16> ] ** 0..1 ]? '::' [ <.h16> ':' ] ** 3 <.ls32> |
+ [ <.h16> [ ':' <.h16> ] ** 0..2 ]? '::' [ <.h16> ':' ] ** 2 <.ls32> |
+ [ <.h16> [ ':' <.h16> ] ** 0..3 ]? '::' <.h16> ':' <.ls32> |
+ [ <.h16> [ ':' <.h16> ] ** 0..4 ]? '::' <.ls32> |
+ [ <.h16> [ ':' <.h16> ] ** 0..5 ]? '::' <.h16> |
+ [ <.h16> [ ':' <.h16> ] ** 0..6 ]? '::'
+ };
- token IPv6prefix { <.hexpart> '/' <.digit> ** 1..2 };
+ token ls32 { [<.h16> ':' <.h16>] | <.IPv4address> };
+ token h16 { <.xdigit> ** 1..4 };
+
+ token IPv4address {
+ <.dec_octet> '.' <.dec_octet> '.' <.dec_octet> '.' <.dec_octet>
+ };
+
+ token dec_octet {
+ '25' <[0..5]> | # 250 - 255
+ '2' <[0..4]> <.digit> | # 200 - 249
+ '1' <.digit> ** 2 | # 100 - 199
+ <[1..9]> <.digit> | # 10 - 99
+ <.digit> # 0 - 9
+ }
- token hexpart { [ <.hexseq> '::' <.hexseq>? ] |
- [ '::' <.hexseq>? ] |
- <.hexseq> };
- token hexseq { <.hex4> [ ':' <.hex4> ]* };
- token hex4 { <.xdigit> ** 1..4 };
}
# vim:ft=perl6
View
@@ -1,76 +1,77 @@
use v6;
# Taken/Copied with relatively minor translation to Perl6
-# from rfc 2369 (http://www.ietf.org/rfc/rfc2396.txt)
+# from RFC 3986 (http://www.ietf.org/rfc/rfc3986.txt)
use IETF::RFC_Grammar::IPv6;
grammar IETF::RFC_Grammar::URI is IETF::RFC_Grammar::IPv6 {
- token URI-reference {
- [ <.absoluteURI> | <.relativeURI> ]? [ '#' <fragment> ]?
- };
- token absoluteURI { <scheme> ':' [ <.hier_part> | <.opaque_part> ] };
- token relativeURI {
- [ <.net_path> | <.abs_path> | <.rel_path> ] [ '?' <.query> ]?
+ token URI_reference { <URI> | <relative_ref> };
+
+ # absolute-URI (RFC 3986 section 4.3): scheme, hier-part and an optional
+ # query, with no fragment. <query> must be a subrule call; a bare word in a
+ # regex would match the literal text "query" instead of the query rule.
+ token absolute_URI { <scheme> ':' <hier_part> [ '?' <query> ]? };
+ token relative_ref {
+ <relative_part> [ '?' <query> ]? [ '#' <fragment> ]?
};
-
- token hier_part { [ <.net_path> | <.abs_path> ] [ '?' <.query> ] };
- token opaque_part { <.uric_no_slash> <.uric>* };
- token uric_no_slash { <[;?:@&=+$,] +unreserved +escape> };
+ token relative_part {
+ '//' <authority> <.path_abempty> |
+ <.path_absolute> |
+ <.path_noscheme> |
+ <.path_empty>
+ };
- token net_path { '//' <.authority> <.abs_path>? };
- token abs_path { '/' <.path_segments> };
- token rel_path { <.rel_segment> <.abs_path>? };
+ token URI {
+ <scheme> ':' <hier_part> [ '?' <query> ]? [ '#' <fragment> ]?
+ };
- token rel_segment { <[;@&=+$,] +unreserved +escaped>+ };
+ token hier_part {
+ '//' <authority> <.path_abempty> |
+ <.path_absolute> |
+ <.path_rootless> |
+ <.path_empty>
+ };
token scheme { <.uri_alpha> <[\-+.] +uri_alpha +digit>* };
+
+ token authority { [ <userinfo> '@' ]? <host> [ ':' <port> ]? };
+ token userinfo {
+ [ <[:] +unreserved +sub_delims> | <.pct_encoded> ]*
+ };
+ token host { <IPv4address> | <IP_literal> | <reg_name> };
+ token port { <.digit>* };
- token authority { <.server> | <.reg_name> };
+ token IP_literal { '[' [ <IPv6address> | <IPvFuture> ] ']' };
+ token IPvFuture {
+ 'v' <.xdigit>+ '.' <[:] +unreserved +sub_delims>+
+ };
+ token reg_name { [ <+unreserved +sub_delims> | <.pct_encoded> ]* };
- token reg_name { <[$,;:@&=+] +unreserved +escaped>+ };
+ token path_abempty { [ '/' <.segment> ]* };
+ token path_absolute { '/' [ <.segment_nz> [ '/' <.segment> ]* ]? };
+ token path_noscheme { <.segment_nz_nc> [ '/' <.segment> ]* };
+ token path_rootless { <.segment_nz> [ '/' <.segment> ]* };
+ token path_empty { <.pchar> ** 0 }; # yes - zero characters
- token server { [ [ userinfo '@' ]? hostport ]? };
- token userinfo { <[;:&=+$,] +unreserved +escaped> };
-
- token hostport { <host> [ ':' <port> ]? };
-
- token host { <hostname> | <IPv4address> | <IPv6reference> };
- token ipv6reference { '[' <IPv6address> ']'}
- regex hostname { [ <.domainlabel> '.' ] * <.toplabel> '.'? };
- regex domainlabel {
- [ <.uri_alphanum> <[\-] +uri_alphanum>* <.uri_alphanum> ] |
- <.uri_alphanum>
- };
- regex toplabel {
- [ <.uri_alpha> <[\-] +uri_alphanum>* <.uri_alphanum> ] |
- <.uri_alpha>
- };
-
- token port { <.digit>* };
+ token segment { <.pchar>* };
+ token segment_nz { <.pchar>+ };
+ token segment_nz_nc { [ <+unenc_pchar - [:]> | <.pct_encoded> ] + };
- token path { [ abs_path | opaque_part ]? };
+ token query { <.fragment> }; # delegates to fragment: both accept the same character set
+ token fragment { [ <[/?] +unenc_pchar> | <.pct_encoded> ]* };
- token path_segments { <.segment> [ '/' <.segment> ] * };
-
- token segment { <.pchar>* [ ';' <.param>]* };
- token param { <.pchar>* };
- token pchar { <[:@&=+$,] +unreserved> | <.escaped> };
+ token pchar { <.unenc_pchar> | <.pct_encoded> };
+ token unenc_pchar { <[:@] +unreserved +sub_delims> };
- token query { <.uric>* };
- token fragment { <.uric>* };
+ token pct_encoded { '%' <.xdigit> <.xdigit> };
- token uric { <+reserved +unreserved> | <.escaped> };
- token reserved { <[;/?:@&=+$,\[\]]> };
- token unwise { <[{}|\\^`]> };
+ token unreserved { <[\-._~] +uri_alphanum> };
- token unreserved { <+uri_alphanum +mark> };
- token mark { <[\-_.!~*'()]> };
+ token reserved { <+gen_delims +sub_delims> };
- token escaped { '%' <.xdigit> <.xdigit> };
+ token gen_delims { <[:/?#\[\]@]> };
+ token sub_delims { <[;!$&'()*+,=]> };
- token uri_alphanum { <+uri_alpha +digit> };
+ token uri_alphanum { <+uri_alpha +digit> };
token uri_alpha { <+lowalpha +upalpha> };
token lowalpha { <[a..z]> };
View
@@ -10,14 +10,16 @@ package URI::Escape {
%escapes{ chr($c) } = sprintf "%%%02X", $c
}
+ token artifact_unreserved {<[!*'()] +IETF::RFC_Grammar::URI::unreserved>}; # unreserved plus ! * ' ( ) -- the old "mark" characters that uri_escape still leaves unescaped
+
sub uri_escape($s is copy) is export {
my $rc;
while $s {
- if my $not_escape = $s ~~ /^<IETF::RFC_Grammar::URI::unreserved>+/ {
+ if my $not_escape = $s ~~ /^<artifact_unreserved>+/ {
$rc ~= $not_escape;
$s.=substr($not_escape.chars);
}
- if my $escape = $s ~~ /^<- IETF::RFC_Grammar::URI::unreserved>+/ {
+ if my $escape = $s ~~ /^<- artifact_unreserved>+/ {
$rc ~= ($escape.comb().map: {
%escapes{ chr(ord($_)) } || # chr(ord()) ??? @#^^!! it works
die 'Can\'t escape \\' ~ sprintf('x{%04X}, try uri_escape_utf8() instead',
View
@@ -10,7 +10,6 @@ grammar URI::Grammar is IETF::RFC_Grammar::URI {
[ '#' <fragment> ]? $
};
- token authority { <host> [ ':' <port> ]? };
token path { <slash>? [ <chunk>** '/'? ]* };
token slash { '/' };

0 comments on commit 5807b7e

Please sign in to comment.