Permalink
Browse files

regex compiler: implemented '?' quantifier

  • Loading branch information...
1 parent 4adcaa3 commit 0de2489446fdad643c23fade1e478fc26ba142eb @fglock committed Mar 25, 2010
View
@@ -1,5 +1,6 @@
(5.0)
- 'elsif'
+- regex quantifiers
4.1 2010-03-24
- floating point numbers
@@ -78,6 +78,18 @@ class Rul::Quantifier {
~ '1 '
~ '}';
}
+ if ($.quant eq '?') && ($.greedy eq '') {
+ $.term.set_captures_to_array;
+ return
+ 'do { '
+ ~ 'my $last_pos := $MATCH.to; '
+ ~ 'if !(do {' ~ $.term.emit ~ '}) '
+ ~ '{ '
+ ~ '$MATCH.to := $last_pos; '
+ ~ '}; '
+ ~ '1 '
+ ~ '}';
+ }
# TODO
warn "Rul::Quantifier: " ~ $.quant ~ $.greedy ~ " not implemented";
@@ -125,7 +137,16 @@ class Rul::Subrule {
}
elsif $.captures > 1 {
# TODO: capture level > 2
- $code := 'if $m2 { $MATCH.to := $m2.to; ($MATCH{\'' ~ $.metasyntax ~ '\'}).push( $m2 ); 1 } else { false } '
+ $code := 'if $m2 { '
+ ~ '$MATCH.to := $m2.to; '
+ ~ 'if exists $MATCH{\'' ~ $.metasyntax ~ '\'} { '
+ ~ '($MATCH{\'' ~ $.metasyntax ~ '\'}).push( $m2 ); '
+ ~ '} '
+ ~ 'else { '
+ ~ '$MATCH{\'' ~ $.metasyntax ~ '\'} := [ $m2 ]; '
+ ~ '} '
+ ~ '1 '
+ ~ '} else { false } '
}
else {
$code := 'if $m2 { $MATCH.to := $m2.to; 1 } else { false } '
@@ -5,9 +5,6 @@ grammar MiniPerl6::Grammar {
use MiniPerl6::Grammar::Regex;
use MiniPerl6::Grammar::Mapping;
use MiniPerl6::Grammar::Control;
-
-# XXX - move to v6.pm emitter
-#sub array($data) { use v5; @$data; use v6; }
my $Class_name; # for diagnostic messages
sub get_class_name { $Class_name }
@@ -47,9 +44,9 @@ token ws {
]+
}
-token opt_ws { <.ws> | '' }
-token opt_ws2 { <.ws> | '' }
-token opt_ws3 { <.ws> | '' }
+token opt_ws { <.ws>? }
+token opt_ws2 { <.ws>? }
+token opt_ws3 { <.ws>? }
token parse {
| <comp_unit>
@@ -62,7 +59,7 @@ token parse {
}
token comp_unit {
- <.opt_ws> [\; <.opt_ws> | '' ]
+ <.opt_ws> [ \; <.opt_ws> ]?
[ 'use' <.ws> 'v6-' <ident> <.opt_ws> \; <.ws>
| 'use' <.ws> 'v6' <.opt_ws> \; <.ws>
| ''
@@ -75,7 +72,7 @@ token comp_unit {
<exp_stmts>
<.opt_ws>
'}'
- <.opt_ws> [\; <.opt_ws> | '' ]
+ <.opt_ws> [ \; <.opt_ws> ]?
{
make CompUnit.new(
name => $$<full_ident>,
@@ -95,7 +92,7 @@ token infix_op {
}
token hyper_op {
- '>>' | ''
+ '>>'?
}
token prefix_op {
@@ -244,11 +241,11 @@ token term_meth {
}
token sub_or_method_name {
- <full_ident> [ \. <ident> | '' ]
+ <full_ident> [ \. <ident> ]?
}
token opt_type {
- | [ '::' | '' ] <full_ident> { make $$<full_ident> }
+ | '::'? <full_ident> { make $$<full_ident> }
| '' { make '' }
}
@@ -275,7 +272,7 @@ token exp_term {
{ make Do.new( block => $$<exp_stmts> ) } # do { stmt; ... }
| <declarator> <.ws> <opt_type> <.opt_ws> <var_ident> # my Int $variable
{ make Decl.new( decl => $$<declarator>, type => $$<opt_type>, var => $$<var_ident> ) }
- | use <.ws> <full_ident> [ - <ident> | '' ]
+ | use <.ws> <full_ident> [ - <ident> ]?
{ make Use.new( mod => $$<full_ident> ) }
| <val> { make $$<val> } # 'value'
| <lit> { make $$<lit> } # [literal construct]
@@ -289,16 +286,13 @@ token exp_term {
| <apply> { make $$<apply> } # self; print 1,2,3
}
-#token index { XXX }
-#token lookup { XXX }
-
}
#---- split into compilation units in order to use less RAM...
grammar MiniPerl6::Grammar {
token var_sigil { \$ |\% |\@ |\& }
-token var_twigil { [ \. | \! | \^ | \* ] | '' }
+token var_twigil { [ \. | \! | \^ | \* ]? }
token var_name { <full_ident> | '/' | <digit> }
@@ -316,7 +310,6 @@ token var_ident {
token val {
| <val_undef> { make $$<val_undef> } # undef
- # | $<exp> := <val_object> # (not exposed to the outside)
| <val_num> { make $$<val_num> } # 123.456
| <val_int> { make $$<val_int> } # 123
| <val_bit> { make $$<val_bit> } # True, False
@@ -344,10 +337,7 @@ token val_undef {
token val_num {
<digits>
[ [ 'e' | 'E' ] <digits>
- | \. <digits>
- [ [ 'e' | 'E' ] <digits>
- | ''
- ]
+ | \. <digits> [ [ 'e' | 'E' ] <digits> ]?
]
{ make Val::Num.new( num => ~$/ ) }
}
@@ -424,18 +414,9 @@ token exp_seq {
grammar MiniPerl6::Grammar {
token lit {
- #| <lit_seq> { make $$<lit_seq> } # (a, b, c)
- #| <lit_array> { make $$<lit_array> } # [a, b, c]
- #| <lit_hash> { make $$<lit_hash> } # {a => x, b => y}
- #| <lit_code> { make $$<lit_code> } # sub $x {...}
- | <lit_object> { make $$<lit_object> } # Tree.new(a => x, b => y);
+ <lit_object> { make $$<lit_object> } # Tree.new(a => x, b => y);
}
-token lit_seq { XXX { make 'TODO: lit_seq' } }
-token lit_array { XXX { make 'TODO: lit_array' } }
-token lit_hash { XXX { make 'TODO: lit_hash' } }
-token lit_code { XXX { make 'TODO - Lit::Code' } }
-
token lit_object {
'::'
<full_ident>
@@ -499,7 +480,7 @@ token apply {
]
}
-token opt_name { <ident> | '' }
+token opt_name { <ident>? }
token var_invocant {
@@ -18,7 +18,7 @@ sub greedy { $_[0]->{greedy} };
sub ws1 { $_[0]->{ws1} };
sub ws2 { $_[0]->{ws2} };
sub ws3 { $_[0]->{ws3} };
-sub emit { my $self = $_[0]; if ((($self->{quant} eq '') && ($self->{greedy} eq ''))) { return($self->{term}->emit()) } else { }; if ((($self->{quant} eq '+') && ($self->{greedy} eq ''))) { $self->{term}->set_captures_to_array();return('do { ' . 'my $last_match_null := 0; ' . 'my $last_pos := $MATCH.to; ' . 'my $count := 0; ' . 'while ' . $self->{term}->emit() . ' && ($last_match_null < 2) ' . '{ ' . 'if $last_pos == $MATCH.to { ' . '$last_match_null := $last_match_null + 1; ' . '} ' . 'else { ' . '$last_match_null := 0; ' . '} ' . '$last_pos := $MATCH.to; ' . '$count := $count + 1; ' . '}; ' . '$MATCH.to := $last_pos; ' . '$count > 0; ' . '}') } else { }; if ((($self->{quant} eq '*') && ($self->{greedy} eq ''))) { $self->{term}->set_captures_to_array();return('do { ' . 'my $last_match_null := 0; ' . 'my $last_pos := $MATCH.to; ' . 'while ' . $self->{term}->emit() . ' && ($last_match_null < 2) ' . '{ ' . 'if $last_pos == $MATCH.to { ' . '$last_match_null := $last_match_null + 1; ' . '} ' . 'else { ' . '$last_match_null := 0; ' . '} ' . '$last_pos := $MATCH.to; ' . '}; ' . '$MATCH.to := $last_pos; ' . '1 ' . '}') } else { }; warn('Rul::Quantifier: ' . $self->{quant} . $self->{greedy} . ' not implemented'); $self->{term}->emit() };
+sub emit { my $self = $_[0]; if ((($self->{quant} eq '') && ($self->{greedy} eq ''))) { return($self->{term}->emit()) } else { }; if ((($self->{quant} eq '+') && ($self->{greedy} eq ''))) { $self->{term}->set_captures_to_array();return('do { ' . 'my $last_match_null := 0; ' . 'my $last_pos := $MATCH.to; ' . 'my $count := 0; ' . 'while ' . $self->{term}->emit() . ' && ($last_match_null < 2) ' . '{ ' . 'if $last_pos == $MATCH.to { ' . '$last_match_null := $last_match_null + 1; ' . '} ' . 'else { ' . '$last_match_null := 0; ' . '} ' . '$last_pos := $MATCH.to; ' . '$count := $count + 1; ' . '}; ' . '$MATCH.to := $last_pos; ' . '$count > 0; ' . '}') } else { }; if ((($self->{quant} eq '*') && ($self->{greedy} eq ''))) { $self->{term}->set_captures_to_array();return('do { ' . 'my $last_match_null := 0; ' . 'my $last_pos := $MATCH.to; ' . 'while ' . $self->{term}->emit() . ' && ($last_match_null < 2) ' . '{ ' . 'if $last_pos == $MATCH.to { ' . '$last_match_null := $last_match_null + 1; ' . '} ' . 'else { ' . '$last_match_null := 0; ' . '} ' . '$last_pos := $MATCH.to; ' . '}; ' . '$MATCH.to := $last_pos; ' . '1 ' . '}') } else { }; if ((($self->{quant} eq '?') && ($self->{greedy} eq ''))) { $self->{term}->set_captures_to_array();return('do { ' . 'my $last_pos := $MATCH.to; ' . 'if !(do {' . $self->{term}->emit() . '}) ' . '{ ' . '$MATCH.to := $last_pos; ' . '}; ' . '1 ' . '}') } else { }; warn('Rul::Quantifier: ' . $self->{quant} . $self->{greedy} . ' not implemented'); $self->{term}->emit() };
sub set_captures_to_array { my $self = $_[0]; $self->{term}->set_captures_to_array() }
}
@@ -43,7 +43,7 @@ package Rul::Subrule;
sub new { shift; bless { @_ }, "Rul::Subrule" }
sub metasyntax { $_[0]->{metasyntax} };
sub captures { $_[0]->{captures} };
-sub emit { my $self = $_[0]; (my $meth = ((1 + index($self->{metasyntax}, '.')) ? $self->{metasyntax} : '$grammar.' . $self->{metasyntax})); my $code; if (($self->{captures} == 1)) { ($code = 'if $m2 { $MATCH.to := $m2.to; $MATCH{\'' . $self->{metasyntax} . '\'} := $m2; 1 } else { false } ') } else { if (($self->{captures} > 1)) { ($code = 'if $m2 { $MATCH.to := $m2.to; ($MATCH{\'' . $self->{metasyntax} . '\'}).push( $m2 ); 1 } else { false } ') } else { ($code = 'if $m2 { $MATCH.to := $m2.to; 1 } else { false } ') } }; 'do { ' . 'my $m2 := ' . $meth . '($str, $MATCH.to); ' . $code . '}' };
+sub emit { my $self = $_[0]; (my $meth = ((1 + index($self->{metasyntax}, '.')) ? $self->{metasyntax} : '$grammar.' . $self->{metasyntax})); my $code; if (($self->{captures} == 1)) { ($code = 'if $m2 { $MATCH.to := $m2.to; $MATCH{\'' . $self->{metasyntax} . '\'} := $m2; 1 } else { false } ') } else { if (($self->{captures} > 1)) { ($code = 'if $m2 { ' . '$MATCH.to := $m2.to; ' . 'if exists $MATCH{\'' . $self->{metasyntax} . '\'} { ' . '($MATCH{\'' . $self->{metasyntax} . '\'}).push( $m2 ); ' . '} ' . 'else { ' . '$MATCH{\'' . $self->{metasyntax} . '\'} := [ $m2 ]; ' . '} ' . '1 ' . '} else { false } ') } else { ($code = 'if $m2 { $MATCH.to := $m2.to; 1 } else { false } ') } }; 'do { ' . 'my $m2 := ' . $meth . '($str, $MATCH.to); ' . $code . '}' };
sub set_captures_to_array { my $self = $_[0]; ($self->{captures} = ($self->{captures} + 1)) }
}
Oops, something went wrong.

0 comments on commit 0de2489

Please sign in to comment.