
Commit

Better handling of discardable tokens after exhausted R1 parse: t+
Jeffrey Kegler committed Feb 23, 2013
1 parent de60d8f commit 9021bc1
Showing 3 changed files with 41 additions and 19 deletions.
r2/lib/Marpa/R2/Scanless.pm (10 changes: 9 additions & 1 deletion)
@@ -1454,7 +1454,7 @@ sub Marpa::R2::Scanless::R::read {
     my $raw_token_value = substr ${$p_string},
         $lexeme_start_pos,
         $lexeme_end_pos - $lexeme_start_pos;
-    say {$trace_file_handle} 'Found lexeme @',
+    say {$trace_file_handle} 'Accepted lexeme @',
         $lexeme_start_pos,
         q{-},
         $lexeme_end_pos, q{: },
@@ -1532,6 +1532,14 @@ sub Marpa::R2::Scanless::R::read {
         "$start-$end: ", join " ", @rhs;
     next EVENT;
 } ## end if ( $status eq 'discarded lexeme' )
+if ( $status eq 'ignored lexeme' ) {
+    my ( undef, $g1_symbol_id, $start, $end ) = @{$event};
+    my $lexeme = Marpa::R2::Grammar::original_symbol_name(
+        $g1_tracer->symbol_name($g1_symbol_id) );
+    say {$trace_file_handle} 'Ignored lexeme @',
+        "$start-$end: $lexeme";
+    next EVENT;
+} ## end if ( $status eq 'ignored lexeme' )
 say {$trace_file_handle} 'Event: ', join " ", @{$event};
 next EVENT;
 } ## end EVENT: while ( my $event = $thin_self->event() )
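For orientation (not part of the commit): the events handled above arrive from the thin interface as array references whose first element is a type string, followed by an ID and the start and end positions. A hedged sketch of a standalone formatter for the two lexeme-level event shapes follows; the sub name is hypothetical, and the symbol-name lookup that the real code does via original_symbol_name and the G1 tracer is simplified to a bare symbol ID here.

use strict;
use warnings;
use feature 'say';

# Hypothetical helper (name and simplifications assumed, not from the
# commit): format one event tuple from the thin interface's event queue.
# Tuple layouts follow the dispatch branches above; the real code maps
# the G1 symbol ID to a name via the tracer, which is omitted here.
sub format_lexeme_event {
    my ($event) = @_;
    my ( $status, $id, $start, $end ) = @{$event};
    return 'Ignored lexeme @' . "$start-$end: symbol $id"
        if $status eq 'ignored lexeme';
    return 'Discarded lexeme @' . "$start-$end: rule $id"
        if $status eq 'discarded lexeme';
    return join q{ }, 'Event:', @{$event};    # fallback, as in the loop above
}

say format_lexeme_event( [ 'ignored lexeme', 42, 19, 20 ] );
# Prints: Ignored lexeme @19-20: symbol 42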
r2/t/sl_diag.t (24 changes: 12 additions & 12 deletions)
@@ -146,38 +146,38 @@ END_OF_EXPECTED_OUTPUT
 
 Marpa::R2::Test::is( $actual_value, $expected_value,
     qq{Value of "$test_string"} );
-Marpa::R2::Test::is( $trace_output, <<'END_OF_OUTPUT', qq{Trace output} );
+Marpa::R2::Test::is( $trace_output, <<'END_OF_OUTPUT', qq{Trace output for "$test_string"} );
 Registering character U+002b as symbol 5: [[\+]]
 Registering character U+002b as symbol 19: [[^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}]]
-Found lexeme @0-1: [Lex-1]; value="+"
-Found lexeme @1-2: [Lex-1]; value="+"
-Found lexeme @2-3: [Lex-1]; value="+"
+Accepted lexeme @0-1: [Lex-1]; value="+"
+Accepted lexeme @1-2: [Lex-1]; value="+"
+Accepted lexeme @2-3: [Lex-1]; value="+"
 Registering character U+0020 as symbol 10: [[\s]]
 Registering character U+0020 as symbol 19: [[^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}]]
 Registering character U+0031 as symbol 7: [[\d]]
 Registering character U+0031 as symbol 19: [[^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}]]
 Discarded lexeme @3-4: whitespace
-Found lexeme @4-5: Number; value="1"
+Accepted lexeme @4-5: Number; value="1"
 Registering character U+0032 as symbol 7: [[\d]]
 Registering character U+0032 as symbol 19: [[^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}]]
 Discarded lexeme @5-6: whitespace
-Found lexeme @6-7: Number; value="2"
+Accepted lexeme @6-7: Number; value="2"
 Registering character U+0033 as symbol 7: [[\d]]
 Registering character U+0033 as symbol 19: [[^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}]]
 Discarded lexeme @7-8: whitespace
-Found lexeme @8-9: Number; value="3"
+Accepted lexeme @8-9: Number; value="3"
 Discarded lexeme @9-10: whitespace
-Found lexeme @10-11: [Lex-1]; value="+"
+Accepted lexeme @10-11: [Lex-1]; value="+"
 Discarded lexeme @11-12: whitespace
-Found lexeme @12-13: [Lex-1]; value="+"
+Accepted lexeme @12-13: [Lex-1]; value="+"
 Discarded lexeme @13-14: whitespace
-Found lexeme @14-15: Number; value="1"
+Accepted lexeme @14-15: Number; value="1"
 Discarded lexeme @15-16: whitespace
-Found lexeme @16-17: Number; value="2"
+Accepted lexeme @16-17: Number; value="2"
 Registering character U+0034 as symbol 7: [[\d]]
 Registering character U+0034 as symbol 19: [[^\x{A}\x{B}\x{C}\x{D}\x{2028}\x{2029}]]
 Discarded lexeme @17-18: whitespace
-Found lexeme @18-19: Number; value="4"
+Accepted lexeme @18-19: Number; value="4"
 END_OF_OUTPUT
 } ## end for my $test_data (@tests_data)

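As a usage note, a sketch under assumptions rather than code taken from sl_diag.t: trace lines like those in the heredoc above can be captured into a string by pointing the recognizer's trace_file_handle at an in-memory file. The grammar and option names below follow current Marpa::R2 SLIF conventions, which may differ in detail from the 2013 test harness.

use strict;
use warnings;
use Marpa::R2;

# Minimal sketch: capture lexeme traces in a string for comparison.
# The grammar here is illustrative only; sl_diag.t uses its own.
my $dsl = <<'END_OF_DSL';
:start ::= numbers
numbers ::= Number+
Number ~ [\d]+
:discard ~ whitespace
whitespace ~ [\s]+
END_OF_DSL

my $grammar = Marpa::R2::Scanless::G->new( { source => \$dsl } );

my $trace_output = q{};
open my $trace_fh, q{>}, \$trace_output
    or die "Cannot open in-memory handle: $!";
my $recce = Marpa::R2::Scanless::R->new(
    {   grammar           => $grammar,
        trace_terminals   => 1,            # emits the lexeme trace lines
        trace_file_handle => $trace_fh,
    }
);
my $input = '1 2 3';
$recce->read( \$input );
close $trace_fh;

# $trace_output now holds "Accepted lexeme ..." / "Discarded lexeme ..."
# lines suitable for an exact-match test.
print $trace_output;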
r2/xs/R2.xs (26 changes: 20 additions & 6 deletions)
@@ -1430,7 +1430,7 @@ slr_alternatives (Scanless_R * slr, IV * lexemes_found,
       event_data[0] = newSVpvs ("discarded lexeme");
       /* We do not have the lexeme, but we have the
        * g0 rule.
-       * Let the upper level figure things out.
+       * The upper level will have to figure things out.
        */
       event_data[1] = newSViv (rule_id);
       event_data[2] = newSViv (slr->start_of_lexeme);
@@ -1443,11 +1443,25 @@
       /* We don't try to read lexemes into an exhasuted
        * R1 -- we only are looking for discardable tokens.
        */
-      if (!r1_is_exhausted)
+      if (r1_is_exhausted)
         {
-          slr_alternative (slr, g1_lexeme, *lexemes_attempted);
-          (*lexemes_attempted)++;
+          if (slr->trace_lexemes)
+            {
+              AV *event;
+              SV *event_data[4];
+              event_data[0] = newSVpvs ("ignored lexeme");
+              event_data[1] = newSViv (g1_lexeme);
+              event_data[2] = newSViv (slr->start_of_lexeme);
+              event_data[3] = newSViv (slr->end_of_lexeme);
+              event = av_make (Dim (event_data), event_data);
+              av_push (slr->event_queue, newRV_noinc ((SV *) event));
+            }
+          goto NEXT_REPORT_ITEM;
         }
+
+      /* trace_lexemes done inside slr_alternative */
+      slr_alternative (slr, g1_lexeme, *lexemes_attempted);
+      (*lexemes_attempted)++;
     NEXT_REPORT_ITEM:;
     }
 NO_MORE_REPORT_ITEMS:;
@@ -1463,10 +1477,10 @@ slr_alternatives (Scanless_R * slr, IV * lexemes_found,
    * 1) we reached the longest tokens match (or no match)
    * 2) no tokens were discarded, and
    * 3) r1 is exhausted, then
-   * r1 was exhausted with unconsumed tokens.
+   * r1 was exhausted with unconsumed text.
    * Report that as an error.
    */
-LEXEMES_FOUND:;
+  LEXEMES_FOUND:;
   if (r1_is_exhausted && !lexemes_discarded)
     {
       return -4;
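To make the intent of the XS change concrete, here is a hedged, self-contained illustration, assumed rather than taken from the commit or its tests, of the situation it handles: the G1 recognizer ("R1") is exhausted once a complete parse has been read, but discardable whitespace remains in the input. With this commit, trailing discardable tokens are consumed, any further real lexemes produce "ignored lexeme" trace events, and the hard failure (the -4 return above) is reserved for the case where nothing could be discarded.

use strict;
use warnings;
use feature 'say';
use Marpa::R2;

# Illustrative grammar: a parse of exactly two numbers, after which R1
# can accept nothing more (it is exhausted), plus discardable whitespace.
my $dsl = <<'END_OF_DSL';
:start ::= pair
pair ::= Number Number
Number ~ [\d]+
:discard ~ whitespace
whitespace ~ [\s]+
END_OF_DSL

my $grammar = Marpa::R2::Scanless::G->new( { source => \$dsl } );
my $recce   = Marpa::R2::Scanless::R->new( { grammar => $grammar } );

# R1 is exhausted after "1 2"; the trailing blanks and newline are
# discardable, so the read should complete instead of failing with an
# exhaustion error (the -4 case fires only when nothing is discarded).
my $input = "1 2   \n";
$recce->read( \$input );
say 'read() completed' if defined $recce->value();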
