Skip to content

Commit

Permalink
Parrot m4 now uses PGE instead of PCRE.
Browse files Browse the repository at this point in the history
Call it Parrot m4 0.0.13


git-svn-id: http://svn.perl.org/parrot/trunk@8479 d31e2699-5ff4-0310-a27c-f18f2fbe73fe
  • Loading branch information
bernhard committed Jun 29, 2005
1 parent 4699c46 commit 0b15f0b
Show file tree
Hide file tree
Showing 12 changed files with 121 additions and 408 deletions.
4 changes: 4 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# $Id$

2005-06-29 Bernhard Schmalhofer
* Switch from PCRE to PGE
* Call it Parrot m4 0.0.13

2005-06-12 Bernhard Schmalhofer
* Move Parrot::Test::m4 to 'languages/m4/lib'
* Yank it up to 0.0.12
Expand Down
6 changes: 1 addition & 5 deletions INSTALL
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@

The source of Parrot m4 is part of the Parrot distribution.

Internally Parrot m4 uses the "Perl Compatible Regular Expressions"-library.
The source of the PCRE-library can be fetched from http://www.pcre.org/.
Please follow the instructions in INSTALL in the fetched distribution.
When the shared library 'libpcre' is installed in a non-default location,
make sure that LD_LIBRARY_PATH is set accordingly.
Internally Parrot m4 uses the "Parrot Grammar Engine" (PGE).

Build and test with:

Expand Down
163 changes: 56 additions & 107 deletions src/input.pir
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@ References: http://www.gnu.org/software/m4/m4.html

=head2 void input_init( Hash state )

Initialise the input stack and various regexes.
Initialise some stacks and some regexes

'token_stack'
'input_stack' contains files, strings and macro definitions
'word_regex' recognizes TOKEN_WORD.
'string_regex' recognizes TOKEN_STRING
'simple_regex' recognizes TOKEN_SIMPLE
'comment_regex' recognizes comments, returned as TOKEN_SIMPLE
'wrapup_stack'
'word_rulesub' recognizes TOKEN_WORD.
'string_rulesub' recognizes TOKEN_STRING
'simple_rulesub' recognizes TOKEN_SIMPLE
'comment_rulesub' recognizes comments, returned as TOKEN_SIMPLE

TODO: recognize nested quoted strings

Expand All @@ -30,72 +32,30 @@ TODO: recognize nested quoted strings
.include "datatypes.pasm"

.sub input_init
.param pmc state

.local pmc empty_array
.param pmc state

# setup of stacks
.local pmc empty_array
empty_array = new ResizablePMCArray
state['token_stack'] = empty_array
empty_array = new ResizablePMCArray
state['input_stack'] = empty_array
empty_array = new ResizablePMCArray
state['wrapup_stack'] = empty_array

# setup of regexes
# regular expressions are needed for finding words and quoted strings
.local pmc regex
.local pmc erroffset
erroffset = new Integer
erroffset = 0
.local pmc NULL
NULL = null

.local pmc init_func, compile_func, match_func, dollar_func, pcre_lib
init_func = find_global 'PCRE', 'init'
compile_func = find_global 'PCRE', 'compile'
dollar_func = find_global 'PCRE', 'dollar'
match_func = find_global 'PCRE', 'match'
state['pcre_match_func'] = match_func
pcre_lib = init_func()

.local pmc err_decl
.local pmc err
err_decl = new ResizablePMCArray
push err_decl, .DATATYPE_CSTR
push err_decl, 0
push err_decl, 0
err = new .ManagedStruct
assign err, err_decl

# pcre *pcre_compile( const char *pattern, int options,
# const char **errptr, int *erroffset,
# const unsigned char *tableptr
.local pmc pcre_compile
pcre_compile = dlfunc pcre_lib, "pcre_compile", "ptip3P"

#int pcre_exec( const pcre *code, const pcre_extra *extra,
# const char *subject, int length, int startoffset,
# int options, int *ovector, int ovecsize );
.local pmc pcre_exec
pcre_exec = dlfunc pcre_lib, "pcre_exec", "ipPtiiipi"
state['pcre_exec'] = pcre_exec

#int pcre_copy_substring( const char *subject, int *ovector,
# int stringcount, int stringnumber, char *buffer,
# int buffersize );
.local pmc pcre_copy_substring
pcre_copy_substring = dlfunc pcre_lib, "pcre_copy_substring", "itpiibi"
state['pcre_copy_substring'] = pcre_copy_substring

regex = pcre_compile( '^[^`#_a-zA-Z]', 0, err, erroffset, NULL )
state['simple_regex'] = regex
regex = pcre_compile( '^#[^\n]*\n', 0, err, erroffset, NULL )
state['comment_regex'] = regex
regex = pcre_compile( '^[_a-zA-Z][_a-zA-Z0-9]*', 0, err, erroffset, NULL )
state['word_regex'] = regex
regex = pcre_compile( "^`[^`]*'", 0, err, erroffset, NULL )
state['string_regex'] = regex
# setup of some rules
# these rules should be kept in sync with t/regex/002_tokens.t
.local pmc p6rule
find_global p6rule, "PGE", "p6rule"
.local pmc rulesub
rulesub = p6rule( "^<[_a..zA..Z]><[_a..zA..Z0..9]>*" )
state['word_rulesub'] = rulesub
rulesub = p6rule( "^`<-[`]>*'" )
state['string_rulesub'] = rulesub
rulesub = p6rule( "^<-[`#_a..zA..Z]>" )
state['simple_rulesub'] = rulesub
rulesub = p6rule( "^\#\N*\n" )
state['comment_rulesub'] = rulesub

.end

Expand Down Expand Up @@ -175,76 +135,65 @@ Uses regular expressions for finding tokens.
input_string = input_block['string']
.local int current_file_len
current_file_len = length input_string
.local pmc pcre_exec
pcre_exec = state['pcre_exec']
.local pmc NULL
null NULL
.local pmc ovector
ovector = new ManagedStruct
ovector = 120 # 1/(2/3) * 4 * 2 * 10 for 10 result pairs
.local int is_match
.local pmc regex
.local pmc rulesub
.local string token_type
token_type = 'TOKEN_EOF'
.local string token_data
token_data = ''
.local int is_string_match
is_string_match = 0
.local pmc match
# look for 'TOKEN_SIMPLE'
# read a whole bunch of non-macro and non-word characters
regex = state['simple_regex']
rulesub = state['simple_rulesub']
token_type = 'TOKEN_SIMPLE'
is_match = pcre_exec( regex, NULL, input_string, current_file_len, 0, 0, ovector, 10 )
if is_match == 1 goto MATCH
if is_match != -1 goto MATCH_FAILED
match = rulesub( input_string )
if match goto MATCH
# look for comments and return it as 'TOKEN_SIMPLE'
regex = state['comment_regex']
rulesub = state['comment_rulesub']
token_type = 'TOKEN_SIMPLE'
is_match = pcre_exec( regex, NULL, input_string, current_file_len, 0, 0, ovector, 10 )
if is_match == 1 goto MATCH
if is_match != -1 goto MATCH_FAILED
match = rulesub( input_string )
if match goto MATCH
# look for 'TOKEN_STRING'
regex = state['string_regex']
rulesub = state['string_rulesub']
token_type = 'TOKEN_STRING'
is_string_match = 1
is_match = pcre_exec( regex, NULL, input_string, current_file_len, 0, 0, ovector, 10 )
if is_match == 1 goto MATCH
if is_match != -1 goto MATCH_FAILED
is_string_match = 0
match = rulesub( input_string )
if match goto MATCH
# look for 'TOKEN_WORD'
# this will be checked for macro substitution
regex = state['word_regex']
rulesub = state['word_rulesub']
token_type = 'TOKEN_WORD'
is_match = pcre_exec( regex, NULL, input_string, current_file_len, 0, 0, ovector, 10 )
if is_match == 1 goto MATCH
if is_match != -1 goto MATCH_FAILED
match = rulesub( input_string )
if match goto MATCH
if current_file_len != 0 goto MATCH_FAILED
token_type = 'TOKEN_EOF'
token_data = ''
goto FINISH_NEXT_TOKEN
MATCH:
# ovector is an int array containing start/stop coords
.local int start_line, end_line
.local pmc struct
struct = new SArray
struct = 3
struct[0] = .DATATYPE_INT
struct[1] = 2
struct[2] = 0
assign ovector, struct
start_line = ovector[0;0]
end_line = ovector[0;1]
token_data = substr input_string, start_line, end_line, ''
unless is_string_match goto NO_STRING_MATCH
substr token_data, 0, 1, ''
substr token_data, -1, 1, ''
NO_STRING_MATCH:
# TODO: is there a method for extracting the matched string?
.local int token_from, token_to
token_from = match.from()
token_to = match.to()
token_data = substr input_string, token_from, token_to, ''
goto SKIP_DEBUG_1
print "\ntoken_type: "
print token_type
print "\ntoken_from: "
print token_from
print "\ntoken_to: "
print token_to
print "\ntoken_data: "
print token_data
print "\n"
SKIP_DEBUG_1:
ne token_type, 'TOKEN_STRING', NO_STRING_MATCH
substr token_data, 0, 1, ''
substr token_data, -1, 1, ''
NO_STRING_MATCH:
goto FINISH_NEXT_TOKEN
MATCH_FAILED:
Expand Down
20 changes: 17 additions & 3 deletions src/m4.pir
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,18 @@ References: http://www.gnu.org/software/m4/m4.html


=head1 SUBROUTINES

=head2 __onload

Load needed libraries

=cut

.sub "__onload" @LOAD
# Intentionally empty for now: despite the POD heading above, no library
# is loaded here yet. The load_bytecode call still lives in the 'm4' sub,
# which carries a TODO to move it into this sub.


.end


=head2 m4

Expand All @@ -49,8 +61,10 @@ Looks at the command line arguments and acts accordingly.
.sub m4 @MAIN
.param pmc argv

load_bytecode "pcre.imc"
load_bytecode "Getopt/Long.pbc"
# TODO: put this into __onload
# load_bytecode "PGE.pbc" # Loaded by Getopt/Long.pbc
load_bytecode "Getopt/Long.pbc" # This also loads PGE

.local pmc get_options
find_global get_options, "Getopt::Long", "get_options"

Expand Down Expand Up @@ -120,7 +134,7 @@ Looks at the command line arguments and acts accordingly.
# Was '--version' passed ?
is_defined = defined opt["version"]
unless is_defined goto NO_VERSION_FLAG
print "Parrot m4 0.0.12\n"
print "Parrot m4 0.0.13\n"
end
NO_VERSION_FLAG:

Expand Down
2 changes: 1 addition & 1 deletion t/basic/003_getopt.t
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ END_OUT
#--------------------------------------------
$real_out = `$parrot_m4 --version 2>&1`;
is( $real_out, << 'END_OUT', '--version' );
Parrot m4 0.0.12
Parrot m4 0.0.13
END_OUT


Expand Down
18 changes: 13 additions & 5 deletions t/basic/005_define_with_blanks.t
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ use strict;
use FindBin;
use lib "$FindBin::Bin/../../lib", "$FindBin::Bin/../../../../lib";

use Parrot::Test tests => 4;
use Parrot::Test tests => 5;

{
language_output_is( 'm4', <<'CODE', <<'OUT', 'hello' );
language_output_is( 'm4', <<'CODE', <<'OUT', 'two valid defines' );
define( `foo', `Hello World')
define(`furcht', `Hallo Welt')
In German foo is furcht.
Expand All @@ -19,7 +19,7 @@ OUT
}

{
language_output_is( 'm4', <<'CODE', <<'OUT', 'hello' );
language_output_is( 'm4', <<'CODE', <<'OUT', 'space in substitution' );
define( `foo', `Hello World ')
define(`furcht', `Hallo Welt')
In German foo is furcht.
Expand All @@ -31,7 +31,7 @@ OUT
}

{
language_output_is( 'm4', <<'CODE', <<'OUT', 'hello' );
language_output_is( 'm4', <<'CODE', <<'OUT', 'space in substitution 2' );
define( `foo', `Hello World ')
define(`furcht', `Hallo Welt')
In German foo is furcht.
Expand All @@ -43,7 +43,15 @@ OUT
}

{
language_output_is( 'm4', <<'CODE', <<'OUT', 'hello' );
language_output_is( 'm4', <<'CODE', <<'OUT', 'not a macro' );
define ( `foo', `Hello World ')
CODE
define ( foo, Hello World )
OUT
}

{
language_output_is( 'm4', <<'CODE', <<'OUT', 'only one macro' );
define ( `foo', `Hello World ')
define(`furcht', `Hallo Welt')
In German foo is furcht.
Expand Down
9 changes: 8 additions & 1 deletion t/basic/010_token_string.t
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,15 @@ use strict;
use FindBin;
use lib "$FindBin::Bin/../../lib", "$FindBin::Bin/../../../../lib";

use Parrot::Test tests => 2;
use Parrot::Test tests => 3;

{
language_output_is( 'm4', <<'CODE', <<'OUT', 'hello' );
`foo'
CODE
foo
OUT
}
{
language_output_is( 'm4', <<'CODE', <<'OUT', 'hello' );
define(`foo', `Hello World')
Expand Down
2 changes: 1 addition & 1 deletion t/builtins/011_eval.t
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ OUT
10 eval( ` 256 != 1024 / 4' )
11 eval( ` 256 <= 256' )
12 eval( ` 256 >= 256' )
13 eval( ` 256 > 256' )
13 eval( ` 254 > 256' )
14 eval( ` 256 > 256' )
15 eval( ` 256 > 257' )
16 eval( ` ! 256 > 257' )
Expand Down
Loading

0 comments on commit 0b15f0b

Please sign in to comment.