Skip to content

Commit

Permalink
* Fixed string escaping in ORegexp#to_str and ORegexp#inspect.
Browse files Browse the repository at this point in the history
* Added begin parameter to ORegexp#match.
  • Loading branch information
dichodaemon committed Apr 16, 2007
1 parent d3308bf commit 65843e1
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 12 deletions.
3 changes: 3 additions & 0 deletions History.txt
@@ -1,3 +1,6 @@
== 1.0.2 /
* Fixed string escaping in ORegexp#to_str and ORegexp#inspect.
* Added begin parameter to ORegexp#match.
== 1.0.1 / 2007-03-28
* Minimum recommended version of oniglib is now 4.6 or higher, for compatibility with Ruby 1.9.
* Restore check for onig version to build with 4.6
Expand Down
52 changes: 42 additions & 10 deletions ext/oregexp.c
Expand Up @@ -2,7 +2,7 @@
#include <oniguruma.h>
/*
TODO:
- Add named backreferences.
- Complete oregexp_match with range parameter.
*/

typedef struct _oregexp {
Expand Down Expand Up @@ -181,24 +181,53 @@ static VALUE oregexp_make_match_data(ORegexp * oregexp, OnigRegion * region, VAL

/*
* call-seq:
* rxp.match(str) => matchdata or nil
* rxp.match(str) => matchdata or nil
* rxp.match(str, begin, end) => matchdata or nil
*
* Returns a <code>MatchData</code> object describing the match, or
* <code>nil</code> if there was no match. This is equivalent to retrieving the
* value of the special variable <code>$~</code> following a normal match.
*
* /(.)(.)(.)/.match("abc")[2] #=> "b"
* ORegexp.new('(.)(.)(.)').match("abc")[2] #=> "b"
*
* The second form performs the match in a region defined by
* <code>begin</code> and <code>end</code> while still taking into
* account look-behinds and look-aheads. Only <code>begin</code> is
* implemented so far; passing <code>end</code> raises ArgumentError.
*
* ORegexp.new('1+2+').match('11221122', 4).offset => [4,8]
* ORegexp.new('(?<=2)1+2+').match('11221122', 4).offset => [4,8]
*
* Compare with:
*
* ORegexp.new('(?<=2)1+2+').match('11221122'[4..-1]) => nil
*/
static VALUE oregexp_match( VALUE self, VALUE string ) {
static VALUE oregexp_match( int argc, VALUE * argv, VALUE self ) {
ORegexp *oregexp;
Data_Get_Struct( self, ORegexp, oregexp );

VALUE string_str = StringValue( string );


if ( argc == 0 || argc > 2) {
rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
exit;
}

VALUE string_str = StringValue( argv[0] );
UChar* str_ptr = RSTRING(string_str)->ptr;
int str_len = RSTRING(string_str)->len;

int begin = 0;
int end = str_len;

if (argc > 1 ) {
begin = NUM2INT( argv[1] );
}
// if (argc > 2) {
// end = NUM2INT( argv[2] );
// }


OnigRegion *region = onig_region_new();
int r = onig_search(oregexp->reg, str_ptr, str_ptr + str_len, str_ptr, str_ptr + str_len, region, ONIG_OPTION_NONE);
int r = onig_search(oregexp->reg, str_ptr, str_ptr + str_len, str_ptr + begin, str_ptr + end, region, ONIG_OPTION_NONE);
rb_backref_set(Qnil);
if (r >= 0) {
VALUE matchData = oregexp_make_match_data( oregexp, region, string_str);
Expand Down Expand Up @@ -642,6 +671,7 @@ static VALUE oregexp_m_scan(VALUE self, VALUE str) {
return rb_ensure( oregexp_packed_scan, (VALUE)&call_args, oregexp_cleanup_region, (VALUE)region);
}


/**
* call-seq:
* rxp === str => true or false
Expand Down Expand Up @@ -671,7 +701,8 @@ static VALUE oregexp_m_eqq(VALUE self, VALUE str) {
}
}
StringValue(str);
match = oregexp_match(self, str);
VALUE args[] = {str};
match = oregexp_match(1, args, self);
if (Qnil == match) {
return Qfalse;
}
Expand All @@ -689,7 +720,8 @@ static VALUE oregexp_m_eqq(VALUE self, VALUE str) {
* ORegexp.new( 'SIT', :options => OPTION_IGNORECASE ) =~ "insensitive" #=> 5
**/
static VALUE oregexp_match_op(VALUE self, VALUE str) {
VALUE ret = oregexp_match(self, str);
VALUE args[] = {str};
VALUE ret = oregexp_match(1, args, self);
if(ret == Qnil)
return Qnil;
return INT2FIX(RMATCH(ret)->regs->beg[0]);
Expand All @@ -700,7 +732,7 @@ void Init_oregexp() {
VALUE cORegexp = rb_define_class_under(mOniguruma, "ORegexp", rb_cObject);
rb_define_alloc_func(cORegexp, oregexp_allocate);
rb_define_method( cORegexp, "initialize", oregexp_initialize, 2 );
rb_define_method( cORegexp, "match", oregexp_match, 1 );
rb_define_method( cORegexp, "match", oregexp_match, -1 );
rb_define_method( cORegexp, "=~", oregexp_match_op, 1 );
rb_define_method( cORegexp, "gsub", oregexp_m_gsub, -1 );
rb_define_method( cORegexp, "sub", oregexp_m_sub, -1 );
Expand Down
4 changes: 2 additions & 2 deletions lib/oniguruma.rb
Expand Up @@ -256,7 +256,7 @@ def to_s
opt_str += "x" if (@options[:options] & OPTION_EXTEND) == 0
end
opt_str += ")"
opt_str + ORegexp.escape( @pattern )
opt_str + @pattern
end


Expand All @@ -273,7 +273,7 @@ def inspect
opt_str += "i" if (@options[:options] & OPTION_IGNORECASE) > 0
opt_str += "m" if (@options[:options] & OPTION_MULTILINE) > 0
opt_str += "x" if (@options[:options] & OPTION_EXTEND) > 0
"/" + ORegexp.escape( @pattern ) + "/" + opt_str
"/" + @pattern + "/" + opt_str
end

# call-seq:
Expand Down
10 changes: 10 additions & 0 deletions test/test_oniguruma.rb
Expand Up @@ -35,6 +35,16 @@ def test_bad_initialization
# Exercises ORegexp#match both with and without the optional begin offset.
def test_match
  reg = Oniguruma::ORegexp.new( "(3.)(.*)(3.)" )
  assert_not_nil( reg.match( "12345634" ) )

  # A begin offset moves the start of the search within the same string,
  # so the reported offsets stay relative to the whole string.
  reg = Oniguruma::ORegexp.new( '1+2+')
  s = "11221122"
  assert_equal( [0,4], reg.match( s ).offset )
  assert_equal( [1,4], reg.match( s, 1 ).offset )
  assert_equal( [4,8], reg.match( s, 2).offset )

  # With a begin offset the look-behind can still see the text before it;
  # matching against a sliced copy of the string cannot.
  reg = Oniguruma::ORegexp.new( '(?<=2)1+2+')
  assert_equal( [4,8], reg.match( s, 4 ).offset )
  assert_nil( reg.match( s[4..-1] ) )
end

def test_no_match
Expand Down

0 comments on commit 65843e1

Please sign in to comment.