Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

- FIX: Buggy resolution of numeric codes for encoding and syntax options (Nikolai Lugovoi)

- FIX: Buggy implementation of ORegexp#sub and ORegexp#gsub methods. Now code is all C (Nikolai Lugovoi)
  • Loading branch information...
commit c221f57d56f196c543692d90b60a40c823c9ff78 1 parent 8716b26
@dichodaemon dichodaemon authored
View
7 History.txt
@@ -1,4 +1,9 @@
-== 1.0.0 / 2007-03-19
+== 0.9.1 / 2007-03-25
+* FIX: Buggy resolution of numeric codes for encoding and syntax options (Nikolai Lugovoi)
+* FIX: Buggy implementation of ORegexp#sub and ORegexp#gsub methods. Now code is all C (Nikolai Lugovoi)
+* Added documentation for class ORegexp
+
+== 0.9.0 / 2007-03-19
* 1 major enhancement
* Birthday!
View
146 ext/Makefile
@@ -1,146 +0,0 @@
-
-SHELL = /bin/sh
-
-#### Start of system configuration section. ####
-
-srcdir = .
-topdir = /usr/lib/ruby/1.8/i486-linux
-hdrdir = $(topdir)
-VPATH = $(srcdir):$(topdir):$(hdrdir)
-prefix = $(DESTDIR)/usr
-exec_prefix = $(DESTDIR)/usr
-sitedir = $(DESTDIR)/usr/local/lib/site_ruby
-rubylibdir = $(libdir)/ruby/$(ruby_version)
-docdir = $(datarootdir)/doc/$(PACKAGE)
-dvidir = $(docdir)
-datarootdir = $(prefix)/share
-archdir = $(rubylibdir)/$(arch)
-sbindir = $(exec_prefix)/sbin
-psdir = $(docdir)
-localedir = $(datarootdir)/locale
-htmldir = $(docdir)
-datadir = $(prefix)/share
-includedir = $(prefix)/include
-infodir = $(datarootdir)/info
-sysconfdir = $(DESTDIR)/etc
-mandir = $(datadir)/man
-libdir = $(DESTDIR)/usr/lib
-sharedstatedir = $(prefix)/com
-oldincludedir = $(DESTDIR)/usr/include
-pdfdir = $(docdir)
-sitearchdir = $(sitelibdir)/$(sitearch)
-bindir = $(exec_prefix)/bin
-localstatedir = $(DESTDIR)/var
-sitelibdir = $(sitedir)/$(ruby_version)
-libexecdir = $(exec_prefix)/libexec
-
-CC = gcc
-LIBRUBY = $(LIBRUBY_SO)
-LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
-LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
-LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static
-
-CFLAGS = -fPIC -Wall -g -fno-strict-aliasing -O2 -fPIC
-CPPFLAGS = -I. -I$(topdir) -I$(hdrdir) -I$(srcdir)
-CXXFLAGS = $(CFLAGS)
-DLDFLAGS =
-LDSHARED = $(CC) -shared
-AR = ar
-EXEEXT =
-
-RUBY_INSTALL_NAME = ruby1.8
-RUBY_SO_NAME = ruby1.8
-arch = i486-linux
-sitearch = i486-linux
-ruby_version = 1.8
-ruby = /usr/bin/ruby1.8
-RUBY = $(ruby)
-RM = rm -f
-MAKEDIRS = mkdir -p
-INSTALL = /usr/bin/install -c
-INSTALL_PROG = $(INSTALL) -m 0755
-INSTALL_DATA = $(INSTALL) -m 644
-COPY = cp
-
-#### End of system configuration section. ####
-
-preload =
-
-libpath = $(libdir)
-LIBPATH = -L"$(libdir)"
-DEFFILE =
-
-CLEANFILES =
-DISTCLEANFILES =
-
-extout =
-extout_prefix =
-target_prefix =
-LOCAL_LIBS =
-LIBS = $(LIBRUBYARG_SHARED) -lonig -lpthread -ldl -lcrypt -lm -lc
-SRCS = oregexp.c
-OBJS = oregexp.o
-TARGET = oregexp
-DLLIB = $(TARGET).so
-STATIC_LIB =
-
-RUBYCOMMONDIR = $(sitedir)$(target_prefix)
-RUBYLIBDIR = $(sitelibdir)$(target_prefix)
-RUBYARCHDIR = $(sitearchdir)$(target_prefix)
-
-TARGET_SO = $(DLLIB)
-CLEANLIBS = $(TARGET).so $(TARGET).il? $(TARGET).tds $(TARGET).map
-CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak
-
-all: $(DLLIB)
-static: $(STATIC_LIB)
-
-clean:
- @-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
-
-distclean: clean
- @-$(RM) Makefile extconf.h conftest.* mkmf.log
- @-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
-
-realclean: distclean
-install: install-so install-rb
-
-install-so: $(RUBYARCHDIR)
-install-so: $(RUBYARCHDIR)/$(DLLIB)
-$(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
- $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
-install-rb: pre-install-rb install-rb-default
-install-rb-default: pre-install-rb-default
-pre-install-rb: Makefile
-pre-install-rb-default: Makefile
-$(RUBYARCHDIR):
- $(MAKEDIRS) $@
-
-site-install: site-install-so site-install-rb
-site-install-so: install-so
-site-install-rb: install-rb
-
-.SUFFIXES: .c .m .cc .cxx .cpp .C .o
-
-.cc.o:
- $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $<
-
-.cxx.o:
- $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $<
-
-.cpp.o:
- $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $<
-
-.C.o:
- $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $<
-
-.c.o:
- $(CC) $(CFLAGS) $(CPPFLAGS) -c $<
-
-$(DLLIB): $(OBJS)
- @-$(RM) $@
- $(LDSHARED) $(DLDFLAGS) $(LIBPATH) -o $@ $(OBJS) $(LOCAL_LIBS) $(LIBS)
-
-
-
-$(OBJS): ruby.h defines.h
View
1  ext/extconf.rb
@@ -1,3 +1,4 @@
require 'mkmf'
have_library("onig")
+$CFLAGS='-Wall'
create_makefile( "oregexp" )
View
13 ext/mkmf.log
@@ -1,13 +0,0 @@
-have_library: checking for main() in -lonig... -------------------- yes
-
-"gcc -o conftest -I. -I/usr/lib/ruby/1.8/i486-linux -Wall -g -fno-strict-aliasing -O2 -fPIC conftest.c -L"/usr/lib" -lruby1.8-static -lonig -lpthread -ldl -lcrypt -lm -lc"
-checked program was:
-/* begin */
-
-/*top*/
-int main() { return 0; }
-int t() { main(); return 0; }
-/* end */
-
---------------------
-
View
343 ext/oregexp.c
@@ -24,7 +24,10 @@ static VALUE oregexp_allocate( VALUE klass ) {
}
-static OnigEncodingType * int2encoding( int index ) {
+static OnigEncodingType * int2encoding( VALUE v_index ) {
+ int index;
+ if( ! NIL_P(v_index) ) {
+ index = FIX2INT(v_index);
switch( index ) {
case 0: return ONIG_ENCODING_ASCII;
case 1: return ONIG_ENCODING_ISO_8859_1;
@@ -60,10 +63,14 @@ static OnigEncodingType * int2encoding( int index ) {
case 31: return ONIG_ENCODING_GB18030;
case 32: return ONIG_ENCODING_UNDEF;
}
+ }
return ONIG_ENCODING_UNDEF;
}
-static OnigSyntaxType * int2syntax( int index ) {
+static OnigSyntaxType * int2syntax( VALUE v_index ) {
+ int index;
+ if( ! NIL_P(v_index) ) {
+ index = FIX2INT(v_index);
switch( index ) {
case 0: return ONIG_SYNTAX_ASIS;
case 1: return ONIG_SYNTAX_POSIX_BASIC;
@@ -77,25 +84,32 @@ static OnigSyntaxType * int2syntax( int index ) {
case 9: return ONIG_SYNTAX_RUBY;
case 10: return ONIG_SYNTAX_DEFAULT;
}
+ }
return ONIG_SYNTAX_DEFAULT;
}
+struct callback_packet {
+ VALUE hash;
+ OnigRegion * region;
+};
+
static int name_callback(
const UChar* name,
const UChar* name_end,
int ngroup_num,
int* group_nums,
regex_t* reg,
- void* arg
+ struct callback_packet* arg
) {
int i, gn, ref;
- OnigRegion *region = (OnigRegion* )arg;
+ OnigRegion *region = arg->region;
+ VALUE nameHash = arg->hash;
for (i = 0; i < ngroup_num; i++) {
gn = group_nums[i];
ref = onig_name_to_backref_number(reg, name, name_end, region);
if (ref != gn )
- rb_raise(rb_eException, "Oniguruma Error: group and backreference names are different");
+ return 1;
rb_hash_aset( nameHash, ID2SYM(rb_intern(name)), INT2FIX( gn ) );
}
return 0;
@@ -110,13 +124,16 @@ static VALUE oregexp_initialize( VALUE self, VALUE pattern, VALUE options ) {
rb_iv_set( self, "@options", options );
UChar* pat_ptr = RSTRING(pattern_str)->ptr;
int pat_len = RSTRING(pattern_str)->len;
+ if( pat_len == 0 ) {
+ rb_raise(rb_eArgError, "Empty pattern makes no sense.");
+ }
VALUE rOptions = rb_hash_aref( options, ID2SYM( rb_intern( "options" ) ) );
VALUE rEncoding = rb_hash_aref( options, ID2SYM( rb_intern( "encoding" ) ) );
VALUE rSyntax = rb_hash_aref( options, ID2SYM( rb_intern( "syntax" ) ) );
int iOptions = NUM2INT( rOptions );
- int iEncoding = int2encoding( rEncoding );
- int iSyntax = int2syntax( rSyntax );
+ OnigEncodingType * iEncoding = int2encoding( rEncoding );
+ OnigSyntaxType * iSyntax = int2syntax( rSyntax );
int r;
@@ -130,6 +147,40 @@ static VALUE oregexp_initialize( VALUE self, VALUE pattern, VALUE options ) {
return self;
}
+struct RMatch {
+ struct RBasic basic;
+ VALUE str;
+ struct re_registers *regs;
+};
+
+static VALUE oregexp_make_match_data(ORegexp * oregexp, OnigRegion * region, VALUE string_str) {
+ VALUE rb_cMatch = rb_const_get(rb_cObject, rb_intern("MatchData")) ;
+ NEWOBJ(match, struct RMatch);
+ OBJSETUP(match, rb_cMatch, T_MATCH);
+ VALUE kORegexp = rb_const_get( mOniguruma, rb_intern( "ORegexp" ) ) ;
+ int i , count = region->num_regs;
+ struct callback_packet packet;
+
+ match->str = rb_str_new4(string_str);
+ match->regs = ALLOC(struct re_registers);
+ match->regs->allocated = count+1;
+ match->regs->num_regs = count;
+ match->regs->beg = ALLOC_N(int, (count+1));
+ match->regs->end = ALLOC_N(int, (count+1));
+
+ for ( i = 0; i <= count; i++){
+ match->regs->beg[i] = region->beg[i];
+ match->regs->end[i] = region->end[i];
+ }
+ rb_cv_set( kORegexp, "@@last_match", (VALUE)match );
+ packet.region = region;
+ packet.hash = rb_hash_new();
+ if( onig_foreach_name(oregexp->reg, name_callback, &packet) )
+ rb_raise(rb_eException, "Oniguruma Error: group and backreference names are different");
+ rb_iv_set((VALUE)match, "@named_captures", packet.hash);
+ return (VALUE)match;
+}
+
/*
* call-seq:
* rxp.match(str) => matchdata or nil
@@ -151,25 +202,7 @@ static VALUE oregexp_match( VALUE self, VALUE string ) {
OnigRegion *region = onig_region_new();
int r = onig_search(oregexp->reg, str_ptr, str_ptr + str_len, str_ptr, str_ptr + str_len, region, ONIG_OPTION_NONE);
if (r >= 0) {
-
- VALUE begins = rb_ary_new();
- VALUE ends = rb_ary_new();
- nameHash = rb_hash_new();
-
- onig_foreach_name(oregexp->reg, name_callback, (void* )region);
-
-
- int i;
-
- for (i = 0; i < region->num_regs; i++) {
- rb_ary_push( begins, INT2FIX( region->beg[i] ) );
- rb_ary_push( ends, INT2FIX( region->end[i] ) );
- }
- VALUE kMatchData = rb_const_get( mOniguruma, rb_intern( "MatchData" ) );
- VALUE kORegexp = rb_const_get( mOniguruma, rb_intern( "ORegexp" ) );
- VALUE matchData = rb_funcall(kMatchData, rb_intern("new"), 4, string_str, begins, ends, nameHash );
- rb_cv_set( kORegexp, "@@last_match", matchData );
-
+ VALUE matchData = oregexp_make_match_data( oregexp, region, string_str);
onig_region_free(region, 1 );
return matchData;
} else if (r == ONIG_MISMATCH) {
@@ -184,11 +217,267 @@ static VALUE oregexp_match( VALUE self, VALUE string ) {
}
+static const UChar BACKSLASH = 0x5c;
+
+/* Additional backslash sequences work in substitution strings: \& (last match), \+ (last
+matched group), \` (string prior to match), \' (string after match), and \\ (a literal
+backslash). */
+
+/* scan the replacement text, looking for substitutions (\n) and \escapes. */
+static VALUE
+oregexp_get_replacement(pat, src_text, repl_text, region)
+ VALUE pat,
+ src_text,
+ repl_text;
+ OnigRegion * region;
+{
+ ORegexp *oregexp;
+ VALUE ret;
+ int32_t replIdx = 0;
+ int32_t replacementLength = RSTRING(repl_text)->len;
+ UChar *replacementText = RSTRING(repl_text)->ptr;
+ UChar *replacementEnd = replacementText + (replacementLength-1);
+ long numDigits = 0;
+ long groupNum = 0, g_start, g_end;
+ OnigCodePoint digitC;
+ OnigEncoding enc;
+ const UChar * matchText;
+ long matchLen;
+
+ matchText = RSTRING(src_text)->ptr;
+ matchLen = RSTRING(src_text)->len;
+ Data_Get_Struct( pat, ORegexp, oregexp );
+ enc = onig_get_encoding( oregexp->reg );
+
+ ret = rb_str_buf_new(RSTRING(repl_text)->len);
+
+ while (replIdx < replacementLength) {
+ OnigCodePoint c = ONIGENC_MBC_TO_CODE(enc, replacementText+replIdx, replacementEnd);
+ int c_len =ONIGENC_MBC_ENC_LEN(enc, replacementText+replIdx) ;
+ replIdx += c_len;
+ if ( c != BACKSLASH) {
+ /* Common case, no substitution, no escaping, */
+ /* just copy the char to the dest buf. */
+ rb_str_buf_cat( ret, replacementText+replIdx-c_len, c_len);
+ continue;
+ }
+ if (replIdx >= replacementLength) {
+ rb_str_buf_cat(ret, replacementText+(replIdx-c_len), c_len);
+ break;
+ }
+ /* Pick up a capture group number if one follows. */
+ numDigits = 0;
+ groupNum = 0;
+ for (;;) {
+ if (replIdx >= replacementLength) {
+ break;
+ }
+ digitC = ONIGENC_MBC_TO_CODE(enc, replacementText+replIdx, replacementEnd);
+ c_len = ONIGENC_MBC_ENC_LEN(enc, replacementText+replIdx) ;
+ if ( ! ONIGENC_IS_CODE_DIGIT(enc, digitC) ) {
+ break;
+ }
+ replIdx += c_len;
+ groupNum=groupNum*10 + (digitC - '0');
+ numDigits++;
+ if (numDigits >= 2) { /* limit 99 groups */
+ break;
+ }
+ }
+ if (numDigits == 0) {
+ /* Additional backslash sequences work in substitution strings: \& (last match), \+ (last
+ matched group), \` (string prior to match), \' (string after match), and \\ (a literal
+ backslash). */
+ int p_len = c_len;
+ c = ONIGENC_MBC_TO_CODE(enc, replacementText+replIdx, replacementEnd);
+ c_len = ONIGENC_MBC_ENC_LEN(enc, replacementText+replIdx) ;
+ switch(c) {
+ case '&' : // matched substring
+ rb_str_buf_cat(ret, matchText+region->beg[0], region->end[0] - region->beg[0]);
+ replIdx += c_len;
+ break;
+ case '`' : // prematch
+ rb_str_buf_cat(ret, matchText, region->beg[0]);
+ replIdx += c_len;
+ break;
+ case '\'': // postmatch
+ rb_str_buf_cat(ret, matchText+region->end[0], matchLen - region->end[0]);
+ replIdx += c_len;
+ break;
+ case '\\': // literal backslash
+ // place single backslash
+ rb_str_buf_cat(ret, replacementText+replIdx, c_len);
+ replIdx += c_len;
+ break;
+ case '+': // last matched group
+ replIdx += c_len;
+ for(groupNum = region->num_regs; groupNum > 0; groupNum --) {
+ g_start = region->beg[ groupNum ];
+ g_end = region->end[ groupNum ];
+ if( g_start != -1 ) {
+ rb_str_buf_cat(ret, matchText+g_start, g_end-g_start);
+ break;
+ }
+ }
+ break;
+
+ default:
+ rb_str_buf_cat(ret, replacementText+(replIdx-p_len), p_len+c_len);
+ replIdx += c_len;
+
+ }
+ } else {
+ /* Finally, append the capture group data to the destination. */
+ if( groupNum < region->num_regs && region->beg[groupNum] >= 0 && region->end[groupNum]>= region->beg[groupNum] ) {
+ rb_str_buf_cat(ret, matchText+region->beg[groupNum], region->end[groupNum]-region->beg[groupNum]);
+ }
+ }
+ }
+ return ret;
+}
+
+static inline void
+str_mod_check(s, p, len)
+ VALUE s;
+ char *p;
+ long len;
+{
+ if (RSTRING(s)->ptr != p || RSTRING(s)->len != len) {
+ rb_raise(rb_eRuntimeError, "string modified");
+ }
+}
+
+static VALUE
+oregexp_gsub(self, argc, argv, bang, once, region)
+ VALUE self; // pattern
+ int argc; // should be 1 if block given
+ VALUE *argv; // either replacement string
+ int bang;
+ int once;
+ OnigRegion *region;
+{
+ VALUE repl;
+ long beg,
+ end,
+ prev_end;
+ int tainted = 0,
+ iter = 0;
+
+ VALUE buf, curr_repl, block_res;
+ ORegexp *oregexp;
+
+ if (argc == 1 && rb_block_given_p()) {
+ iter = 1;
+ } else if (argc == 2) {
+ repl = argv[1];
+ Check_Type(repl, T_STRING);
+ if (OBJ_TAINTED(argv[1]))
+ tainted = 1;
+ } else {
+ rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
+ }
+ Data_Get_Struct( self, ORegexp, oregexp );
+
+ VALUE string_str = StringValue( argv[0] );
+ UChar* str_ptr = RSTRING(string_str)->ptr;
+ int str_len = RSTRING(string_str)->len;
+
+ beg = onig_search(oregexp->reg, str_ptr, str_ptr + str_len, str_ptr, str_ptr + str_len, region, ONIG_OPTION_NONE);
+
+ if (beg < 0) {
+ /* no match */
+ if (bang)
+ return Qnil;
+ return rb_str_dup(string_str);
+ }
+ end = 0;
+ buf = rb_str_buf_new(str_len);
+ do {
+ prev_end = end;
+ beg = region->beg[0];
+ end = region->end[0];
+ rb_str_buf_cat(buf, str_ptr+prev_end, beg-prev_end);
+ if ( iter ) {
+ VALUE match_data = oregexp_make_match_data( oregexp, region, string_str );
+ rb_backref_set(match_data);
+ if( once )
+ block_res = rb_yield( match_data );
+ else {
+ VALUE match_string = rb_str_new( str_ptr+beg, end-beg);
+ block_res = rb_yield_values(2, match_string, match_data );
+ }
+ str_mod_check( string_str, str_ptr, str_len);
+ curr_repl = rb_obj_as_string(block_res);
+ } else {
+ curr_repl = oregexp_get_replacement(self, string_str, repl, region);
+ }
+ rb_str_append(buf, curr_repl);
+ if( once ) break;
+ // find next match
+ beg=onig_search(oregexp->reg, str_ptr, str_ptr + str_len,
+ str_ptr+end, str_ptr + str_len,
+ region, ONIG_OPTION_NONE);
+ } while ( beg >= 0);
+ rb_str_buf_cat( buf, str_ptr+end, str_len - end);
+
+ if(tainted)
+ OBJ_INFECT(buf, repl);
+ OBJ_INFECT(buf, string_str);
+ if (bang) {
+ rb_funcall(string_str, rb_intern("replace"), 1, buf);
+ return string_str;
+ } else {
+ return buf;
+ }
+}
+
+typedef struct gsub_packet_t {
+ VALUE self; // pattern
+ int argc; // should be 1 if block given
+ VALUE *argv; // either replacement string
+ int bang;
+ int once;
+ OnigRegion *region;
+} gsub_packet;
+static VALUE oregexp_packed_gsub( gsub_packet* args ) {
+ return oregexp_gsub(args->self, args->argc, args->argv, args->bang, args->once, args->region);
+}
+void oregexp_cleanup_region(OnigRegion * region){
+ onig_region_free(region, 1);
+}
+static VALUE oregexp_safe_gsub(self, argc, argv, bang, once)
+ VALUE self; // pattern
+ int argc; // should be 1 if block given
+ VALUE *argv; // either replacement string
+ int bang;
+ int once;
+{
+ OnigRegion * region = onig_region_new();
+ gsub_packet call_args = {self, argc, argv, bang, once, region};
+ return rb_ensure( oregexp_packed_gsub, (VALUE)&call_args, oregexp_cleanup_region, (VALUE)region);
+}
+static VALUE oregexp_m_gsub(int argc, VALUE *argv, VALUE self) {
+ return oregexp_safe_gsub(self, argc, argv, 0, 0);
+}
+static VALUE oregexp_m_sub(int argc, VALUE *argv, VALUE self) {
+ return oregexp_safe_gsub(self, argc, argv, 0, 1);
+}
+
+static VALUE oregexp_m_gsub_bang(int argc, VALUE *argv, VALUE self) {
+ return oregexp_safe_gsub(self, argc, argv, 1, 0);
+}
+static VALUE oregexp_m_sub_bang(int argc, VALUE *argv, VALUE self) {
+ return oregexp_safe_gsub(self, argc, argv, 1, 1);
+}
+
void Init_oregexp() {
mOniguruma = rb_define_module("Oniguruma");
VALUE cORegexp = rb_define_class_under(mOniguruma, "ORegexp", rb_cObject);
rb_define_alloc_func(cORegexp, oregexp_allocate);
rb_define_method( cORegexp, "initialize", oregexp_initialize, 2 );
rb_define_method( cORegexp, "match", oregexp_match, 1 );
-
+ rb_define_method( cORegexp, "gsub", oregexp_m_gsub, -1 );
+ rb_define_method( cORegexp, "sub", oregexp_m_sub, -1 );
+ rb_define_method( cORegexp, "gsub!", oregexp_m_gsub_bang, -1 );
+ rb_define_method( cORegexp, "sub!", oregexp_m_sub_bang, -1 );
}
View
BIN  ext/oregexp.so
Binary file not shown
View
150 lib/oniguruma.rb
@@ -254,7 +254,7 @@ def =~ string
return nil unless string
m = match( string )
return nil unless m
- m.begin
+ m.begin(0)
end
# call-seq:
@@ -289,7 +289,7 @@ def match_all string
matches << m
positions << position
tmp_string = m.post_match
- position += m.end
+ position += m.end(0)
#if m.end == m.begin
# tmp_string = tmp_string[1..-1]
# position += 1
@@ -304,51 +304,6 @@ def match_all string
nil
end
end
-
- def sub string, replacement = nil
- matches = match( string )
- if matches
- replacement = yield matches[0] unless replacement
- string.sub( matches[0], replacement )
- else
- return string
- end
- end
-
- def gsub string, replacement = nil
- result = string
- matches = match_all( string )
- string_replace = replacement
- if matches
- matches.each do |m, p|
- replacement = yield( m[0], m ) unless string_replace
- result = result.sub( m[0], replacement )
- end
- end
- result
- end
-
- def sub! string, replacement = nil
- matches = match( string )
- if matches
- replacement = yield matches[0] unless replacement
- string.sub!( matches[0], replacement )
- else
- return string
- end
- end
-
- def gsub! string, replacement = nil
- matches = match_all( string )
- string_replace = replacement
- if matches
- matches.each do |m, p|
- replacement = yield( m[0], m ) unless string_replace
- string.sub!( m[0], replacement )
- end
- end
- string
- end
end
class MultiMatchData
@@ -371,11 +326,11 @@ def [] ( value1, value2 = nil )
end
def begin index
- @matches[index].begin + @positions[index]
+ @matches[index].begin(0) + @positions[index]
end
def end index
- @matches[index].end + @positions[index]
+ @matches[index].end(0) + @positions[index]
end
def length
@@ -402,90 +357,15 @@ def each
end
end
- class MatchData
- def initialize( string, starts, ends, names )
- @string = string
- @starts = starts
- @ends = ends
- @matches = []
- @starts.size.times do |i|
- @matches << @string[@starts[i]...@ends[i]]
- end
- @match_count = @matches.size
- @start_pos = 0
- @names = names
- end
-
- def [] ( value1, value2 = nil )
- unless value2
- if index = to_index( value1 )
- @matches[index]
- else
- nil
- end
- else
- @matches[value1, value2]
- end
- end
-
- def to_index name
- if name.is_a? Symbol
- @names[name]
- else
- name
- end
- end
-
- def begin index = 0
- @starts[to_index( index )]
- end
-
- def end index = 0
- @ends[to_index( index )]
- end
-
- def captures
- @matches[1..-1]
- end
-
- def length
- @match_count
- end
- alias size length
-
- def offset index = 0
- [@starts[to_index( index )], @ends[to_index( index )]]
- end
-
- def post_match
- @string[@ends[0], @string.length]
- end
-
- def pre_match
- @string[0, @starts[0]]
- end
-
- def select &block
- @matches.select( &block )
- end
-
- def string
- @string.freeze
- end
-
- def to_a
- @matches
- end
-
- def to_s
- @matches[0]
- end
-
- def values_at *values
- result = []
- values.each { |v| result << @matches[v] }
- result
- end
- end
end
-
+class ::MatchData
+ alias old_aref :[]
+ def [](*idx)
+ if idx[0].is_a?(Symbol)
+ k = @named_captures && @named_captures[idx[0]]
+ k && old_aref(k)
+ else
+ old_aref(*idx)
+ end
+ end
+end
View
63 test/test_oniguruma.rb
@@ -210,5 +210,68 @@ def test_group_by_name
assert_equal( ')', matches[:end] )
assert_equal( nil, matches[:inexistent])
end
+
+ def test_utf8_ignore_case
+ reg = Oniguruma::ORegexp.new( '([а-я])+', :options => Oniguruma::OPTION_IGNORECASE, :encoding => Oniguruma::ENCODING_UTF8 )
+ matches = reg.match("Text: Ехал Грека Через Реку")
+ assert_not_nil( matches )
+ assert_equal("Ехал", matches[0])
+ reg = Oniguruma::ORegexp.new( 'р(уби.*)', :options => Oniguruma::OPTION_IGNORECASE, :encoding => Oniguruma::ENCODING_UTF8 )
+ assert_equal("*убил бы*", reg.gsub("Руби", '*\1л бы*') )
+ end
+
+ def test_utf8_gsub
+ reg = Oniguruma::ORegexp.new( '([а-я])([а-я])([а-я]+)', :options => Oniguruma::OPTION_IGNORECASE, :encoding => Oniguruma::ENCODING_UTF8 )
+ new_str = reg.gsub("Text: Ехал Грека Через Реку") {|s,m| m[1]*2+m[2]*2+m[3] }
+ assert_equal("Text: ЕЕххал ГГррека ЧЧеерез РРееку", new_str)
+ end
+
+ def test_utf8_gsub2
+ reg = Oniguruma::ORegexp.new( '[а-я]', :options => Oniguruma::OPTION_IGNORECASE, :encoding => Oniguruma::ENCODING_UTF8 )
+ new_str = reg.gsub("Text: Ехал Грека Через Реку") {|s,m| s*2 }
+ assert_equal("Text: ЕЕххаалл ГГррееккаа ЧЧеерреезз РРееккуу", new_str)
+ end
+ def test_sub_compatibility
+ $x = "a.gif"
+ assert_equal("b.gif", $x.osub('.*\.([^\.]+)$', 'b.\1'))
+ assert_equal("\\.gif", $x.osub('.*\.([^\.]+)$', '\\.\1'))
+ assert_equal("gif", $x.osub('.*\.([^\.]+)$', '\1'))
+ assert_equal("", $x.osub('.*\.([^\.]+)$', '\2'))
+ assert_equal("ab", $x.osub('.*\.([^\.]+)$', 'a\2b'))
+ assert_equal("<a.gif>", $x.osub('.*\.([^\.]+)$', '<\&>'))
+ assert_equal("a.a.", $x.osub('(gif)', '\`') )
+ end
+
+ class ::String
+ def ogsub(*args)
+ Oniguruma::ORegexp.new(args.shift).gsub(self, *args)
+ end
+ def ogsub!(*args)
+ Oniguruma::ORegexp.new(args.shift).gsub!(self, *args)
+ end
+ def osub(re, *args)
+ Oniguruma::ORegexp.new( re ).sub(self, *args)
+ end
+ end
+
+ def test_gsub_compat
+ assert_equal("hello".ogsub('[aeiou]', '*') , "h*ll*")
+ assert_equal("hello".ogsub('([aeiou])', '<\1>') , "h<e>ll<o>")
+ i = 0
+ assert_equal("12345" , Oniguruma::ORegexp.new('.').gsub("hello") {|s,m| i+=1; i.to_s})
+ assert_equal("214365", Oniguruma::ORegexp.new('(.)(.)').gsub("123456") {|s,m| m[2] + m[1] })
+ a = "test"
+ a.ogsub!('t', a)
+ assert_equal("testestest", a)
+ end
+
+ def test_match_compat
+ t = Oniguruma::ORegexp.new('(.)(.)').gsub("123456") {|s,m| "#$2#$1" }
+ assert_equal("214365", t )
+ t = Oniguruma::ORegexp.new('([aeiou])').gsub("hello") {|s,m| "<#$1>" }
+ assert_equal( "h<e>ll<o>", t)
+ end
+
+
end
Please sign in to comment.
Something went wrong with that request. Please try again.