Permalink
Browse files

* Added documentation for MatchData.

* Added ogsub, ogsub!, osub and osub! to ::String.
* Removed ::String definitions from tests.
* Now the minimal recommended version of Oniguruma is 5.5 or higher.
* Removed ugly #if statements from c code.
* Updated rakefile and History.txt for v1.0.0.
  • Loading branch information...
1 parent 1108b46 commit 62c7eac6a01efdbd93a1f41f19d7cd42f7cf04ba @dichodaemon dichodaemon committed Mar 27, 2007
Showing with 176 additions and 28 deletions.
  1. +31 −0 History.txt
  2. +3 −2 README.txt
  3. +1 −1 Rakefile
  4. +0 −13 ext/oregexp.c
  5. +141 −0 lib/oniguruma.rb
  6. +0 −12 test/test_oniguruma.rb
View
@@ -1,3 +1,34 @@
+== 1.0.0 / 2007-03-27
+* Added documentation for MatchData.
+* Added ogsub, ogsub!, osub and osub! to ::String.
+* Removed ::String definitions from tests.
+* Now the minimal recommended version of oniglib is 5.5 or higher.
+* Removed ugly #if statements from c code.
+* Do not create @named_captures hash if there are no named groups for regexp -- somewhat improves speed for repetitive calls
+* Fixed usage of named backreferences in gsub with non-ascii names
+* Move ORegexp#=~ to C code, make it work just like Regexp#=~, i.e. set $~. Throw ArgumentError instead of Exception if pattern does not compile
+* Fix implementation of ORegexp#===, so it now does not raise errors in case statement anymore
+ (resembles plain Ruby Regexp#=== behaviour)
+* Modified begin, end and offset methods in MatchData to handle named groups and default to group 0.
+* Exception is no longer thrown in oregexp_make_match_data.
+* Removed references to MultiMatchData from documentation
+* Removed class MultiMatchData
+* Fix off by one error in region->num_regs usage
+* Fix dumb bug with zero-width matches that made infinite loops. now consume at least one char in gsub and scan
+* ORegexp API changes:
+ * Pass only MatchData to sub/gsub with blocks
+ oregexp.sub( str ) {|match_data| ... }
+ oregexp.gsub( str ) {|match_data| ... }
+ * Add ORegexp#scan instead of match_all
+ oregexp.scan(str) {|match_data| ... } # => MultiMatchData
+ * Friendly way to set options
+ ORegexp.new( pattern, options_str, encoding, syntax)
+ ORegexp.new('\w+', 'imsx', 'koi8r', 'perl')
+ * Named backreferences in substitutions
+ ORegexp.new('(?<pre>\w+)\d+(?<after>\w+)').sub('abc123def', '\<after>123\<pre>') #=> 'def123abc'
+* couple of bugfixes with region's num_regs
+* some docs for substitution methods added
+
== 0.9.1 / 2007-03-25
* FIX: Buggy resolution of numeric codes for encoding and syntax options (Nikolai Lugovoi)
* FIX: Buggy implementation of ORegexp#gsub and ORegexp#gsub methods. Now code is all C (Nikolai Lugovoi)
View
@@ -8,6 +8,7 @@ Ruby bindings to the Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] regul
* Same interface as the standard Regexp class (easy transition!).
* Support for named groups, look-ahead, look-behind, and other
cool features!
+* Support for other regexp syntaxes (Perl, Python, Java, etc.)
== SYNOPSIS:
@@ -23,7 +24,7 @@ Consult the Syntax.txt[link:files/Syntax_txt.html] page.
== REQUIREMENTS:
-* Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] library v. 2.0 or greater
+* Oniguruma[http://www.geocities.jp/kosako3/oniguruma/] library v. 5.5 or higher
== INSTALL:
@@ -43,7 +44,7 @@ sudo gem install -r oniguruma
== CREDITS:
-* N. Lugovoi. ORegexp.sub and ORegexp.gsub code, plus other patches.
+* N. Lugovoi. ORegexp.sub and ORegexp.gsub code and lots of other stuff.
* K. Kosako. For his great library.
* A lot of the documentation has been copied from the original Ruby Regex documentation.
View
@@ -3,7 +3,7 @@ require 'hoe'
class Hoe; def extra_deps; @extra_deps.reject { |x| Array(x).first == 'hoe' }; end end
-Hoe.new('oniguruma', '0.9.1') do |p|
+Hoe.new('oniguruma', '1.0.0') do |p|
p.rubyforge_name = 'oniguruma'
p.author = 'Dizan Vasquez'
p.email = 'dix_ans@yahoo.com'
View
@@ -47,26 +47,20 @@ static OnigEncodingType * int2encoding( VALUE v_index ) {
case 15: return ONIG_ENCODING_ISO_8859_15;
case 16: return ONIG_ENCODING_ISO_8859_16;
case 17: return ONIG_ENCODING_UTF8;
-#if ONIGURUMA_VERSION_MAJOR != 2
case 18: return ONIG_ENCODING_UTF16_BE;
case 19: return ONIG_ENCODING_UTF16_LE;
case 20: return ONIG_ENCODING_UTF32_BE;
case 21: return ONIG_ENCODING_UTF32_LE;
-#endif
case 22: return ONIG_ENCODING_EUC_JP;
case 23: return ONIG_ENCODING_EUC_TW;
case 24: return ONIG_ENCODING_EUC_KR;
case 25: return ONIG_ENCODING_EUC_CN;
case 26: return ONIG_ENCODING_SJIS;
/*case 27: return ONIG_ENCODING_KOI8;*/
case 28: return ONIG_ENCODING_KOI8_R;
-#if ONIGURUMA_VERSION_MAJOR == 5
case 29: return ONIG_ENCODING_CP1251;
-#endif
case 30: return ONIG_ENCODING_BIG5;
-#if ONIGURUMA_VERSION_MAJOR != 2
case 31: return ONIG_ENCODING_GB18030;
-#endif
case 32: return ONIG_ENCODING_UNDEF;
}
}
@@ -78,19 +72,15 @@ static OnigSyntaxType * int2syntax( VALUE v_index ) {
if( ! NIL_P(v_index) ) {
index = FIX2INT(v_index);
switch( index ) {
-#if ONIGURUMA_VERSION_MAJOR != 2
case 0: return ONIG_SYNTAX_ASIS;
-#endif
case 1: return ONIG_SYNTAX_POSIX_BASIC;
case 2: return ONIG_SYNTAX_POSIX_EXTENDED;
case 3: return ONIG_SYNTAX_EMACS;
case 4: return ONIG_SYNTAX_GREP;
case 5: return ONIG_SYNTAX_GNU_REGEX;
case 6: return ONIG_SYNTAX_JAVA;
case 7: return ONIG_SYNTAX_PERL;
-#if ONIGURUMA_VERSION_MAJOR != 2
case 8: return ONIG_SYNTAX_PERL_NG;
-#endif
case 9: return ONIG_SYNTAX_RUBY;
case 10: return ONIG_SYNTAX_DEFAULT;
}
@@ -233,9 +223,6 @@ matched group), \` (string prior to match), \' (string after match), and \\ (a l
backslash). */
/* scan the replacement text, looking for substitutions (\n) and \escapes. */
-#if ONIGURUMA_VERSION_MAJOR == 2
-#define ONIGENC_MBC_ENC_LEN(e, p) enc_len(e, *(p))
-#endif
static VALUE
oregexp_get_replacement(pat, src_text, repl_text, region)
VALUE pat,
View
@@ -276,6 +276,12 @@ def inspect
"/" + ORegexp.escape( @pattern ) + "/" + opt_str
end
+ # call-seq:
+ # rxp.source => str
+ #
+ # Returns the original string of the pattern.
+ #
+ # ORegexp.new( 'ab+c', 'ix' ).source #=> "ab+c"
def source
# freeze acts on @pattern itself and returns that same object (not a copy),
# so callers receive the stored pattern and cannot modify it afterwards.
@pattern.freeze
end
@@ -285,12 +291,71 @@ def source
end
end
+
+class ::String
+ # Calls <code>Oniguruma::ORegexp#gsub</code> on this string.
+ def ogsub(*args)
+ Oniguruma::ORegexp.new(args.shift).gsub(self, *args)
+ end
+
+ # Calls <code>Oniguruma::ORegexp#gsub!</code> on this string.
+ def ogsub!(*args)
+ Oniguruma::ORegexp.new(args.shift).gsub!(self, *args)
+ end
+
+ # Calls <code>Oniguruma::ORegexp#sub</code> on this string.
+ def osub(re, *args)
+ Oniguruma::ORegexp.new( re ).sub(self, *args)
+ end
+
+ # Calls <code>Oniguruma::ORegexp#sub!</code> on this string.
+ def osub!(re, *args)
+ Oniguruma::ORegexp.new( re ).sub(self, *args)
+ end
+end
+
class ::MatchData
+ # call-seq:
+ # to_index(symbol) => int or nil
+ #
+ # Returns the group index for the corresponding named group, or
+ # <code>nil</code> if the group does not exist.
+ #
+ # m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
+ # m.to_index(:begin) #=> 1
+ # m.to_index(:unknown) #=> nil
def to_index symbol
# The && guard returns nil (or false) instead of raising when @named_captures
# has not been set; per the changelog it is not created for patterns without
# named groups.
@named_captures && @named_captures[symbol]
end
alias old_aref :[]
+
+ # call-seq:
+ # mtch[i] => obj
+ # mtch[start, length] => array
+ # mtch[range] => array
+ # mtch[symbol] => obj
+ #
+ # <code>MatchData</code> acts as an array, and may be
+ # accessed using the normal array indexing techniques. <i>mtch</i>[0] is
+ # equivalent to the special variable <code>$&</code>, and returns the entire
+ # matched string. <i>mtch</i>[1], <i>mtch</i>[2], and so on return the values
+ # of the matched backreferences (portions of the pattern between parentheses).
+ #
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
+ # m[0] #=> "HX1138"
+ # m[1, 2] #=> ["H", "X"]
+ # m[1..3] #=> ["H", "X", "113"]
+ # m[-3, 2] #=> ["X", "113"]
+ #
+ # If a symbol is used as index, the corresponding named group is returned,
+ # or <code>nil</code> if such a group does not exist.
+ #
+ # m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
+ # m[:begin] #=> "THX"
+ # m[:middle] #=> "1"
+ # m[:end] #=> "138"
+
def [](*idx)
if idx[0].is_a?(Symbol)
k = to_index( idx[0] )
@@ -301,6 +366,32 @@ def [](*idx)
end
alias old_begin :begin
+
+ # call-seq:
+ # mtch.begin(n) => integer
+ # mtch.begin => integer
+ # mtch.begin(symbol) => integer
+ #
+ # Returns the offset of the start of the <em>n</em>th element of the match
+ # array in the string.
+ #
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
+ # m.begin(0) #=> 1
+ # m.begin(2) #=> 2
+ #
+ # If no arguments are given, the index of the
+ # first matching character is returned.
+ #
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
+ # m.begin #=> 1
+ #
+ # If the argument is a symbol, then the beginning of the
+ # corresponding named group is returned, or <code>nil</code>
+ # if the group does not exist.
+ #
+ # m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
+ # m.begin(:middle) #=> 3
+
def begin(*idx)
if idx[0].is_a?(Symbol)
k = to_index( idx[0] )
@@ -313,6 +404,30 @@ def begin(*idx)
end
alias old_end :end
+
+ # call-seq:
+ # mtch.end(n) => integer
+ #
+ # Returns the offset of the character immediately following the end of the
+ # <em>n</em>th element of the match array in the string.
+ #
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
+ # m.end(0) #=> 7
+ # m.end(2) #=> 3
+ #
+ # If no arguments are given, the offset of the character immediately
+ # following the end of the entire match is returned.
+ #
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
+ # m.end #=> 7
+ #
+ # If the argument is a symbol, then the end of the
+ # corresponding named group is returned, or <code>nil</code>
+ # if the group does not exist.
+ #
+ # m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
+ # m.end(:middle) #=> 4
+
def end(*idx)
if idx[0].is_a?(Symbol)
k = to_index( idx[0] )
@@ -325,6 +440,32 @@ def end(*idx)
end
alias old_offset :offset
+
+ # call-seq:
+ # mtch.offset(n) => array
+ # mtch.offset => array
+ # mtch.offset(symbol) => array
+ #
+ # Returns a two-element array containing the beginning and ending offsets of
+ # the <em>n</em>th match.
+ #
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
+ # m.offset(0) #=> [1, 7]
+ # m.offset(4) #=> [6, 7]
+ #
+ # If no arguments are given, the offsets of the entire
+ # sequence are returned.
+ #
+ # m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
+ # m.offset #=> [1, 7]
+ #
+ # If the argument is a symbol, then the offsets of the
+ # corresponding named group are returned, or <code>nil</code>
+ # if the group does not exist.
+ #
+ # m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
+ # m.offset(:middle) #=> [3, 4]
+
def offset(*idx)
if idx[0].is_a?(Symbol)
k = to_index( idx[0] )
View
@@ -326,18 +326,6 @@ def test_sub_compatibility
assert_equal("<a.gif>", $x.osub('.*\.([^\.]+)$', '<\&>'))
assert_equal("a.a.", $x.osub('(gif)', '\`') )
end
-
- class ::String
- def ogsub(*args)
- Oniguruma::ORegexp.new(args.shift).gsub(self, *args)
- end
- def ogsub!(*args)
- Oniguruma::ORegexp.new(args.shift).gsub!(self, *args)
- end
- def osub(re, *args)
- Oniguruma::ORegexp.new( re ).sub(self, *args)
- end
- end
def test_gsub_compat
assert_equal("hello".ogsub('[aeiou]', '*') , "h*ll*")

0 comments on commit 62c7eac

Please sign in to comment.