Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Fixed usage of named backreferences in gsub with non-ASCII names

  • Loading branch information...
commit 0962aff1d5f85daa61e822ccc674d7762544afc5 1 parent bcf193b
Nikolai Lugovoi authored
Showing with 17 additions and 12 deletions.
  1. +7 −11 ext/oregexp.c
  2. +10 −1 test/test_oniguruma.rb
View
18 ext/oregexp.c
@@ -231,7 +231,6 @@ matched group), \` (string prior to match), \' (string after match), and \\ (a l
backslash). */
/* scan the replacement text, looking for substitutions (\n) and \escapes. */
-#define MAX_GROUP_NAME_LEN 64
#if ONIGURUMA_VERSION_MAJOR == 2
#define ONIGENC_MBC_ENC_LEN(e, p) enc_len(e, *(p))
#endif
@@ -244,7 +243,7 @@ oregexp_get_replacement(pat, src_text, repl_text, region)
{
ORegexp *oregexp;
VALUE ret;
- int32_t replIdx = 0, name_pos ;
+ int32_t replIdx = 0, name_pos, name_start, name_end ;
int32_t replacementLength = RSTRING(repl_text)->len;
UChar *replacementText = RSTRING(repl_text)->ptr;
UChar *replacementEnd = replacementText + (replacementLength-1);
@@ -254,7 +253,6 @@ oregexp_get_replacement(pat, src_text, repl_text, region)
OnigEncoding enc;
const UChar * matchText;
long matchLen;
- UChar named_group[MAX_GROUP_NAME_LEN] = {0}, *name_end;
matchText = RSTRING(src_text)->ptr;
matchLen = RSTRING(src_text)->len;
@@ -338,28 +336,26 @@ oregexp_get_replacement(pat, src_text, repl_text, region)
break;
case '<': // named group references \<name>
name_pos = replIdx+c_len;
- name_end = named_group;
+ name_end = name_start = replIdx+c_len;
while(name_pos < replacementLength) {
c = ONIGENC_MBC_TO_CODE(enc, replacementText+name_pos, replacementEnd);
c_len = ONIGENC_MBC_ENC_LEN(enc, replacementText+name_pos) ;
name_pos += c_len;
if( c == '>') break;
- if( c < 128 && ONIGENC_IS_CODE_ALNUM(enc, c) &&
- name_end - named_group < MAX_GROUP_NAME_LEN ) {
- *name_end = (UChar)c;
- name_end ++;
+ if( ONIGENC_IS_CODE_WORD(enc, c) ) {
+ name_end += c_len;
} else {
break;
}
}
- if( c != '>' || name_end == named_group ) {
+ if( c != '>' || name_end == name_start ) {
// place backslash and '<'
rb_str_buf_cat(ret, replacementText+(replIdx-p_len), p_len+c_len);
replIdx += c_len;
} else {
// lookup for group and subst for that value
- *name_end = '\0';
- groupNum = onig_name_to_backref_number( oregexp->reg, named_group, name_end, region);
+ groupNum = onig_name_to_backref_number( oregexp->reg,
+ replacementText+name_start, replacementText+name_end, region);
if( groupNum >= 0 ) {
rb_str_buf_cat(ret, matchText+region->beg[groupNum],
region->end[groupNum]-region->beg[groupNum]);
View
11 test/test_oniguruma.rb
@@ -124,7 +124,7 @@ def test_source
def test_named_sub_backrefs
re = Oniguruma::ORegexp.new('(?<pre>\w+?)\d+(?<after>\w+)')
- assert_equal('def123abc', re.sub('abc123def', '\<after>123\<pre>') )
+ assert_equal(' def123abc ', re.sub('abc123def', ' \<after>123\<pre> ') )
end
def test_named_sub_backrefs_dupes
@@ -150,6 +150,10 @@ def test_backref_set_for_match_op
assert_equal "25", $3
end
+ def test_multibyte_named_backrefs
+ r = Oniguruma::ORegexp.new('(?<группа>test).+(\k<группа>)', :encoding => Oniguruma::ENCODING_UTF8)
+ assert_equal "should !test!", r.sub("should test this damned test", '!\<группа>!')
+ end
end
@@ -270,6 +274,11 @@ def test_group_by_name
assert_equal( nil, matches[:inexistent])
end
+ def test_multibyte_named_backrefs
+ r = Oniguruma::ORegexp.new('(?<имя>test).+(\k<имя>)', :encoding => Oniguruma::ENCODING_UTF8)
+ assert_equal "should TEST", r.sub("should test this damned test") {|m| m[:"имя"].upcase }
+ end
+
# casefolding for full Unicode set is not present in versions prior to 5.
if Oniguruma::VERSION >= '5.0.0'
def test_utf8_ignore_case
Please sign in to comment.
Something went wrong with that request. Please try again.