Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ if isMatch, _ := re.MatchString(`Something to match`); isMatch {

This feature is a work in progress and I'm open to ideas for more things to put here (maybe more relaxed character escaping rules?).

## ECMAScript compatibility mode
In this mode the engine provides compatibility with the [regex engine](https://tc39.es/ecma262/multipage/text-processing.html#sec-regexp-regular-expression-objects) described in the ECMAScript specification.

Additionally, a Unicode mode is provided which allows parsing of the `\u{CodePoint}` syntax. This syntax is only recognized when both the ECMAScript and Unicode options are provided.

## Library features that I'm still working on
- Regex split
Expand Down
1 change: 1 addition & 0 deletions regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ const (
Debug = 0x0080 // "d"
ECMAScript = 0x0100 // "e"
RE2 = 0x0200 // RE2 (regexp package) compatibility mode
Unicode = 0x0400 // "u"
)

func (re *Regexp) RightToLeft() bool {
Expand Down
14 changes: 14 additions & 0 deletions regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -858,6 +858,20 @@ func TestECMAScriptXCurlyBraceEscape(t *testing.T) {
}
}

// TestEcmaScriptUnicodeRange checks that a character class whose bounds are
// written with the \u{...} escape compiles when both the ECMAScript and
// Unicode options are set, and that the resulting expression matches input
// made of characters inside that range.
func TestEcmaScriptUnicodeRange(t *testing.T) {
	r, err := Compile(`([\u{001a}-\u{ffff}]+)`, ECMAScript|Unicode)
	if err != nil {
		// Report through the testing framework instead of panicking so that a
		// failure here does not abort the rest of the test binary.
		t.Fatal(err)
	}
	m, err := r.FindStringMatch("qqqq")
	if err != nil {
		t.Fatal(err)
	}
	if m == nil {
		t.Fatal("Expected non-nil, got nil")
	}
}

func TestNegateRange(t *testing.T) {
re := MustCompile(`[\D]`, 0)
if m, err := re.MatchString("A"); err != nil {
Expand Down
16 changes: 15 additions & 1 deletion syntax/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const (
Debug = 0x0080 // "d"
ECMAScript = 0x0100 // "e"
RE2 = 0x0200 // RE2 compat mode
Unicode = 0x0400 // "u"
)

func optionFromCode(ch rune) RegexOptions {
Expand All @@ -43,6 +44,8 @@ func optionFromCode(ch rune) RegexOptions {
return Debug
case 'e', 'E':
return ECMAScript
case 'u', 'U':
return Unicode
default:
return 0
}
Expand Down Expand Up @@ -1695,7 +1698,13 @@ func (p *parser) scanCharEscape() (r rune, err error) {
r, err = p.scanHex(2)
}
case 'u':
r, err = p.scanHex(4)
// ECMAScript supports \u{HEX} only if `u` is also set
if p.useOptionE() && p.useOptionU() && p.charsRight() > 0 && p.rightChar(0) == '{' {
p.moveRight(1)
return p.scanHexUntilBrace()
} else {
r, err = p.scanHex(4)
}
case 'a':
return '\u0007', nil
case 'b':
Expand Down Expand Up @@ -1972,6 +1981,11 @@ func (p *parser) useRE2() bool {
return (p.options & RE2) != 0
}

// useOptionU reports whether the Unicode ("u") option bit is set in
// p.options.
func (p *parser) useOptionU() bool {
	unicodeBit := p.options & Unicode
	return unicodeBit != 0
}

// True if options stack is empty.
func (p *parser) emptyOptionsStack() bool {
return len(p.optionsStack) == 0
Expand Down