Skip to content
This repository has been archived by the owner on Apr 28, 2019. It is now read-only.

Commit

Permalink
Improve regexp performance by avoiding conversion of input to char[].
Browse files (browse the repository at this point in the history)
  • Loading branch information
hannes%helma.at committed Mar 21, 2011
1 parent b688e34 commit d1ce3e3
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 62 deletions.
87 changes: 42 additions & 45 deletions src/org/mozilla/javascript/regexp/NativeRegExp.java
Expand Up @@ -1330,12 +1330,12 @@ private static int getIndex(byte[] array, int pc)
*/
private static boolean
flatNMatcher(REGlobalData gData, int matchChars,
int length, char[] chars, int end)
int length, String input, int end)
{
if ((gData.cp + length) > end)
return false;
for (int i = 0; i < length; i++) {
if (gData.regexp.source[matchChars + i] != chars[gData.cp + i]) {
if (gData.regexp.source[matchChars + i] != input.charAt(gData.cp + i)) {
return false;
}
}
Expand All @@ -1345,13 +1345,13 @@ private static int getIndex(byte[] array, int pc)

private static boolean
flatNIMatcher(REGlobalData gData, int matchChars,
int length, char[] chars, int end)
int length, String input, int end)
{
if ((gData.cp + length) > end)
return false;
for (int i = 0; i < length; i++) {
if (upcase(gData.regexp.source[matchChars + i])
!= upcase(chars[gData.cp + i]))
!= upcase(input.charAt(gData.cp + i)))
{
return false;
}
Expand Down Expand Up @@ -1385,7 +1385,7 @@ such that Canonicalize(s[i]) is not the same character as
*/
private static boolean
backrefMatcher(REGlobalData gData, int parenIndex,
char[] chars, int end)
String input, int end)
{
int len;
int i;
Expand All @@ -1399,13 +1399,13 @@ such that Canonicalize(s[i]) is not the same character as

if ((gData.regexp.flags & JSREG_FOLD) != 0) {
for (i = 0; i < len; i++) {
if (upcase(chars[parenContent + i]) != upcase(chars[gData.cp + i]))
if (upcase(input.charAt(parenContent + i)) != upcase(input.charAt(gData.cp + i)))
return false;
}
}
else {
for (i = 0; i < len; i++) {
if (chars[parenContent + i] != chars[gData.cp + i])
if (input.charAt(parenContent + i) != input.charAt(gData.cp + i))
return false;
}
}
Expand Down Expand Up @@ -1674,7 +1674,7 @@ such that Canonicalize(s[i]) is not the same character as
}

private static boolean
executeREBytecode(REGlobalData gData, char[] chars, int end)
executeREBytecode(REGlobalData gData, String input, int end)
{
int pc = 0;
byte program[] = gData.regexp.program;
Expand All @@ -1685,7 +1685,7 @@ such that Canonicalize(s[i]) is not the same character as
currentContinuation_pc = 0;
currentContinuation_op = REOP_END;
if (debug) {
System.out.println("Input = \"" + new String(chars) + "\", start at " + gData.cp);
System.out.println("Input = \"" + input + "\", start at " + gData.cp);
}
int op = program[pc++];
for (;;) {
Expand All @@ -1700,7 +1700,7 @@ such that Canonicalize(s[i]) is not the same character as
if (gData.cp != 0) {
if (gData.multiline ||
((gData.regexp.flags & JSREG_MULTILINE) != 0)) {
if (!isLineTerm(chars[gData.cp - 1])) {
if (!isLineTerm(input.charAt(gData.cp - 1))) {
result = false;
break;
}
Expand All @@ -1716,7 +1716,7 @@ such that Canonicalize(s[i]) is not the same character as
if (gData.cp != end) {
if (gData.multiline ||
((gData.regexp.flags & JSREG_MULTILINE) != 0)) {
if (!isLineTerm(chars[gData.cp])) {
if (!isLineTerm(input.charAt(gData.cp))) {
result = false;
break;
}
Expand All @@ -1729,51 +1729,51 @@ such that Canonicalize(s[i]) is not the same character as
result = true;
break;
case REOP_WBDRY:
result = ((gData.cp == 0 || !isWord(chars[gData.cp - 1]))
^ !((gData.cp < end) && isWord(chars[gData.cp])));
result = ((gData.cp == 0 || !isWord(input.charAt(gData.cp - 1)))
^ !((gData.cp < end) && isWord(input.charAt(gData.cp))));
break;
case REOP_WNONBDRY:
result = ((gData.cp == 0 || !isWord(chars[gData.cp - 1]))
^ ((gData.cp < end) && isWord(chars[gData.cp])));
result = ((gData.cp == 0 || !isWord(input.charAt(gData.cp - 1)))
^ ((gData.cp < end) && isWord(input.charAt(gData.cp))));
break;
case REOP_DOT:
result = (gData.cp != end && !isLineTerm(chars[gData.cp]));
result = (gData.cp != end && !isLineTerm(input.charAt(gData.cp)));
if (result) {
gData.cp++;
}
break;
case REOP_DIGIT:
result = (gData.cp != end && isDigit(chars[gData.cp]));
result = (gData.cp != end && isDigit(input.charAt(gData.cp)));
if (result) {
gData.cp++;
}
break;
case REOP_NONDIGIT:
result = (gData.cp != end && !isDigit(chars[gData.cp]));
result = (gData.cp != end && !isDigit(input.charAt(gData.cp)));
if (result) {
gData.cp++;
}
break;
case REOP_SPACE:
result = (gData.cp != end && isREWhiteSpace(chars[gData.cp]));
result = (gData.cp != end && isREWhiteSpace(input.charAt(gData.cp)));
if (result) {
gData.cp++;
}
break;
case REOP_NONSPACE:
result = (gData.cp != end && !isREWhiteSpace(chars[gData.cp]));
result = (gData.cp != end && !isREWhiteSpace(input.charAt(gData.cp)));
if (result) {
gData.cp++;
}
break;
case REOP_ALNUM:
result = (gData.cp != end && isWord(chars[gData.cp]));
result = (gData.cp != end && isWord(input.charAt(gData.cp)));
if (result) {
gData.cp++;
}
break;
case REOP_NONALNUM:
result = (gData.cp != end && !isWord(chars[gData.cp]));
result = (gData.cp != end && !isWord(input.charAt(gData.cp)));
if (result) {
gData.cp++;
}
Expand All @@ -1784,7 +1784,7 @@ such that Canonicalize(s[i]) is not the same character as
pc += INDEX_LEN;
int length = getIndex(program, pc);
pc += INDEX_LEN;
result = flatNMatcher(gData, offset, length, chars, end);
result = flatNMatcher(gData, offset, length, input, end);
}
break;
case REOP_FLATi:
Expand All @@ -1793,13 +1793,13 @@ such that Canonicalize(s[i]) is not the same character as
pc += INDEX_LEN;
int length = getIndex(program, pc);
pc += INDEX_LEN;
result = flatNIMatcher(gData, offset, length, chars, end);
result = flatNIMatcher(gData, offset, length, input, end);
}
break;
case REOP_FLAT1:
{
char matchCh = (char)(program[pc++] & 0xFF);
result = (gData.cp != end && chars[gData.cp] == matchCh);
result = (gData.cp != end && input.charAt(gData.cp) == matchCh);
if (result) {
gData.cp++;
}
Expand All @@ -1809,7 +1809,7 @@ such that Canonicalize(s[i]) is not the same character as
{
char matchCh = (char)(program[pc++] & 0xFF);
result = (gData.cp != end
&& upcase(chars[gData.cp]) == upcase(matchCh));
&& upcase(input.charAt(gData.cp)) == upcase(matchCh));
if (result) {
gData.cp++;
}
Expand All @@ -1819,7 +1819,7 @@ such that Canonicalize(s[i]) is not the same character as
{
char matchCh = (char)getIndex(program, pc);
pc += INDEX_LEN;
result = (gData.cp != end && chars[gData.cp] == matchCh);
result = (gData.cp != end && input.charAt(gData.cp) == matchCh);
if (result) {
gData.cp++;
}
Expand All @@ -1830,7 +1830,7 @@ such that Canonicalize(s[i]) is not the same character as
char matchCh = (char)getIndex(program, pc);
pc += INDEX_LEN;
result = (gData.cp != end
&& upcase(chars[gData.cp]) == upcase(matchCh));
&& upcase(input.charAt(gData.cp)) == upcase(matchCh));
if (result) {
gData.cp++;
}
Expand Down Expand Up @@ -1889,7 +1889,7 @@ such that Canonicalize(s[i]) is not the same character as
{
int parenIndex = getIndex(program, pc);
pc += INDEX_LEN;
result = backrefMatcher(gData, parenIndex, chars, end);
result = backrefMatcher(gData, parenIndex, input, end);
}
break;

Expand All @@ -1899,7 +1899,7 @@ such that Canonicalize(s[i]) is not the same character as
pc += INDEX_LEN;
if (gData.cp != end) {
if (classMatcher(gData, gData.regexp.classList[index],
chars[gData.cp]))
input.charAt(gData.cp)))
{
gData.cp++;
result = true;
Expand Down Expand Up @@ -2201,7 +2201,7 @@ such that Canonicalize(s[i]) is not the same character as

private static boolean
matchRegExp(REGlobalData gData, RECompiled re,
char[] chars, int start, int end, boolean multiline)
String input, int start, int end, boolean multiline)
{
if (re.parenCount != 0) {
gData.parens = new long[re.parenCount];
Expand Down Expand Up @@ -2233,7 +2233,7 @@ such that Canonicalize(s[i]) is not the same character as
if (i == end) {
return false;
}
char matchCh = chars[i];
char matchCh = input.charAt(i);
if (matchCh == anchorCh ||
((gData.regexp.flags & JSREG_FOLD) != 0
&& upcase(matchCh) == upcase((char)anchorCh)))
Expand All @@ -2247,7 +2247,7 @@ && upcase(matchCh) == upcase((char)anchorCh)))
for (int j = 0; j < re.parenCount; j++) {
gData.set_parens(j, -1, 0);
}
boolean result = executeREBytecode(gData, chars, end);
boolean result = executeREBytecode(gData, input, end);

gData.backTrackStackTop = null;
gData.stateStackTop = null;
Expand All @@ -2268,24 +2268,21 @@ Object executeRegExp(Context cx, Scriptable scope, RegExpImpl res,
REGlobalData gData = new REGlobalData();

int start = indexp[0];
char[] charArray = str.toCharArray();
int end = charArray.length;
int end = str.length();
if (start > end)
start = end;
//
// Call the recursive matcher to do the real work.
//
boolean matches = matchRegExp(gData, re, charArray, start, end,
boolean matches = matchRegExp(gData, re, str, start, end,
res.multiline);
if (!matches) {
if (matchType != PREFIX) return null;
return Undefined.instance;
}
int index = gData.cp;
int i = index;
indexp[0] = i;
int matchlen = i - (start + gData.skipped);
int ep = index;
int ep = indexp[0] = index;
int matchlen = ep - (start + gData.skipped);
index -= matchlen;
Object result;
Scriptable obj;
Expand All @@ -2308,7 +2305,7 @@ Object executeRegExp(Context cx, Scriptable scope, RegExpImpl res,
result = cx.newArray(scope, 0);
obj = (Scriptable) result;

String matchstr = new String(charArray, index, matchlen);
String matchstr = str.substring(index, index + matchlen);
obj.put(0, obj, matchstr);
}

Expand All @@ -2324,7 +2321,7 @@ Object executeRegExp(Context cx, Scriptable scope, RegExpImpl res,
String parstr;
if (cap_index != -1) {
int cap_length = gData.parens_length(num);
parsub = new SubString(charArray, cap_index, cap_length);
parsub = new SubString(str, cap_index, cap_length);
res.parens[num] = parsub;
if (matchType == TEST) continue;
parstr = parsub.toString();
Expand Down Expand Up @@ -2352,11 +2349,11 @@ Object executeRegExp(Context cx, Scriptable scope, RegExpImpl res,
res.leftContext = new SubString();
res.rightContext = new SubString();
}
res.lastMatch.charArray = charArray;
res.lastMatch.str = str;
res.lastMatch.index = index;
res.lastMatch.length = matchlen;

res.leftContext.charArray = charArray;
res.leftContext.str = str;
if (cx.getLanguageVersion() == Context.VERSION_1_2) {
/*
* JS1.2 emulated Perl4.0.1.8 (patch level 36) for global regexps used
Expand All @@ -2383,7 +2380,7 @@ Object executeRegExp(Context cx, Scriptable scope, RegExpImpl res,
res.leftContext.length = start + gData.skipped;
}

res.rightContext.charArray = charArray;
res.rightContext.str = str;
res.rightContext.index = ep;
res.rightContext.length = end - ep;

Expand Down
16 changes: 8 additions & 8 deletions src/org/mozilla/javascript/regexp/RegExpImpl.java
Expand Up @@ -100,7 +100,6 @@ public Object action(Context cx, Scriptable scope,
data.leftIndex = 0;
Object val = matchOrReplace(cx, scope, thisObj, args,
this, data, true);
SubString rc = this.rightContext;

if (data.charBuf == null) {
if (data.global || val == null
Expand All @@ -112,7 +111,8 @@ public Object action(Context cx, Scriptable scope,
SubString lc = this.leftContext;
replace_glob(data, cx, scope, this, lc.index, lc.length);
}
data.charBuf.append(rc.charArray, rc.index, rc.length);
SubString rc = this.rightContext;
data.charBuf.append(rc.str, rc.index, rc.index + rc.length);
return data.charBuf.toString();
}

Expand Down Expand Up @@ -363,15 +363,15 @@ private static void replace_glob(GlobData rdata, Context cx,
}

int growth = leftlen + replen + reImpl.rightContext.length;
StringBuffer charBuf = rdata.charBuf;
StringBuilder charBuf = rdata.charBuf;
if (charBuf == null) {
charBuf = new StringBuffer(growth);
charBuf = new StringBuilder(growth);
rdata.charBuf = charBuf;
} else {
charBuf.ensureCapacity(rdata.charBuf.length() + growth);
}

charBuf.append(reImpl.leftContext.charArray, leftIndex, leftlen);
charBuf.append(reImpl.leftContext.str, leftIndex, leftIndex + leftlen);
if (rdata.lambda != null) {
charBuf.append(lambdaStr);
} else {
Expand Down Expand Up @@ -475,7 +475,7 @@ private static SubString interpretDollar(Context cx, RegExpImpl res,
private static void do_replace(GlobData rdata, Context cx,
RegExpImpl regExpImpl)
{
StringBuffer charBuf = rdata.charBuf;
StringBuilder charBuf = rdata.charBuf;
int cp = 0;
String da = rdata.repstr;
int dp = rdata.dollar;
Expand All @@ -490,7 +490,7 @@ private static void do_replace(GlobData rdata, Context cx,
if (sub != null) {
len = sub.length;
if (len > 0) {
charBuf.append(sub.charArray, sub.index, len);
charBuf.append(sub.str, sub.index, sub.index + len);
}
cp += skip[0];
dp += skip[0];
Expand Down Expand Up @@ -752,6 +752,6 @@ final class GlobData
Function lambda; /* replacement function object or null */
String repstr; /* replacement string */
int dollar = -1; /* -1 or index of first $ in repstr */
StringBuffer charBuf; /* result characters, null initially */
StringBuilder charBuf; /* result characters, null initially */
int leftIndex; /* leftContext index, always 0 for JS1.2 */
}

0 comments on commit d1ce3e3

Please sign in to comment.