Skip to content

Commit

Permalink
04_simeplfold_1
Browse files Browse the repository at this point in the history
  • Loading branch information
arnaudroger committed Mar 23, 2018
1 parent bec631b commit bc6d2e0
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 16 deletions.
18 changes: 18 additions & 0 deletions java/com/google/re2j/Compiler.java
Expand Up @@ -171,6 +171,24 @@ private Frag rune(int[] runes, int flags) {
runes.length == 2 &&
runes[0] == runes[1]) {
i.op = Inst.RUNE1;
i.f0 = runes[0];
} else if (runes.length == 1 && (flags & RE2.FOLD_CASE) != 0) {
i.op = Inst.RUNE1_FOLD;
i.f0 = runes[0];
i.f1 = runes[0];
i.f2 = runes[0];
i.f3 = runes[0];

int[] fold = Unicode.fold(i.f0);
if (fold.length > 0){
i.f1 = fold[0];
if (fold.length > 1) {
i.f2 = fold[1];
if (fold.length > 2) {
i.f3 = fold[2];
}
}
}
} else if (runes.length == 2 &&
runes[0] == 0 &&
runes[1] == Unicode.MAX_RUNE) {
Expand Down
30 changes: 14 additions & 16 deletions java/com/google/re2j/Inst.java
Expand Up @@ -24,6 +24,7 @@ class Inst {
public static final int RUNE1 = 9;
public static final int RUNE_ANY = 10;
public static final int RUNE_ANY_NOT_NL = 11;
public static final int RUNE1_FOLD = 12;

int op;
int out; // all but MATCH, FAIL
Expand All @@ -32,12 +33,17 @@ class Inst {
// otherwise a list of [lo,hi] pairs. hi is *inclusive*.
// REVIEWERS: why not half-open intervals?

// Rune slots used by the single-rune opcodes.
// For RUNE1 only f0 is compared against the input rune.
// For RUNE1_FOLD the compiler stores the literal rune in f0 and up to three
// case-fold variants in f1..f3; unused slots repeat the literal rune so that
// matching can compare against all four slots unconditionally.
int f0;
int f1;
int f2;
int f3;

// Creates an instruction with the given opcode. Only op is assigned here;
// every other field is left for the caller to fill in.
Inst(int op) {
this.op = op;
}

/**
 * Reports whether this instruction's opcode is one of the rune-matching
 * opcodes, i.e. lies in the contiguous range {@code RUNE..RUNE1_FOLD}.
 *
 * @return {@code true} if {@code op} is between {@code RUNE} and
 *     {@code RUNE1_FOLD} inclusive
 */
boolean isRune() {
  // RUNE1_FOLD is the highest-numbered rune opcode, so it must be the upper
  // bound; stopping at RUNE_ANY_NOT_NL would exclude fold instructions.
  return op >= RUNE && op <= RUNE1_FOLD;
}


Expand All @@ -46,21 +52,12 @@ boolean isRune() {
boolean matchRune(int r) {
// Special case: single-rune slice is from literal string, not char
// class.
if (runes.length == 1) {
int r0 = runes[0];
if (r == r0) {
return true;
}
if ((arg & RE2.FOLD_CASE) != 0) {
for (int r1 = Unicode.simpleFold(r0);
r1 != r0;
r1 = Unicode.simpleFold(r1)) {
if (r == r1) {
return true;
}
}
}
return false;
if (op ==RUNE1) {
return f0 == r;
}

if (op == RUNE1_FOLD) {
return f0 == r || f1 == r || f2 == r || f3 == r;
}

// Peek at the first few pairs.
Expand Down Expand Up @@ -108,6 +105,7 @@ public String toString() {
case NOP:
return "nop -> " + out;
case RUNE:
case RUNE1_FOLD:
if (runes == null) {
return "rune <null>"; // can't happen
}
Expand Down
2 changes: 2 additions & 0 deletions java/com/google/re2j/Machine.java
Expand Up @@ -307,6 +307,7 @@ private void step(Queue runq, Queue nextq, int pos, int nextPos, int c,
break;

case Inst.RUNE:
case Inst.RUNE1_FOLD:
add = i.matchRune(c);
break;

Expand Down Expand Up @@ -385,6 +386,7 @@ private Thread add(Queue q, int pc, int pos, int[] cap, int cond, Thread t) {

case Inst.MATCH:
case Inst.RUNE:
case Inst.RUNE1_FOLD:
case Inst.RUNE1:
case Inst.RUNE_ANY:
case Inst.RUNE_ANY_NOT_NL:
Expand Down
14 changes: 14 additions & 0 deletions java/com/google/re2j/Unicode.java
Expand Up @@ -7,6 +7,8 @@

package com.google.re2j;

import java.util.Arrays;

/**
* Utilities for dealing with Unicode better than Java does.
*
Expand Down Expand Up @@ -227,6 +229,18 @@ static int simpleFold(int r) {
return toUpper(r);
}


/**
 * Collects the runes reached by repeatedly applying {@link #simpleFold(int)}
 * to {@code r0}, stopping when the sequence returns to {@code r0}.
 *
 * <p>The starting rune itself is not included, so a rune whose simple fold is
 * itself yields an empty array.
 *
 * @param r0 the rune whose fold variants are wanted
 * @return the variants in the order {@code simpleFold} produces them; never
 *     {@code null}
 */
public static int[] fold(int r0) {
  // Three slots is the expected common maximum, but the buffer grows on
  // demand so a longer orbit cannot overrun the array.
  int[] folds = new int[3];
  int foldsSize = 0;
  for (int r1 = simpleFold(r0); r1 != r0; r1 = simpleFold(r1)) {
    if (foldsSize == folds.length) {
      folds = Arrays.copyOf(folds, folds.length * 2);
    }
    folds[foldsSize++] = r1;
  }
  // Trim to the number of variants actually found.
  return Arrays.copyOf(folds, foldsSize);
}

private Unicode() {} // uninstantiable: the only constructor is private

}

0 comments on commit bc6d2e0

Please sign in to comment.