Skip to content

Commit

Permalink
Refactor char class parsing and enable macros in char classes
Browse files Browse the repository at this point in the history
Addresses issue #216.
  • Loading branch information
lsf37 committed Dec 7, 2019
1 parent 236f530 commit 3fcab7f
Show file tree
Hide file tree
Showing 14 changed files with 758 additions and 859 deletions.
1,026 changes: 339 additions & 687 deletions jflex/src/main/cup/LexParse.cup

Large diffs are not rendered by default.

19 changes: 17 additions & 2 deletions jflex/src/main/java/jflex/chars/Interval.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ public final class Interval {
public int end;

/**
* Constructs a new interval from {@code start} to {@code end}, including both start and end
* points.
* Constructs a new interval from {@code start} to {@code end}, including both end points.
*
* @param start first codepoint the interval contains
* @param end last codepoint the interval contains
Expand All @@ -35,6 +34,22 @@ public Interval(int start, int end) {
this.end = end;
}

/**
 * Constructs a new single-element interval whose start and end are both {@code content}.
 *
 * @param content the only character (code point) the interval contains
 */
public Interval(int content) {
  // A one-character interval is just the degenerate range [content, content].
  this(content, content);
}

/**
 * Copy constructor: creates a new interval with the same start and end as {@code other}.
 *
 * @param other the interval to copy
 */
public Interval(Interval other) {
  this(other.start, other.end);
}

/**
* Returns {@code true} iff {@code point} is contained in this interval.
*
Expand Down
27 changes: 3 additions & 24 deletions jflex/src/main/java/jflex/core/CharClasses.java
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ public int getNumClasses() {
* @param caseless if true upper/lower/title case are considered equivalent
*/
public void makeClass(IntCharSet set, boolean caseless) {
set = set.copy(); // avoid destructively updating the original

if (caseless) set = set.getCaseless(unicodeProps);

if (DEBUG) {
Expand Down Expand Up @@ -275,7 +277,7 @@ public void makeClassNot(List<Interval> l, boolean caseless) {
* Returns an array that contains the character class codes of all characters in the specified set
* of input characters.
*/
private int[] getClassCodes(IntCharSet set, boolean negate) {
public int[] getClassCodes(IntCharSet set, boolean negate) {

if (DEBUG) {
Out.dump("getting class codes for " + set);
Expand Down Expand Up @@ -309,29 +311,6 @@ private int[] getClassCodes(IntCharSet set, boolean negate) {
return result;
}

/**
* Returns an array that contains the character class codes of all characters in the specified set
* of input characters.
*
* @param intervalList a List of Intervals, the set of characters to get the class codes for
* @return an array with the class codes for intervalList
*/
public int[] getClassCodes(List<Interval> intervalList) {
return getClassCodes(new IntCharSet(intervalList), false);
}

/**
* Returns an array that contains the character class codes of all characters that are
* <strong>not</strong> in the specified set of input characters.
*
* @param intervalList a List of Intervals, the complement of the set of characters to get the
* class codes for
* @return an array with the class codes for the complement of intervalList
*/
public int[] getNotClassCodes(List<Interval> intervalList) {
return getClassCodes(new IntCharSet(intervalList), true);
}

/**
* Check consistency of the stored classes [debug].
*
Expand Down
47 changes: 45 additions & 2 deletions jflex/src/main/java/jflex/core/IntCharSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@ public IntCharSet() {}

/** Creates a char set that contains only the given character. */
public IntCharSet(int c) {
this(new Interval(c, c));
this(new Interval(c));
}

/**
 * Creates a char set that contains only the given interval.
 *
 * <p>Wraps {@code start} and {@code end} in a new {@link Interval} and delegates to the
 * single-interval constructor.
 *
 * @param start first codepoint of the interval
 * @param end last codepoint of the interval
 */
public IntCharSet(int start, int end) {
this(new Interval(start, end));
}

/** Creates a charset that contains only one interval. */
Expand All @@ -60,6 +65,28 @@ public IntCharSet(List<Interval> chars) {
for (Interval interval : chars) add(interval);
}

/**
 * Builds the set of all characters, i.e. the single interval from {@code 0} up to {@code
 * CharClasses.maxChar}.
 *
 * @return a new IntCharSet that contains all characters.
 */
public static IntCharSet allChars() {
  final int first = 0;
  final int last = CharClasses.maxChar;
  return new IntCharSet(first, last);
}

/**
 * Builds the set of new-line characters.
 *
 * @return a new IntCharSet that contains all characters that are considered a new-line char in
 *     Java.
 */
public static IntCharSet nlChars() {
  // line feed through carriage return (U+000A .. U+000D)
  IntCharSet newlines = new IntCharSet(new Interval('\n', '\r'));
  // next line (U+0085)
  newlines.add(new Interval(0x0085, 0x0085));
  // line separator and paragraph separator (U+2028 .. U+2029)
  newlines.add(new Interval(0x2028, 0x2029));
  return newlines;
}

/**
* returns the index of the interval that contains the character c, -1 if there is no such
* interval
Expand Down Expand Up @@ -194,7 +221,7 @@ public void add(int c) {
}

// end reached but nothing found -> append at end
intervals.add(new Interval(c, c));
intervals.add(new Interval(c));
}

/**
Expand All @@ -207,6 +234,21 @@ public boolean contains(int singleChar) {
return indexOf(singleChar) >= 0;
}

/**
 * Checks whether this set contains another set.
 *
 * <p>Computed as: copy {@code other}, subtract the intersection of this set with {@code other},
 * and test whether anything remains.
 *
 * @param other an IntCharSet; {@code null} is treated as the empty set.
 * @return true iff all characters of {@code other} are contained in this set.
 */
public boolean contains(IntCharSet other) {
  if (other == null) {
    // the empty set is contained in every set
    return true;
  }

  IntCharSet remainder = other.copy();
  remainder.sub(this.and(other));
  return !remainder.containsElements();
}

/**
* {@inheritDoc}
*
Expand All @@ -222,6 +264,7 @@ public boolean equals(Object o) {
return Objects.equals(intervals, set.intervals);
}

/** {@inheritDoc} */
@Override
public int hashCode() {
int h = 1;
Expand Down
21 changes: 18 additions & 3 deletions jflex/src/main/java/jflex/core/Macros.java
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ public void expand() throws jflex.exceptions.MacroException {
* @throws jflex.exceptions.MacroException when an error (such as a cyclic definition) occurs
* during expansion
*/
@SuppressWarnings("unchecked")
private RegExp expandMacro(String name, RegExp definition)
throws jflex.exceptions.MacroException {

Expand Down Expand Up @@ -180,15 +181,29 @@ private RegExp expandMacro(String name, RegExp definition)
case sym.STRING_I:
case sym.CHAR:
case sym.CHAR_I:
case sym.PRIMCLASS:
return definition;

case sym.CCLASS:
case sym.CCLASSNOT:
RegExp1 cclass = (RegExp1) definition;
List<RegExp> classes = (List<RegExp>) cclass.content;
List<RegExp> newClasses = new ArrayList<RegExp>();
for (RegExp r : classes) {
newClasses.add(expandMacro(name, r));
}
cclass.content = newClasses;
return cclass;

case sym.CCLASSOP:
RegExp2 cclassOp = (RegExp2) ((RegExp1) definition).content;
cclassOp.r1 = expandMacro(name, cclassOp.r1);
cclassOp.r2 = expandMacro(name, cclassOp.r2);
return definition;

default:
throw new MacroException(
"unknown expression type "
+ definition.type
+ " in macro expansion"); // $NON-NLS-1$ //$NON-NLS-2$
"unknown expression type " + definition.typeName() + " in macro expansion");
}
}
}
55 changes: 15 additions & 40 deletions jflex/src/main/java/jflex/core/NFA.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import jflex.base.IntPair;
import jflex.chars.Interval;
import jflex.exceptions.GeneratorException;
import jflex.exceptions.RegExpException;
import jflex.l10n.ErrorMessages;

/**
Expand Down Expand Up @@ -188,7 +189,7 @@ public void addRegExp(int regExpNum) {
// base forward pass
IntPair forward = insertNFA(r1);
// lookahead backward pass
IntPair backward = insertNFA(r2.rev(macros));
IntPair backward = insertNFA(r2.rev());

isFinal[forward.end()] = true;
action[forward.end()] = new Action(Action.FORWARD_ACTION);
Expand Down Expand Up @@ -222,9 +223,6 @@ private void insertLookAheadChoices(int baseEnd, Action a, RegExp lookAhead) {
RegExp2 r = (RegExp2) lookAhead;
insertLookAheadChoices(baseEnd, a, r.r1);
insertLookAheadChoices(baseEnd, a, r.r2);
} else if (lookAhead.type == sym.MACROUSE) {
RegExp1 r = (RegExp1) lookAhead;
insertLookAheadChoices(baseEnd, a, macros.getDefinition((String) r.content));
} else {
int len = SemCheck.length(lookAhead);

Expand All @@ -243,11 +241,11 @@ private void insertLookAheadChoices(int baseEnd, Action a, RegExp lookAhead) {
scanner.actions.add(x);
} else {
// should never happen
throw new Error(
throw new RegExpException(
"When inserting lookahead expression: unknown expression type "
+ lookAhead.type
+ lookAhead.typeName()
+ " in "
+ lookAhead); // $NON-NLS-1$ //$NON-NLS-2$
+ lookAhead);
}
}
}
Expand Down Expand Up @@ -671,20 +669,8 @@ private IntPair insertStringNFA(boolean caseless, String str) {
return IntPair.create(start, i + start);
}

private void insertClassNFA(List<Interval> intervals, int start, int end) {
// empty char class is ok:
if (intervals == null) return;

for (int aCl : classes.getClassCodes(intervals)) {
addTransition(start, aCl, end);
}
}

private void insertNotClassNFA(List<Interval> intervals, int start, int end) {

for (int input : classes.getNotClassCodes(intervals)) {
addTransition(start, input, end);
}
/**
 * Adds a transition from {@code start} to {@code end} for every character class code that
 * {@code classes.getClassCodes(set, false)} returns for {@code set}.
 *
 * @param set the character set to insert transitions for
 * @param start the state the transitions leave from
 * @param end the state the transitions lead to
 */
private void insertClassNFA(IntCharSet set, int start, int end) {
  int[] classCodes = classes.getClassCodes(set, false);
  for (int code : classCodes) {
    addTransition(start, code, end);
  }
}

/**
Expand Down Expand Up @@ -909,9 +895,7 @@ private void removeDead(int start, int end) {
* <p>Assumes that regExp.isCharClass(macros) == true
*
* @param regExp the regular expression to construct the NFA for
* @return a pair of integers denoting the index of start and end state of the NFA.
*/
@SuppressWarnings("unchecked") // for List<Interval> casts
private void insertCCLNFA(RegExp regExp, int start, int end) {
switch (regExp.type) {
case sym.BAR:
Expand All @@ -920,12 +904,8 @@ private void insertCCLNFA(RegExp regExp, int start, int end) {
insertCCLNFA(r.r2, start, end);
return;

case sym.CCLASS:
insertClassNFA((List<Interval>) ((RegExp1) regExp).content, start, end);
return;

case sym.CCLASSNOT:
insertNotClassNFA((List<Interval>) ((RegExp1) regExp).content, start, end);
case sym.PRIMCLASS:
insertClassNFA((IntCharSet) ((RegExp1) regExp).content, start, end);
return;

case sym.CHAR:
Expand All @@ -935,13 +915,10 @@ private void insertCCLNFA(RegExp regExp, int start, int end) {
case sym.CHAR_I:
insertLetterNFA(true, (Integer) ((RegExp1) regExp).content, start, end);
return;

case sym.MACROUSE:
insertCCLNFA(macros.getDefinition((String) ((RegExp1) regExp).content), start, end);
return;
}

throw new Error("Unknown expression type " + regExp.type + " in NFA construction");
throw new RegExpException(
"Unknown expression type " + regExp.typeName() + " in NFA construction");
}

/**
Expand All @@ -963,7 +940,7 @@ public IntPair insertNFA(RegExp regExp) {
Out.debug("Inserting RegExp : " + regExp);
}

if (regExp.isCharClass(macros)) {
if (regExp.isCharClass()) {
start = numStates;
end = numStates + 1;

Expand Down Expand Up @@ -1040,19 +1017,17 @@ public IntPair insertNFA(RegExp regExp) {
return complement(insertNFA((RegExp) ((RegExp1) regExp).content));

case sym.TILDE:
return insertNFA(regExp.resolveTilde(macros));
return insertNFA(regExp.resolveTilde());

case sym.STRING:
return insertStringNFA(false, (String) ((RegExp1) regExp).content);

case sym.STRING_I:
return insertStringNFA(true, (String) ((RegExp1) regExp).content);

case sym.MACROUSE:
return insertNFA(macros.getDefinition((String) ((RegExp1) regExp).content));
}

throw new Error("Unknown expression type " + regExp.type + " in NFA construction");
throw new RegExpException(
"Unknown expression type " + regExp.typeName() + " in NFA construction");
}

public int numStates() {
Expand Down
Loading

0 comments on commit 3fcab7f

Please sign in to comment.