Skip to content

Commit

Permalink
Improve and optimize recursion stopper
Browse files Browse the repository at this point in the history
  • Loading branch information
boxbeam committed Jun 1, 2022
1 parent dbb6570 commit 798e0fd
Show file tree
Hide file tree
Showing 21 changed files with 210 additions and 71 deletions.
2 changes: 2 additions & 0 deletions src/redempt/redlex/bnf/BNFLexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import redempt.redlex.processing.Lexer;
import redempt.redlex.token.*;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

Expand Down Expand Up @@ -121,6 +122,7 @@ private static TokenType rootType() {
TokenType line = new ChoiceToken("line", comment, sentence);
TokenType sentenceList = new ListToken("sentences", line, sepOrEnd);
TokenType sentenceRep = new RepeatingToken("sentencesRep", sentenceList);
sentenceRep.replacePlaceholders(Collections.emptyMap());
return sentenceRep;
}

Expand Down
32 changes: 7 additions & 25 deletions src/redempt/redlex/data/LexContext.java
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
package redempt.redlex.data;

import redempt.redlex.token.ListToken;

import java.util.HashMap;
import java.util.Map;

public class LexContext {

private int lastPos;
private TokenType token;
private Map<TokenType, Integer> map = new HashMap<>();
private LongStackSet stackSet = new LongStackSet();

public LexContext() {}

Expand All @@ -22,29 +17,16 @@ public int getLastPos() {
}

public boolean update(int pos, TokenType type) {
if (type.getName() == null) {
return shouldRecurse(pos, type);
}
if (pos > lastPos) {
if (type.getName() != null && pos > lastPos) {
lastPos = pos;
token = type;
}
return shouldRecurse(pos, type);
long id = ((long) pos << 32) + type.getId();
return stackSet.add(id);
}

private boolean shouldRecurse(int pos, TokenType type) {
if (!(type instanceof ListToken)) {
return false;
}
if (map.putIfAbsent(type, pos) == null) {
return true;
}
int cur = map.get(type);
if (pos > cur) {
map.put(type, pos);
return true;
}
return false;

/**
 * Discards the most recent entry pushed onto the recursion-tracking stack set.
 * Intended to be called once for every call to {@link #update(int, TokenType)}.
 */
public void pop() {
    this.stackSet.pop();
}

public TokenType getLastToken() {
Expand Down
103 changes: 103 additions & 0 deletions src/redempt/redlex/data/LongStackSet.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
package redempt.redlex.data;

/**
 * A stack of primitive {@code long} values combined with an open-addressing hash set
 * that mirrors the values currently on the stack.
 *
 * <p>Values are pushed with {@link #add(long)} and removed in LIFO order with
 * {@link #pop()}. The same value may be pushed more than once; it stays in the set
 * until the <em>last</em> copy of it has been popped, so {@link #contains(long)} always
 * answers "is this value somewhere on the stack right now?".
 *
 * <p>Any {@code long} may be stored, including {@code 0} and values whose low 32 bits
 * are negative. This class is not thread-safe.
 */
public class LongStackSet {

    private static final int INITIAL_STACK_CAPACITY = 100;
    private static final int INITIAL_SET_CAPACITY = 130;

    /** Pushed values, bottom of the stack at index 0. */
    private long[] stack = new long[INITIAL_STACK_CAPACITY];
    /**
     * Parallel to {@link #stack}: {@code true} when the push at that index was the one
     * that inserted the value into the set (i.e. it was not a duplicate). Only such an
     * entry removes the value from the set again when it is popped.
     */
    private boolean[] inserted = new boolean[INITIAL_STACK_CAPACITY];
    /** Linear-probing hash table; {@code 0} marks an empty slot. */
    private long[] set = new long[INITIAL_SET_CAPACITY];
    /** Membership of the value {@code 0}, which cannot live in the table itself. */
    private boolean hasZero;
    /** Number of occupied slots in {@link #set}. */
    private int setCount;
    /** Number of values on the stack, duplicates included. */
    private int size;

    /**
     * Pushes a value onto the stack.
     *
     * @param value The value to push
     * @return {@code true} if the value was not already on the stack, {@code false} if
     *         this push is a duplicate of a value that is still on the stack
     */
    public boolean add(long value) {
        if (size >= stack.length) {
            growStack();
        }
        boolean added = addToSet(value);
        stack[size] = value;
        inserted[size] = added;
        size++;
        return added;
    }

    /**
     * Removes and returns the value on top of the stack. The value leaves the set only
     * if no earlier copy of it remains on the stack.
     *
     * @return The value most recently pushed
     * @throws IllegalStateException if the stack is empty
     */
    public long pop() {
        if (size == 0) {
            throw new IllegalStateException("Cannot pop from an empty LongStackSet");
        }
        size--;
        long last = stack[size];
        if (inserted[size]) {
            removeFromSet(last);
        }
        return last;
    }

    /**
     * @return The number of values on the stack, counting duplicates
     */
    public int size() {
        return size;
    }

    /**
     * @param value The value to look for
     * @return Whether the value is currently on the stack
     */
    public boolean contains(long value) {
        if (value == 0) {
            return hasZero;
        }
        return set[findSlot(value)] == value;
    }

    /**
     * Maps a value to its preferred slot. Both halves of the long are mixed so values
     * that differ only in their upper 32 bits do not all collide, and floorMod keeps
     * the index non-negative for every input.
     */
    private static int indexFor(long value, int capacity) {
        return Math.floorMod(Long.hashCode(value), capacity);
    }

    /**
     * Probes for a non-zero value. Returns the slot holding it, or the empty slot where
     * it would be inserted. Terminates because the table is never allowed to fill up.
     */
    private int findSlot(long value) {
        int index = indexFor(value, set.length);
        while (set[index] != 0 && set[index] != value) {
            index = nextIndex(index);
        }
        return index;
    }

    private int nextIndex(int ind) {
        if (++ind >= set.length) {
            ind = 0;
        }
        return ind;
    }

    /** Inserts a value into the set, returning true if it was not present before. */
    private boolean addToSet(long value) {
        if (value == 0) {
            boolean added = !hasZero;
            hasZero = true;
            return added;
        }
        int index = findSlot(value);
        if (set[index] == value) {
            return false;
        }
        // Keep the load factor at or below 0.7 so probe chains stay short
        if ((setCount + 1) * 10L > set.length * 7L) {
            growSet();
            index = findSlot(value);
        }
        set[index] = value;
        setCount++;
        return true;
    }

    /**
     * Removes a value from the set using backward-shift deletion: every entry after the
     * freed slot in the same cluster is moved back if leaving it in place would make it
     * unreachable from its preferred slot.
     */
    private void removeFromSet(long value) {
        if (value == 0) {
            hasZero = false;
            return;
        }
        int hole = findSlot(value);
        if (set[hole] == 0) {
            return;
        }
        int probe = nextIndex(hole);
        while (set[probe] != 0) {
            int ideal = indexFor(set[probe], set.length);
            // The entry may stay only if its preferred slot lies cyclically in (hole, probe]
            boolean reachable = hole <= probe
                    ? (ideal > hole && ideal <= probe)
                    : (ideal > hole || ideal <= probe);
            if (!reachable) {
                set[hole] = set[probe];
                hole = probe;
            }
            probe = nextIndex(probe);
        }
        set[hole] = 0;
        setCount--;
    }

    /** Doubles the hash table and reinserts every stored value. */
    private void growSet() {
        long[] old = set;
        set = new long[old.length * 2];
        for (long num : old) {
            if (num != 0) {
                set[findSlot(num)] = num;
            }
        }
    }

    /** Doubles the stack and its parallel ownership flags. */
    private void growStack() {
        int capacity = stack.length * 2;
        long[] values = new long[capacity];
        boolean[] flags = new boolean[capacity];
        System.arraycopy(stack, 0, values, 0, size);
        System.arraycopy(inserted, 0, flags, 0, size);
        stack = values;
        inserted = flags;
    }

}
23 changes: 18 additions & 5 deletions src/redempt/redlex/data/TokenType.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,21 @@ public String getName() {
public void setName(String name) {
this.name = name;
}

public Token findForward(String str, int pos, LexContext ctx) {
ctx.update(pos, this);

/**
 * Attempts to match this token type at the given position, registering the attempt
 * with the lexing context first.
 *
 * <p>With no context the call goes straight to {@code findForward}. Otherwise the
 * (position, type) pair is recorded through {@code ctx.update}; if that reports the
 * pair was already recorded and this type is a {@code ParentToken}, the attempt is
 * abandoned. The context entry is always popped again before returning.
 *
 * @param str The string being tokenized
 * @param pos The position to start matching at
 * @param ctx The lexing context, or null to skip recursion tracking
 * @return The matched token, or null if nothing matched or the attempt was abandoned
 */
public Token tryTokenize(String str, int pos, LexContext ctx) {
    if (ctx == null) {
        return findForward(str, pos, ctx);
    }
    boolean firstVisit = ctx.update(pos, this);
    boolean abandoned = !firstVisit && this instanceof ParentToken;
    Token result = abandoned ? null : findForward(str, pos, ctx);
    ctx.pop();
    return result;
}

protected Token findForward(String str, int pos, LexContext ctx) {
int start = pos;
int offset = 0;
while (pos < str.length() && offset < maxLength() && characterMatches(str, pos, offset)) {
Expand All @@ -106,7 +118,8 @@ public String getMessage() {
}

public void replacePlaceholders(Map<String, TokenType> tokens) {
int[] counter = {0};
id = 1;
int[] counter = {2};
walk(t -> {
t.id = counter[0]++;
if (!(t instanceof ParentToken)) {
Expand Down Expand Up @@ -146,7 +159,7 @@ private void walk(Consumer<TokenType> each, Set<TokenType> visited) {
public abstract boolean lengthMatches(int length);
public abstract int minLength();
public abstract int maxLength();
public abstract List<Character> calcFirstCharacters();
protected abstract List<Character> calcFirstCharacters();

/**
* @return A list of all the characters which may appear as the first character of this token. Includes null if this token may be zero-length.
Expand Down
10 changes: 8 additions & 2 deletions src/redempt/redlex/debug/DebugEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@ public class DebugEntry {
private int row;
private int col;
private int depth;
private int length;
private TokenType owner;

public DebugEntry(TokenType owner, int row, int col, int depth, int status) {
public DebugEntry(TokenType owner, int row, int col, int length, int depth, int status) {
this.owner = owner;
this.row = row;
this.col = col;
this.status = status;
this.length = length;
this.depth = depth;
}

Expand All @@ -43,6 +45,10 @@ public int getCol() {
return getCol();
}

/**
 * @return The length value this entry was constructed with
 */
public int getLength() {
    return this.length;
}

/**
* @return The status of the step - 0 for begin, 1 for failure, 2 for success
*/
Expand Down Expand Up @@ -79,7 +85,7 @@ public String toString() {
for (int i = 0; i < depth; i++) {
out.append(" ");
}
out.append(owner.getName()).append(" ").append(getStatusString()).append(" at line ").append(row).append(", column ").append(col);
out.append(owner.getName()).append(" ").append(getStatusString()).append(" at line ").append(row).append(", column ").append(col).append(status == 2 ? " (length " + length + ")" : "");
return out.toString();
}

Expand Down
13 changes: 10 additions & 3 deletions src/redempt/redlex/debug/DebugHistory.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,16 @@ public void begin(TokenType type, int pos) {
if (type.getName() == null) {
return;
}
entries.add(new DebugEntry(type, breaks[pos][0], breaks[pos][1], stack.size(), 0));
entries.add(new DebugEntry(type, breaks[pos][0], breaks[pos][1], 0, stack.size(), 0));
stack.add(type);
}

public void result(TokenType type, int pos, boolean success) {
public void result(TokenType type, int pos, int length, boolean success) {
if (type.getName() == null) {
return;
}
stack.removeLast();
entries.add(new DebugEntry(type, breaks[pos][0], breaks[pos][1], stack.size(), success ? 2 : 1));
entries.add(new DebugEntry(type, breaks[pos][0], breaks[pos][1], length, stack.size(), success ? 2 : 1));
}

public DebugHistory filter(String... names) {
Expand All @@ -60,6 +60,13 @@ public DebugHistory filter(Predicate<String> nameFilter) {
return this;
}

/**
 * @return The list of entries recorded by this history. This is the backing list itself, not a copy.
 */
public List<DebugEntry> getEntries() {
    return this.entries;
}

/**
* @return A formatted description of all tokenizing actions
*/
@Override
public String toString() {
return entries.stream().map(DebugEntry::toString).collect(Collectors.joining("\n"));
Expand Down
10 changes: 5 additions & 5 deletions src/redempt/redlex/debug/DebugToken.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ public boolean characterMatches(String input, int pos, int offset) {
}

@Override
public Token findForward(String str, int pos, LexContext ctx) {
protected Token findForward(String str, int pos, LexContext ctx) {
history.begin(this, pos);
Token value = child.findForward(str, pos, ctx);
history.result(this, pos, value != null);
Token value = child.tryTokenize(str, pos, ctx);
history.result(this, pos, value == null ? 0 : value.length(), value != null);
return value;
}

Expand All @@ -57,8 +57,8 @@ public int maxLength() {
}

@Override
public List<Character> calcFirstCharacters() {
return child.calcFirstCharacters();
protected List<Character> calcFirstCharacters() {
return child.getFirstCharacters();
}

@Override
Expand Down
6 changes: 3 additions & 3 deletions src/redempt/redlex/processing/Lexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ public void setRuleByName(CullStrategy strategy, String... names) {
*/
public Token tokenize(String str, boolean errorOnFail) {
LexContext ctx = new LexContext();
Token inst = root.findForward(str, 0, ctx);
Token inst = root.tryTokenize(str, 0, ctx);
if (inst == null || inst.length() != str.length()) {
if (!errorOnFail) {
return null;
Expand All @@ -124,8 +124,8 @@ public Token tokenize(String str, boolean errorOnFail) {
type = type == null ? root : type;
int[] pos = cursorPos(str, ctx.getLastPos());
String[] split = str.split("\n");
throw new LexException(type.getMessage() + " token on line " + (pos[0] + 1) + ", column " + pos[1] + ": " + type
+ "\n" + split[pos[0]]
throw new LexException(type.getMessage() + " token on line " + (pos[0] + 1) + ", column " + (pos[1] + 1) + ": " + type
+ "\n" + (split.length == 0 ? "" : split[pos[0]])
+ "\n" + repeat(" ", pos[1]) + "^");
}
return inst;
Expand Down
2 changes: 1 addition & 1 deletion src/redempt/redlex/token/CharGroupToken.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public int maxLength() {
}

@Override
public List<Character> calcFirstCharacters() {
protected List<Character> calcFirstCharacters() {
if (inverted) {
return Collections.singletonList(null);
}
Expand Down
2 changes: 1 addition & 1 deletion src/redempt/redlex/token/CharSetToken.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public int maxLength() {
}

@Override
public List<Character> calcFirstCharacters() {
protected List<Character> calcFirstCharacters() {
return inverted ? Collections.singletonList(null) : new ArrayList<>(chars);
}

Expand Down
8 changes: 4 additions & 4 deletions src/redempt/redlex/token/ChoiceToken.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ private void initLength() {
if (minLength != -1) {
return;
}
minLength = 0;
for (TokenType child : children) {
minLength = Math.min(child.minLength(), minLength);
maxLength = Math.max(child.maxLength(), maxLength);
Expand All @@ -47,8 +48,7 @@ public boolean characterMatches(String input, int pos, int offset) {
}

@Override
public Token findForward(String str, int pos, LexContext ctx) {
ctx.update(pos, this);
protected Token findForward(String str, int pos, LexContext ctx) {
initMap();
if (pos < str.length()) {
char c = str.charAt(pos);
Expand All @@ -67,7 +67,7 @@ private Token tryParse(List<TokenType> children, String str, int pos, LexContext
return null;
}
for (TokenType child : children) {
Token inst = child.findForward(str, pos, ctx);
Token inst = child.tryTokenize(str, pos, ctx);
if (inst != null) {
return new Token(this, inst.getBaseString(), inst.getStart(), inst.getEnd(), new Token[] {inst});
}
Expand All @@ -94,7 +94,7 @@ public int maxLength() {
}

@Override
public List<Character> calcFirstCharacters() {
protected List<Character> calcFirstCharacters() {
Set<Character> chars = new HashSet<>();
for (TokenType child : children) {
chars.addAll(child.getFirstCharacters());
Expand Down
Loading

0 comments on commit 798e0fd

Please sign in to comment.