Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
351 changes: 148 additions & 203 deletions java/src/org/antlr/codebuff/CollectFeatures.java

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions java/src/org/antlr/codebuff/Corpus.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ public void randomShuffleInPlace() {
public void buildTokenContextIndex() {
curAndPrevTokenRuleIndexToVectorsMap = new HashMap<>();
for (int i=0; i<X.size(); i++) {
int curTokenRuleIndex = X.get(i)[CollectFeatures.INDEX_RULE];
int prevTokenRuleIndex = X.get(i)[CollectFeatures.INDEX_PREV_RULE];
int curTokenRuleIndex = X.get(i)[CollectFeatures.INDEX_PREV_EARLIEST_RIGHT_ANCESTOR];
int prevTokenRuleIndex = X.get(i)[CollectFeatures.INDEX_EARLIEST_LEFT_ANCESTOR];
int pr = CollectFeatures.unrulealt(prevTokenRuleIndex)[0];
int cr = CollectFeatures.unrulealt(curTokenRuleIndex)[0];
Pair<Integer, Integer> key = new Pair<>(pr, cr);
Expand Down
2 changes: 1 addition & 1 deletion java/src/org/antlr/codebuff/FeatureType.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.antlr.codebuff;

public enum FeatureType {
TOKEN(12), RULE(14), INT(7), BOOL(5), COL(7),
TOKEN(12), RULE(14), INT(12), BOOL(5), COL(7),
INFO_FILE(15), INFO_LINE(4), INFO_CHARPOS(4),
UNUSED(0);
public int displayWidth;
Expand Down
58 changes: 30 additions & 28 deletions java/src/org/antlr/codebuff/Formatter.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@
import static org.antlr.codebuff.CollectFeatures.CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN;
import static org.antlr.codebuff.CollectFeatures.CAT_INJECT_NL;
import static org.antlr.codebuff.CollectFeatures.CAT_INJECT_WS;
import static org.antlr.codebuff.CollectFeatures.CAT_NO_ALIGNMENT;
import static org.antlr.codebuff.CollectFeatures.FEATURES_ALIGN;
import static org.antlr.codebuff.CollectFeatures.FEATURES_INJECT_WS;
import static org.antlr.codebuff.CollectFeatures.INDEX_FIRST_ON_LINE;
import static org.antlr.codebuff.CollectFeatures.INDEX_PREV_END_COLUMN;
import static org.antlr.codebuff.CollectFeatures.INDEX_MATCHING_TOKEN_DIFF_LINE;
import static org.antlr.codebuff.CollectFeatures.MAX_CONTEXT_DIFF_THRESHOLD;
import static org.antlr.codebuff.CollectFeatures.earliestAncestorStartingWithToken;
import static org.antlr.codebuff.CollectFeatures.getMatchingSymbolOnDiffLine;
import static org.antlr.codebuff.CollectFeatures.getNodeFeatures;
import static org.antlr.codebuff.CollectFeatures.getRealTokens;
import static org.antlr.codebuff.CollectFeatures.getTokensOnPreviousLine;
Expand All @@ -46,7 +48,6 @@ public class Formatter {
protected Vector<TokenPositionAnalysis> analysis = new Vector<>();

protected CodekNNClassifier nlwsClassifier;
protected CodekNNClassifier wsClassifier;
protected CodekNNClassifier alignClassifier;
protected int k;

Expand Down Expand Up @@ -112,13 +113,14 @@ public String format() {
public void processToken(int indexIntoRealTokens, int tokenIndexInStream) {
CommonToken curToken = (CommonToken)tokens.get(tokenIndexInStream);
String tokText = curToken.getText();
TerminalNode node = tokenToNodeMap.get(curToken);

emitCommentsToTheLeft(tokenIndexInStream);

int[] features = getNodeFeatures(tokenToNodeMap, doc, tokenIndexInStream, line, tabSize);
// must set "prev end column" value as token stream doesn't have it;
// we're tracking it as we emit tokens
features[INDEX_PREV_END_COLUMN] = charPosInLine;
// features[INDEX_PREV_END_COLUMN] = charPosInLine;

int injectNL_WS = nlwsClassifier.classify(k, features, corpus.injectWhitespace, MAX_CONTEXT_DIFF_THRESHOLD);
int newlines = 0;
Expand All @@ -130,23 +132,15 @@ else if ( (injectNL_WS&0xFF)==CAT_INJECT_WS ) {
ws = CollectFeatures.unwscat(injectNL_WS);
}

// getNodeFeatures() also doesn't know what line curToken is on. If \n, we need to find exemplars that start a line
features[INDEX_FIRST_ON_LINE] = newlines; // use \n prediction to match exemplars for alignment

int align = alignClassifier.classify(k, features, corpus.align, MAX_CONTEXT_DIFF_THRESHOLD);

TokenPositionAnalysis tokenPositionAnalysis =
getTokenAnalysis(features, indexIntoRealTokens, tokenIndexInStream, newlines, align, ws);
analysis.setSize(tokenIndexInStream+1);
analysis.set(tokenIndexInStream, tokenPositionAnalysis);

if ( ws==0 && cannotJoin(realTokens.get(indexIntoRealTokens-1), curToken) ) { // failsafe!
ws = 1;
}

int align = CAT_NO_ALIGNMENT;

if ( newlines>0 ) {
output.append(Tool.newlines(newlines));
line++;
line+=newlines;
charPosInLine = 0;

List<Token> tokensOnPreviousLine = getTokensOnPreviousLine(tokens, tokenIndexInStream, line);
Expand All @@ -155,9 +149,15 @@ else if ( (injectNL_WS&0xFF)==CAT_INJECT_WS ) {
firstTokenOnPrevLine = tokensOnPreviousLine.get(0);
}

TerminalNode node = tokenToNodeMap.get(curToken);
ParserRuleContext parent = (ParserRuleContext)node.getParent();

// getNodeFeatures() doesn't know what line curToken is on. If \n, we need to find exemplars that start a line
features[INDEX_FIRST_ON_LINE] = newlines>0 ? 1 : 0; // use \n prediction to match exemplars for alignment
// if we decide to inject a newline, we had better recompute this value before classifying alignment
features[INDEX_MATCHING_TOKEN_DIFF_LINE] = getMatchingSymbolOnDiffLine(doc, node, line);

align = alignClassifier.classify(k, features, corpus.align, MAX_CONTEXT_DIFF_THRESHOLD);

if ( align==CAT_INDENT ) {
if ( firstTokenOnPrevLine!=null ) { // if not on first line, we cannot indent
int indentedCol = firstTokenOnPrevLine.getCharPositionInLine()+INDENT_LEVEL;
Expand All @@ -169,10 +169,7 @@ else if ( (align&0xFF)==CAT_ALIGN_WITH_ANCESTOR_CHILD ) {
int[] deltaChild = CollectFeatures.unaligncat(align);
int deltaFromAncestor = deltaChild[0];
int childIndex = deltaChild[1];
ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(parent, curToken);
if ( earliestLeftAncestor==null ) {
earliestLeftAncestor = parent;
}
ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(node, curToken);
ParserRuleContext ancestor = CollectFeatures.getAncestor(earliestLeftAncestor, deltaFromAncestor);
ParseTree child = ancestor.getChild(childIndex);
Token start = null;
Expand All @@ -194,10 +191,7 @@ else if ( child instanceof TerminalNode ){
}
else if ( (align&0xFF)==CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN ) {
int deltaFromAncestor = CollectFeatures.unindentcat(align);
ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(parent, curToken);
if ( earliestLeftAncestor==null ) {
earliestLeftAncestor = parent;
}
ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(node, curToken);
ParserRuleContext ancestor = CollectFeatures.getAncestor(earliestLeftAncestor, deltaFromAncestor);
Token start = ancestor.getStart();
int indentCol = start.getCharPositionInLine() + INDENT_LEVEL;
Expand All @@ -211,6 +205,11 @@ else if ( (align&0xFF)==CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN ) {
charPosInLine += ws;
}

TokenPositionAnalysis tokenPositionAnalysis =
getTokenAnalysis(features, indexIntoRealTokens, tokenIndexInStream, newlines, align, ws);
analysis.setSize(tokenIndexInStream+1);
analysis.set(tokenIndexInStream, tokenPositionAnalysis);

// update Token object with position information now that we are about
// to emit it.
curToken.setLine(line);
Expand All @@ -227,6 +226,10 @@ else if ( (align&0xFF)==CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN ) {
/** Look into the token stream to get the comments to the left of current
* token. Emit all whitespace and comments except for whitespace at the
* end as we'll inject that per newline prediction.
*
* This assumes we are grooming, not totally reformatting.
* We are able to see the original input stream for comment purposes. With all
* whitespace removed, we can't emit this stuff properly at the moment.
*/
public void emitCommentsToTheLeft(int tokenIndexInStream) {
List<Token> hiddenTokensToLeft = tokens.getHiddenTokensToLeft(tokenIndexInStream);
Expand Down Expand Up @@ -270,7 +273,7 @@ public void emitCommentsToTheLeft(int tokenIndexInStream) {

public TokenPositionAnalysis getTokenAnalysis(int[] features, int indexIntoRealTokens, int tokenIndexInStream,
int injectNewline,
int alignWithPrevious,
int align,
int ws)
{
CommonToken curToken = (CommonToken)tokens.get(tokenIndexInStream);
Expand All @@ -286,12 +289,11 @@ public TokenPositionAnalysis getTokenAnalysis(int[] features, int indexIntoRealT

boolean prevIsWS = prevToken.getChannel()==Token.HIDDEN_CHANNEL; // assume this means whitespace
int actualNL = Tool.count(prevToken.getText(), '\n');
int actualWS = Tool.count(prevToken.getText(), ' ');
String newlinePredictionString = String.format("### line %d: predicted %d \\n actual %s",
String newlinePredictionString = String.format("### line %d: predicted %d \\n actual ?",
originalCurToken.getLine(), injectNewline, prevIsWS ? actualNL : "none");
String alignPredictionString = String.format("### line %d: predicted %s actual %s",
String alignPredictionString = String.format("### line %d: predicted %d actual %s",
originalCurToken.getLine(),
alignWithPrevious==1?"align":"unaligned",
align,
"?");

String newlineAnalysis = newlinePredictionString+"\n"+
Expand Down
2 changes: 1 addition & 1 deletion java/src/org/antlr/codebuff/Neighbor.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public String toString(FeatureMetaData[] FEATURES, List<Integer> Y) {
int[] X = corpus.X.get(corpusVectorIndex);
InputDocument doc = corpus.documents.get(corpusVectorIndex);
String features = CollectFeatures._toString(FEATURES, doc, X);
int line = CollectFeatures.getInfoLine(X);
int line = X[CollectFeatures.INDEX_INFO_LINE];
String lineText = doc.getLine(line);
int col = X[CollectFeatures.INDEX_INFO_CHARPOS];
// insert a dot right before char position
Expand Down
4 changes: 2 additions & 2 deletions java/src/org/antlr/codebuff/kNNClassifier.java
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,8 @@ public Neighbor[] kNN(int[] unknown, int k, double distanceThreshold) {
}

public Neighbor[] distances(int[] unknown, double distanceThreshold) {
int curTokenRuleIndex = unknown[CollectFeatures.INDEX_RULE];
int prevTokenRuleIndex = unknown[CollectFeatures.INDEX_PREV_RULE];
int curTokenRuleIndex = unknown[CollectFeatures.INDEX_PREV_EARLIEST_RIGHT_ANCESTOR];
int prevTokenRuleIndex = unknown[CollectFeatures.INDEX_EARLIEST_LEFT_ANCESTOR];
int pr = CollectFeatures.unrulealt(prevTokenRuleIndex)[0];
int cr = CollectFeatures.unrulealt(curTokenRuleIndex)[0];
Pair<Integer, Integer> key = new Pair<>(pr, cr);
Expand Down