Permalink
Browse files

Merge branch 'fix-76'

  • Loading branch information...
2 parents cae9f7b + c592e41 commit 2242948c03577d0a58961729e5aca683ec842fce @sharwell sharwell committed Mar 27, 2013
View
12 runtime/Java/src/org/antlr/v4/runtime/misc/Utils.java
@@ -53,6 +53,18 @@
return buf.toString();
}
+ public static <T> String join(T[] array, String separator) {
+ StringBuilder builder = new StringBuilder();
+ for (int i = 0; i < array.length; i++) {
+ builder.append(array[i]);
+ if (i < array.length - 1) {
+ builder.append(separator);
+ }
+ }
+
+ return builder.toString();
+ }
+
public static int numNonnull(Object[] data) {
int n = 0;
if ( data == null ) return n;
View
15 tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg
@@ -817,10 +817,23 @@ public class <lexer.name> extends <superClass> {
}
>>
-
SerializedATN(model) ::= <<
+<if(rest(model.segments))>
+<! requires segmented representation !>
+private static final int _serializedATNSegments = <length(model.segments)>;
+<model.segments:{segment|private static final String _serializedATNSegment<i0> =
+ "<segment; wrap={"+<\n><\t>"}>";}; separator="\n">
+public static final String _serializedATN = Utils.join(
+ new String[] {
+ <model.segments:{segment | _serializedATNSegment<i0>}; separator=",\n">
+ },
+ ""
+);
+<else>
+<! only one segment, can be inlined !>
public static final String _serializedATN =
"<model.serialized; wrap={"+<\n><\t>"}>";
+<endif>
public static final ATN _ATN =
ATNSimulator.deserialize(_serializedATN.toCharArray());
static {
View
5 tool/src/org/antlr/v4/automata/ATNSerializer.java
@@ -263,7 +263,10 @@ else if ( s instanceof BlockStartState ) {
// don't adjust the first value since that's the version number
for (int i = 1; i < data.size(); i++) {
- assert data.get(i) >= -1 && data.get(i) < 0xFFFF;
+ if (data.get(i) < -1 || data.get(i) > 0xFFFE) {
+ throw new UnsupportedOperationException("Serialized ATN data element out of range.");
+ }
+
int value = (data.get(i) + 2) & 0xFFFF;
if (value == 0xFFFF) {
value = -1;
View
7 tool/src/org/antlr/v4/codegen/JavaTarget.java
@@ -164,6 +164,13 @@ public String encodeIntAsCharEscape(int v) {
}
@Override
+ public int getSerializedATNSegmentLimit() {
+ // 65535 is the class file format byte limit for a UTF-8 encoded string literal
+ // 3 is the maximum number of bytes it takes to encode a value in the range 0-0xFFFF
+ return 65535 / 3;
+ }
+
+ @Override
protected boolean visibleGrammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode) {
return getBadWords().contains(idNode.getText());
}
View
13 tool/src/org/antlr/v4/codegen/Target.java
@@ -31,6 +31,7 @@
package org.antlr.v4.codegen;
import org.antlr.v4.codegen.model.RuleFunction;
+import org.antlr.v4.codegen.model.SerializedATN;
import org.antlr.v4.misc.Utils;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.runtime.Token;
@@ -287,6 +288,18 @@ public String getElementName(String name) {
return getTokenTypeAsTargetLabel(getCodeGenerator().g, ttype);
}
+ /**
+ * Gets the maximum number of 16-bit unsigned integers that can be encoded
+ * in a single segment of the serialized ATN.
+ *
+ * @see SerializedATN#getSegments
+ *
+ * @return the serialized ATN segment limit
+ */
+ public int getSerializedATNSegmentLimit() {
+ return Integer.MAX_VALUE;
+ }
+
public boolean grammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode) {
switch (idNode.getParent().getType()) {
case ANTLRParser.ASSIGN:
View
11 tool/src/org/antlr/v4/codegen/model/SerializedATN.java
@@ -51,4 +51,15 @@ public SerializedATN(OutputModelFactory factory, ATN atn) {
}
// System.out.println(ATNSerializer.getDecoded(factory.getGrammar(), atn));
}
+
+ public String[][] getSegments() {
+ List<String[]> segments = new ArrayList<String[]>();
+ int segmentLimit = factory.getGenerator().getTarget().getSerializedATNSegmentLimit();
+ for (int i = 0; i < serialized.size(); i += segmentLimit) {
+ List<String> currentSegment = serialized.subList(i, Math.min(i + segmentLimit, serialized.size()));
+ segments.add(currentSegment.toArray(new String[currentSegment.size()]));
+ }
+
+ return segments.toArray(new String[segments.size()][]);
+ }
}
View
22 tool/test/org/antlr/v4/test/TestLexerExec.java
@@ -693,6 +693,28 @@ public void testPositionAdjustingLexer() throws Exception {
assertEquals(expecting, found);
}
+ /**
+ * This is a regression test for antlr/antlr4#76 "Serialized ATN strings
+ * should be split when longer than 2^16 bytes (class file limitation)"
+ * https://github.com/antlr/antlr4/issues/76
+ */
+ @Test
+ public void testLargeLexer() throws Exception {
+ StringBuilder grammar = new StringBuilder();
+ grammar.append("lexer grammar L;\n");
+ grammar.append("WS : [ \\t\\r\\n]+ -> skip;\n");
+ for (int i = 0; i < 4000; i++) {
+ grammar.append("KW").append(i).append(" : '").append("KW").append(i).append("';\n");
+ }
+
+ String input = "KW400";
+ String found = execLexer("L.g4", grammar.toString(), "L", input);
+ String expecting =
+ "[@0,0:4='KW400',<402>,1:0]\n" +
+ "[@1,5:4='<EOF>',<-1>,1:5]\n";
+ assertEquals(expecting, found);
+ }
+
protected String load(String fileName, @Nullable String encoding)
throws IOException
{

0 comments on commit 2242948

Please sign in to comment.