Skip to content

Commit

Permalink
Fix some issues with painless's strings (#22393)
Browse files Browse the repository at this point in the history
1. Escape sequences we're working. For example `\\` is now correctly
interpreted as `\` instead of `\\`. Same with `\'` being `'` and
`\"` being `"`.
2. `'` delimited strings weren't allowed to contain `"`s but it looked
like they were intended to support it. Now they do.
3. Improves the error message when the script contains an invalid
escape sequence inside a string to include a list of the valid
escape sequences.

Closes #22372
  • Loading branch information
nik9000 committed Jan 6, 2017
1 parent a487b90 commit f24ca51
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 59 deletions.
2 changes: 1 addition & 1 deletion modules/lang-painless/src/main/antlr/PainlessLexer.g4
Expand Up @@ -107,7 +107,7 @@ HEX: '0' [xX] [0-9a-fA-F]+ [lL]?;
INTEGER: ( '0' | [1-9] [0-9]* ) [lLfFdD]?;
DECIMAL: ( '0' | [1-9] [0-9]* ) (DOT [0-9]+)? ( [eE] [+\-]? [0-9]+ )? [fFdD]?;

STRING: ( '"' ( '\\"' | '\\\\' | ~[\\"] )*? '"' ) | ( '\'' ( '\\\'' | '\\\\' | ~[\\"] )*? '\'' );
STRING: ( '"' ( '\\"' | '\\\\' | ~[\\"] )*? '"' ) | ( '\'' ( '\\\'' | '\\\\' | ~[\\'] )*? '\'' );
REGEX: '/' ( ~('/' | '\n') | '\\' ~'\n' )+ '/' [cilmsUux]* { SlashStrategy.slashIsRegex(this) }?;

TRUE: 'true';
Expand Down
Expand Up @@ -29,12 +29,13 @@
import org.elasticsearch.painless.Location;

/**
* A lexer that is customized for painless. It will:
* A lexer that is customized for painless. It:
* <ul>
* <li>will override the default error behavior to fail on the first error
* <li>store the last token in case we need to do lookbehind for semicolon insertion and regex vs division detection
* <li>insert semicolons where they'd improve the language's readability. Rather than hack this into the parser and create a ton of
* <li>Overrides the default error behavior to fail on the first error
* <li>Stores the last token in case we need to do lookbehind for semicolon insertion and regex vs division detection
* <li>Insert semicolons where they'd improve the language's readability. Rather than hack this into the parser and create a ton of
* ambiguity we hack them here where we can use heuristics to do it quickly.
* <li>Enhances the error message when a string contains invalid escape sequences to include a list of valid escape sequences.
* </ul>
*/
final class EnhancedPainlessLexer extends PainlessLexer {
Expand Down Expand Up @@ -77,7 +78,15 @@ public void recover(final LexerNoViableAltException lnvae) {
final String text = charStream.getText(Interval.of(startIndex, charStream.index()));

Location location = new Location(sourceName, _tokenStartCharIndex);
throw location.createError(new IllegalArgumentException("unexpected character [" + getErrorDisplay(text) + "].", lnvae));
String message = "unexpected character [" + getErrorDisplay(text) + "].";
char firstChar = text.charAt(0);
if ((firstChar == '\'' || firstChar == '"') && text.length() - 2 > 0 && text.charAt(text.length() - 2) == '\\') {
/* Use a simple heuristic to guess if the unrecognized characters were trying to be a string but has a broken escape sequence.
* If it was add an extra message about valid string escape sequences. */
message += " The only valid escape sequences in strings starting with [" + firstChar + "] are [\\\\] and [\\"
+ firstChar + "].";
}
throw location.createError(new IllegalArgumentException(message, lnvae));
}

private static boolean insertSemicolon(Token previous, Token next) {
Expand Down
Expand Up @@ -203,51 +203,51 @@ private boolean TYPE_sempred(RuleContext _localctx, int predIndex) {
"\348\35:\36<\37> @!B\"D#F$H%J&L\'N(P)R*T+V,X-Z.\\/^\60`\61b\62d\63f\64"+
"h\65j\66l\67n8p9r:t;v<x=z>|?~@\u0080A\u0082B\u0084C\u0086D\u0088E\u008a"+
"F\u008cG\u008eH\u0090I\u0092J\u0094K\u0096L\u0098M\u009aN\u009cO\u009e"+
"P\u00a0Q\u00a2R\u00a4S\u00a6T\u00a8U\u00aaV\4\2\3\24\5\2\13\f\17\17\""+
"P\u00a0Q\u00a2R\u00a4S\u00a6T\u00a8U\u00aaV\4\2\3\25\5\2\13\f\17\17\""+
"\"\4\2\f\f\17\17\3\2\629\4\2NNnn\4\2ZZzz\5\2\62;CHch\3\2\63;\3\2\62;\b"+
"\2FFHHNNffhhnn\4\2GGgg\4\2--//\6\2FFHHffhh\4\2$$^^\4\2\f\f\61\61\3\2\f"+
"\f\t\2WWeekknouuwwzz\5\2C\\aac|\6\2\62;C\\aac|\u0277\2\4\3\2\2\2\2\6\3"+
"\2\2\2\2\b\3\2\2\2\2\n\3\2\2\2\2\f\3\2\2\2\2\16\3\2\2\2\2\20\3\2\2\2\2"+
"\22\3\2\2\2\2\24\3\2\2\2\2\26\3\2\2\2\2\30\3\2\2\2\2\32\3\2\2\2\2\34\3"+
"\2\2\2\2\36\3\2\2\2\2 \3\2\2\2\2\"\3\2\2\2\2$\3\2\2\2\2&\3\2\2\2\2(\3"+
"\2\2\2\2*\3\2\2\2\2,\3\2\2\2\2.\3\2\2\2\2\60\3\2\2\2\2\62\3\2\2\2\2\64"+
"\3\2\2\2\2\66\3\2\2\2\28\3\2\2\2\2:\3\2\2\2\2<\3\2\2\2\2>\3\2\2\2\2@\3"+
"\2\2\2\2B\3\2\2\2\2D\3\2\2\2\2F\3\2\2\2\2H\3\2\2\2\2J\3\2\2\2\2L\3\2\2"+
"\2\2N\3\2\2\2\2P\3\2\2\2\2R\3\2\2\2\2T\3\2\2\2\2V\3\2\2\2\2X\3\2\2\2\2"+
"Z\3\2\2\2\2\\\3\2\2\2\2^\3\2\2\2\2`\3\2\2\2\2b\3\2\2\2\2d\3\2\2\2\2f\3"+
"\2\2\2\2h\3\2\2\2\2j\3\2\2\2\2l\3\2\2\2\2n\3\2\2\2\2p\3\2\2\2\2r\3\2\2"+
"\2\2t\3\2\2\2\2v\3\2\2\2\2x\3\2\2\2\2z\3\2\2\2\2|\3\2\2\2\2~\3\2\2\2\2"+
"\u0080\3\2\2\2\2\u0082\3\2\2\2\2\u0084\3\2\2\2\2\u0086\3\2\2\2\2\u0088"+
"\3\2\2\2\2\u008a\3\2\2\2\2\u008c\3\2\2\2\2\u008e\3\2\2\2\2\u0090\3\2\2"+
"\2\2\u0092\3\2\2\2\2\u0094\3\2\2\2\2\u0096\3\2\2\2\2\u0098\3\2\2\2\2\u009a"+
"\3\2\2\2\2\u009c\3\2\2\2\2\u009e\3\2\2\2\2\u00a0\3\2\2\2\2\u00a2\3\2\2"+
"\2\2\u00a4\3\2\2\2\2\u00a6\3\2\2\2\3\u00a8\3\2\2\2\3\u00aa\3\2\2\2\4\u00ad"+
"\3\2\2\2\6\u00c8\3\2\2\2\b\u00cc\3\2\2\2\n\u00ce\3\2\2\2\f\u00d0\3\2\2"+
"\2\16\u00d2\3\2\2\2\20\u00d4\3\2\2\2\22\u00d6\3\2\2\2\24\u00d8\3\2\2\2"+
"\26\u00dc\3\2\2\2\30\u00e1\3\2\2\2\32\u00e3\3\2\2\2\34\u00e5\3\2\2\2\36"+
"\u00e8\3\2\2\2 \u00eb\3\2\2\2\"\u00f0\3\2\2\2$\u00f6\3\2\2\2&\u00f9\3"+
"\2\2\2(\u00fd\3\2\2\2*\u0106\3\2\2\2,\u010c\3\2\2\2.\u0113\3\2\2\2\60"+
"\u0117\3\2\2\2\62\u011b\3\2\2\2\64\u0121\3\2\2\2\66\u0127\3\2\2\28\u012c"+
"\3\2\2\2:\u0137\3\2\2\2<\u0139\3\2\2\2>\u013b\3\2\2\2@\u013d\3\2\2\2B"+
"\u0140\3\2\2\2D\u0142\3\2\2\2F\u0144\3\2\2\2H\u0146\3\2\2\2J\u0149\3\2"+
"\2\2L\u014c\3\2\2\2N\u0150\3\2\2\2P\u0152\3\2\2\2R\u0155\3\2\2\2T\u0157"+
"\3\2\2\2V\u015a\3\2\2\2X\u015d\3\2\2\2Z\u0161\3\2\2\2\\\u0164\3\2\2\2"+
"^\u0168\3\2\2\2`\u016a\3\2\2\2b\u016c\3\2\2\2d\u016e\3\2\2\2f\u0171\3"+
"\2\2\2h\u0174\3\2\2\2j\u0176\3\2\2\2l\u0178\3\2\2\2n\u017b\3\2\2\2p\u017e"+
"\3\2\2\2r\u0181\3\2\2\2t\u0184\3\2\2\2v\u0188\3\2\2\2x\u018b\3\2\2\2z"+
"\u018e\3\2\2\2|\u0190\3\2\2\2~\u0193\3\2\2\2\u0080\u0196\3\2\2\2\u0082"+
"\u0199\3\2\2\2\u0084\u019c\3\2\2\2\u0086\u019f\3\2\2\2\u0088\u01a2\3\2"+
"\2\2\u008a\u01a5\3\2\2\2\u008c\u01a8\3\2\2\2\u008e\u01ac\3\2\2\2\u0090"+
"\u01b0\3\2\2\2\u0092\u01b5\3\2\2\2\u0094\u01be\3\2\2\2\u0096\u01d0\3\2"+
"\2\2\u0098\u01dd\3\2\2\2\u009a\u020d\3\2\2\2\u009c\u020f\3\2\2\2\u009e"+
"\u0220\3\2\2\2\u00a0\u0225\3\2\2\2\u00a2\u022b\3\2\2\2\u00a4\u0230\3\2"+
"\2\2\u00a6\u023b\3\2\2\2\u00a8\u024a\3\2\2\2\u00aa\u024e\3\2\2\2\u00ac"+
"\u00ae\t\2\2\2\u00ad\u00ac\3\2\2\2\u00ae\u00af\3\2\2\2\u00af\u00ad\3\2"+
"\2\2\u00af\u00b0\3\2\2\2\u00b0\u00b1\3\2\2\2\u00b1\u00b2\b\2\2\2\u00b2"+
"\5\3\2\2\2\u00b3\u00b4\7\61\2\2\u00b4\u00b5\7\61\2\2\u00b5\u00b9\3\2\2"+
"\2\u00b6\u00b8\13\2\2\2\u00b7\u00b6\3\2\2\2\u00b8\u00bb\3\2\2\2\u00b9"+
"\u00ba\3\2\2\2\u00b9\u00b7\3\2\2\2\u00ba\u00bc\3\2\2\2\u00bb\u00b9\3\2"+
"\2\2\u00bc\u00c9\t\3\2\2\u00bd\u00be\7\61\2\2\u00be\u00bf\7,\2\2\u00bf"+
"\2FFHHNNffhhnn\4\2GGgg\4\2--//\6\2FFHHffhh\4\2$$^^\4\2))^^\4\2\f\f\61"+
"\61\3\2\f\f\t\2WWeekknouuwwzz\5\2C\\aac|\6\2\62;C\\aac|\u0277\2\4\3\2"+
"\2\2\2\6\3\2\2\2\2\b\3\2\2\2\2\n\3\2\2\2\2\f\3\2\2\2\2\16\3\2\2\2\2\20"+
"\3\2\2\2\2\22\3\2\2\2\2\24\3\2\2\2\2\26\3\2\2\2\2\30\3\2\2\2\2\32\3\2"+
"\2\2\2\34\3\2\2\2\2\36\3\2\2\2\2 \3\2\2\2\2\"\3\2\2\2\2$\3\2\2\2\2&\3"+
"\2\2\2\2(\3\2\2\2\2*\3\2\2\2\2,\3\2\2\2\2.\3\2\2\2\2\60\3\2\2\2\2\62\3"+
"\2\2\2\2\64\3\2\2\2\2\66\3\2\2\2\28\3\2\2\2\2:\3\2\2\2\2<\3\2\2\2\2>\3"+
"\2\2\2\2@\3\2\2\2\2B\3\2\2\2\2D\3\2\2\2\2F\3\2\2\2\2H\3\2\2\2\2J\3\2\2"+
"\2\2L\3\2\2\2\2N\3\2\2\2\2P\3\2\2\2\2R\3\2\2\2\2T\3\2\2\2\2V\3\2\2\2\2"+
"X\3\2\2\2\2Z\3\2\2\2\2\\\3\2\2\2\2^\3\2\2\2\2`\3\2\2\2\2b\3\2\2\2\2d\3"+
"\2\2\2\2f\3\2\2\2\2h\3\2\2\2\2j\3\2\2\2\2l\3\2\2\2\2n\3\2\2\2\2p\3\2\2"+
"\2\2r\3\2\2\2\2t\3\2\2\2\2v\3\2\2\2\2x\3\2\2\2\2z\3\2\2\2\2|\3\2\2\2\2"+
"~\3\2\2\2\2\u0080\3\2\2\2\2\u0082\3\2\2\2\2\u0084\3\2\2\2\2\u0086\3\2"+
"\2\2\2\u0088\3\2\2\2\2\u008a\3\2\2\2\2\u008c\3\2\2\2\2\u008e\3\2\2\2\2"+
"\u0090\3\2\2\2\2\u0092\3\2\2\2\2\u0094\3\2\2\2\2\u0096\3\2\2\2\2\u0098"+
"\3\2\2\2\2\u009a\3\2\2\2\2\u009c\3\2\2\2\2\u009e\3\2\2\2\2\u00a0\3\2\2"+
"\2\2\u00a2\3\2\2\2\2\u00a4\3\2\2\2\2\u00a6\3\2\2\2\3\u00a8\3\2\2\2\3\u00aa"+
"\3\2\2\2\4\u00ad\3\2\2\2\6\u00c8\3\2\2\2\b\u00cc\3\2\2\2\n\u00ce\3\2\2"+
"\2\f\u00d0\3\2\2\2\16\u00d2\3\2\2\2\20\u00d4\3\2\2\2\22\u00d6\3\2\2\2"+
"\24\u00d8\3\2\2\2\26\u00dc\3\2\2\2\30\u00e1\3\2\2\2\32\u00e3\3\2\2\2\34"+
"\u00e5\3\2\2\2\36\u00e8\3\2\2\2 \u00eb\3\2\2\2\"\u00f0\3\2\2\2$\u00f6"+
"\3\2\2\2&\u00f9\3\2\2\2(\u00fd\3\2\2\2*\u0106\3\2\2\2,\u010c\3\2\2\2."+
"\u0113\3\2\2\2\60\u0117\3\2\2\2\62\u011b\3\2\2\2\64\u0121\3\2\2\2\66\u0127"+
"\3\2\2\28\u012c\3\2\2\2:\u0137\3\2\2\2<\u0139\3\2\2\2>\u013b\3\2\2\2@"+
"\u013d\3\2\2\2B\u0140\3\2\2\2D\u0142\3\2\2\2F\u0144\3\2\2\2H\u0146\3\2"+
"\2\2J\u0149\3\2\2\2L\u014c\3\2\2\2N\u0150\3\2\2\2P\u0152\3\2\2\2R\u0155"+
"\3\2\2\2T\u0157\3\2\2\2V\u015a\3\2\2\2X\u015d\3\2\2\2Z\u0161\3\2\2\2\\"+
"\u0164\3\2\2\2^\u0168\3\2\2\2`\u016a\3\2\2\2b\u016c\3\2\2\2d\u016e\3\2"+
"\2\2f\u0171\3\2\2\2h\u0174\3\2\2\2j\u0176\3\2\2\2l\u0178\3\2\2\2n\u017b"+
"\3\2\2\2p\u017e\3\2\2\2r\u0181\3\2\2\2t\u0184\3\2\2\2v\u0188\3\2\2\2x"+
"\u018b\3\2\2\2z\u018e\3\2\2\2|\u0190\3\2\2\2~\u0193\3\2\2\2\u0080\u0196"+
"\3\2\2\2\u0082\u0199\3\2\2\2\u0084\u019c\3\2\2\2\u0086\u019f\3\2\2\2\u0088"+
"\u01a2\3\2\2\2\u008a\u01a5\3\2\2\2\u008c\u01a8\3\2\2\2\u008e\u01ac\3\2"+
"\2\2\u0090\u01b0\3\2\2\2\u0092\u01b5\3\2\2\2\u0094\u01be\3\2\2\2\u0096"+
"\u01d0\3\2\2\2\u0098\u01dd\3\2\2\2\u009a\u020d\3\2\2\2\u009c\u020f\3\2"+
"\2\2\u009e\u0220\3\2\2\2\u00a0\u0225\3\2\2\2\u00a2\u022b\3\2\2\2\u00a4"+
"\u0230\3\2\2\2\u00a6\u023b\3\2\2\2\u00a8\u024a\3\2\2\2\u00aa\u024e\3\2"+
"\2\2\u00ac\u00ae\t\2\2\2\u00ad\u00ac\3\2\2\2\u00ae\u00af\3\2\2\2\u00af"+
"\u00ad\3\2\2\2\u00af\u00b0\3\2\2\2\u00b0\u00b1\3\2\2\2\u00b1\u00b2\b\2"+
"\2\2\u00b2\5\3\2\2\2\u00b3\u00b4\7\61\2\2\u00b4\u00b5\7\61\2\2\u00b5\u00b9"+
"\3\2\2\2\u00b6\u00b8\13\2\2\2\u00b7\u00b6\3\2\2\2\u00b8\u00bb\3\2\2\2"+
"\u00b9\u00ba\3\2\2\2\u00b9\u00b7\3\2\2\2\u00ba\u00bc\3\2\2\2\u00bb\u00b9"+
"\3\2\2\2\u00bc\u00c9\t\3\2\2\u00bd\u00be\7\61\2\2\u00be\u00bf\7,\2\2\u00bf"+
"\u00c3\3\2\2\2\u00c0\u00c2\13\2\2\2\u00c1\u00c0\3\2\2\2\u00c2\u00c5\3"+
"\2\2\2\u00c3\u00c4\3\2\2\2\u00c3\u00c1\3\2\2\2\u00c4\u00c6\3\2\2\2\u00c5"+
"\u00c3\3\2\2\2\u00c6\u00c7\7,\2\2\u00c7\u00c9\7\61\2\2\u00c8\u00b3\3\2"+
Expand Down Expand Up @@ -340,14 +340,14 @@ private boolean TYPE_sempred(RuleContext _localctx, int predIndex) {
"\u01fa\3\2\2\2\u01fc\u01ff\3\2\2\2\u01fd\u01fe\3\2\2\2\u01fd\u01fb\3\2"+
"\2\2\u01fe\u0200\3\2\2\2\u01ff\u01fd\3\2\2\2\u0200\u020e\7$\2\2\u0201"+
"\u0209\7)\2\2\u0202\u0203\7^\2\2\u0203\u0208\7)\2\2\u0204\u0205\7^\2\2"+
"\u0205\u0208\7^\2\2\u0206\u0208\n\16\2\2\u0207\u0202\3\2\2\2\u0207\u0204"+
"\u0205\u0208\7^\2\2\u0206\u0208\n\17\2\2\u0207\u0202\3\2\2\2\u0207\u0204"+
"\3\2\2\2\u0207\u0206\3\2\2\2\u0208\u020b\3\2\2\2\u0209\u020a\3\2\2\2\u0209"+
"\u0207\3\2\2\2\u020a\u020c\3\2\2\2\u020b\u0209\3\2\2\2\u020c\u020e\7)"+
"\2\2\u020d\u01f5\3\2\2\2\u020d\u0201\3\2\2\2\u020e\u009b\3\2\2\2\u020f"+
"\u0213\7\61\2\2\u0210\u0214\n\17\2\2\u0211\u0212\7^\2\2\u0212\u0214\n"+
"\20\2\2\u0213\u0210\3\2\2\2\u0213\u0211\3\2\2\2\u0214\u0215\3\2\2\2\u0215"+
"\u0213\7\61\2\2\u0210\u0214\n\20\2\2\u0211\u0212\7^\2\2\u0212\u0214\n"+
"\21\2\2\u0213\u0210\3\2\2\2\u0213\u0211\3\2\2\2\u0214\u0215\3\2\2\2\u0215"+
"\u0213\3\2\2\2\u0215\u0216\3\2\2\2\u0216\u0217\3\2\2\2\u0217\u021b\7\61"+
"\2\2\u0218\u021a\t\21\2\2\u0219\u0218\3\2\2\2\u021a\u021d\3\2\2\2\u021b"+
"\2\2\u0218\u021a\t\22\2\2\u0219\u0218\3\2\2\2\u021a\u021d\3\2\2\2\u021b"+
"\u0219\3\2\2\2\u021b\u021c\3\2\2\2\u021c\u021e\3\2\2\2\u021d\u021b\3\2"+
"\2\2\u021e\u021f\6N\3\2\u021f\u009d\3\2\2\2\u0220\u0221\7v\2\2\u0221\u0222"+
"\7t\2\2\u0222\u0223\7w\2\2\u0223\u0224\7g\2\2\u0224\u009f\3\2\2\2\u0225"+
Expand All @@ -357,14 +357,14 @@ private boolean TYPE_sempred(RuleContext _localctx, int predIndex) {
"\u0236\5\u00a6S\2\u0231\u0232\5\24\n\2\u0232\u0233\5\u00a6S\2\u0233\u0235"+
"\3\2\2\2\u0234\u0231\3\2\2\2\u0235\u0238\3\2\2\2\u0236\u0234\3\2\2\2\u0236"+
"\u0237\3\2\2\2\u0237\u0239\3\2\2\2\u0238\u0236\3\2\2\2\u0239\u023a\6R"+
"\4\2\u023a\u00a5\3\2\2\2\u023b\u023f\t\22\2\2\u023c\u023e\t\23\2\2\u023d"+
"\4\2\u023a\u00a5\3\2\2\2\u023b\u023f\t\23\2\2\u023c\u023e\t\24\2\2\u023d"+
"\u023c\3\2\2\2\u023e\u0241\3\2\2\2\u023f\u023d\3\2\2\2\u023f\u0240\3\2"+
"\2\2\u0240\u00a7\3\2\2\2\u0241\u023f\3\2\2\2\u0242\u024b\7\62\2\2\u0243"+
"\u0247\t\b\2\2\u0244\u0246\t\t\2\2\u0245\u0244\3\2\2\2\u0246\u0249\3\2"+
"\2\2\u0247\u0245\3\2\2\2\u0247\u0248\3\2\2\2\u0248\u024b\3\2\2\2\u0249"+
"\u0247\3\2\2\2\u024a\u0242\3\2\2\2\u024a\u0243\3\2\2\2\u024b\u024c\3\2"+
"\2\2\u024c\u024d\bT\4\2\u024d\u00a9\3\2\2\2\u024e\u0252\t\22\2\2\u024f"+
"\u0251\t\23\2\2\u0250\u024f\3\2\2\2\u0251\u0254\3\2\2\2\u0252\u0250\3"+
"\2\2\u024c\u024d\bT\4\2\u024d\u00a9\3\2\2\2\u024e\u0252\t\23\2\2\u024f"+
"\u0251\t\24\2\2\u0250\u024f\3\2\2\2\u0251\u0254\3\2\2\2\u0252\u0250\3"+
"\2\2\2\u0252\u0253\3\2\2\2\u0253\u0255\3\2\2\2\u0254\u0252\3\2\2\2\u0255"+
"\u0256\bU\4\2\u0256\u00ab\3\2\2\2$\2\3\u00af\u00b9\u00c3\u00c8\u01b9\u01bc"+
"\u01c3\u01c6\u01cd\u01d0\u01d3\u01da\u01dd\u01e3\u01e5\u01e9\u01ee\u01f0"+
Expand Down
Expand Up @@ -799,9 +799,27 @@ public ANode visitNull(NullContext ctx) {

@Override
public ANode visitString(StringContext ctx) {
String string = ctx.STRING().getText().substring(1, ctx.STRING().getText().length() - 1);
StringBuilder string = new StringBuilder(ctx.STRING().getText());

// Strip the leading and trailing quotes and replace the escape sequences with their literal equivalents
int src = 1;
int dest = 0;
int end = string.length() - 1;
assert string.charAt(0) == '"' || string.charAt(0) == '\'' : "expected string to start with a quote but was [" + string + "]";
assert string.charAt(end) == '"' || string.charAt(end) == '\'' : "expected string to end with a quote was [" + string + "]";
while (src < end) {
char current = string.charAt(src);
if (current == '\\') {
src++;
current = string.charAt(src);
}
string.setCharAt(dest, current);
src++;
dest++;
}
string.setLength(dest);

return new EString(location(ctx), string);
return new EString(location(ctx), string.toString());
}

@Override
Expand Down
Expand Up @@ -42,6 +42,7 @@ public void testReturnConstant() {
assertEquals((byte)255, exec("return (byte)255"));
assertEquals((short)5, exec("return (short)5"));
assertEquals("string", exec("return \"string\""));
assertEquals("string", exec("return 'string'"));
assertEquals(true, exec("return true"));
assertEquals(false, exec("return false"));
assertNull(exec("return null"));
Expand All @@ -55,6 +56,37 @@ public void testConstantCharTruncation() {
assertEquals('蚠', exec("return (char)100000;"));
}

public void testStringEscapes() {
// The readability of this test suffers from having to escape `\` and `"` in java strings. Please be careful. Sorry!
// `\\` is a `\`
assertEquals("\\string", exec("\"\\\\string\""));
assertEquals("\\string", exec("'\\\\string'"));
// `\"` is a `"` if surrounded by `"`s
assertEquals("\"string", exec("\"\\\"string\""));
Exception e = expectScriptThrows(IllegalArgumentException.class, () -> exec("'\\\"string'", false));
assertEquals("unexpected character ['\\\"]. The only valid escape sequences in strings starting with ['] are [\\\\] and [\\'].",
e.getMessage());
// `\'` is a `'` if surrounded by `'`s
e = expectScriptThrows(IllegalArgumentException.class, () -> exec("\"\\'string\"", false));
assertEquals("unexpected character [\"\\']. The only valid escape sequences in strings starting with [\"] are [\\\\] and [\\\"].",
e.getMessage());
assertEquals("'string", exec("'\\'string'"));
// We don't break native escapes like new line
assertEquals("\nstring", exec("\"\nstring\""));
assertEquals("\nstring", exec("'\nstring'"));

// And we're ok with strings with multiple escape sequences
assertEquals("\\str\"in\\g", exec("\"\\\\str\\\"in\\\\g\""));
assertEquals("st\\r'i\\ng", exec("'st\\\\r\\'i\\\\ng'"));
}

public void testStringTermination() {
// `'` inside of a string delimited with `"` should be ok
assertEquals("test'", exec("\"test'\""));
// `"` inside of a string delimited with `'` should be ok
assertEquals("test\"", exec("'test\"'"));
}

/** declaring variables for primitive types */
public void testDeclareVariable() {
assertEquals(5, exec("int i = 5; return i;"));
Expand Down
Expand Up @@ -267,4 +267,20 @@ public void testQuestionSpaceDotIsNotNullSafeDereference() {
assertEquals("invalid sequence of tokens near ['.'].", e.getMessage());
}

public void testBadStringEscape() {
Exception e = expectScriptThrows(IllegalArgumentException.class, () -> exec("'\\a'", false));
assertEquals("unexpected character ['\\a]. The only valid escape sequences in strings starting with ['] are [\\\\] and [\\'].",
e.getMessage());
e = expectScriptThrows(IllegalArgumentException.class, () -> exec("\"\\a\"", false));
assertEquals("unexpected character [\"\\a]. The only valid escape sequences in strings starting with [\"] are [\\\\] and [\\\"].",
e.getMessage());
}

public void testRegularUnexpectedCharacter() {
Exception e = expectScriptThrows(IllegalArgumentException.class, () -> exec("'", false));
assertEquals("unexpected character ['].", e.getMessage());
e = expectScriptThrows(IllegalArgumentException.class, () -> exec("'cat", false));
assertEquals("unexpected character ['cat].", e.getMessage());
}

}

0 comments on commit f24ca51

Please sign in to comment.