Skip to content

Commit

Permalink
Merge pull request eclipse-rdf4j#843 from jamesrdf/issues/eclipse-rdf…
Browse files Browse the repository at this point in the history
…4j#62-long-unicode

Fix eclipse-rdf4j#62: Decode both short and long unicode SPARQL escape
Signed-off-by: Heshan Jayasinghe <shanujse@gmail.com>
  • Loading branch information
James Leigh committed Jun 9, 2017
2 parents b9bb20f + bbc816e commit 893c4f2
Show file tree
Hide file tree
Showing 6 changed files with 272 additions and 64 deletions.
@@ -0,0 +1,115 @@
/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 5.0 */
/* JavaCCOptions:STATIC=false,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package org.eclipse.rdf4j.query.parser.sparql.ast;

/**
 * This interface describes a character stream that maintains line and
 * column number positions of the characters. It also has the capability
 * to backup the stream to some extent. An implementation of this
 * interface is used in the TokenManager implementation generated by
 * JavaCCParser.
 *
 * <p>All the methods except {@link #backup(int)} can be implemented in any
 * fashion. {@code backup} needs to be implemented correctly for the correct
 * operation of the lexer. The rest of the methods are all used to get
 * information like line number, column number and the String that
 * constitutes a token and are not used by the lexer. Hence their
 * implementation won't affect the generated lexer's operation.
 */
public interface CharStream {

  /**
   * Returns the next character from the selected input. The method
   * of selecting the input is the responsibility of the class
   * implementing this interface.
   *
   * @return the next character of the input
   * @throws java.io.IOException any I/O failure reported by the implementation
   */
  char readChar() throws java.io.IOException;

  // The doc comment precedes the annotation: a comment placed between
  // @Deprecated and the return type is not treated as the method's Javadoc.
  /**
   * Returns the column position of the character last read.
   *
   * @return the column position of the character last read
   * @deprecated use {@link #getEndColumn()} instead
   * @see #getEndColumn
   */
  @Deprecated
  int getColumn();

  /**
   * Returns the line number of the character last read.
   *
   * @return the line number of the character last read
   * @deprecated use {@link #getEndLine()} instead
   * @see #getEndLine
   */
  @Deprecated
  int getLine();

  /**
   * Returns the column number of the last character for current token (being
   * matched after the last call to {@link #BeginToken()}).
   *
   * @return the end column of the current token
   */
  int getEndColumn();

  /**
   * Returns the line number of the last character for current token (being
   * matched after the last call to {@link #BeginToken()}).
   *
   * @return the end line of the current token
   */
  int getEndLine();

  /**
   * Returns the column number of the first character for current token (being
   * matched after the last call to {@link #BeginToken()}).
   *
   * @return the begin column of the current token
   */
  int getBeginColumn();

  /**
   * Returns the line number of the first character for current token (being
   * matched after the last call to {@link #BeginToken()}).
   *
   * @return the begin line of the current token
   */
  int getBeginLine();

  /**
   * Backs up the input stream by amount steps. Lexer calls this method if it
   * had already read some characters, but could not use them to match a
   * (longer) token. So, they will be used again as the prefix of the next
   * token and it is the implementation's responsibility to do this right.
   *
   * @param amount the number of steps to back up
   */
  void backup(int amount);

  /**
   * Returns the next character that marks the beginning of the next token.
   * All characters must remain in the buffer between two successive calls
   * to this method to implement backup correctly.
   *
   * @return the first character of the next token
   * @throws java.io.IOException any I/O failure reported by the implementation
   */
  char BeginToken() throws java.io.IOException;

  /**
   * Returns a string made up of characters from the marked token beginning
   * to the current buffer position. Implementations have the choice of returning
   * anything that they want to. For example, for efficiency, one might decide
   * to just return null, which is a valid implementation.
   *
   * @return the image of the current token, or whatever the implementation
   *         chooses to return (possibly {@code null})
   */
  String GetImage();

  /**
   * Returns an array of characters that make up the suffix of length 'len' for
   * the currently matched token. This is used to build up the matched string
   * for use in actions in the case of MORE. A simple and inefficient
   * implementation of this is as follows:
   *
   * <pre>{@code
   * {
   *   String t = GetImage();
   *   return t.substring(t.length() - len, t.length()).toCharArray();
   * }
   * }</pre>
   *
   * @param len the length of the requested suffix
   * @return the last {@code len} characters of the currently matched token
   */
  char[] GetSuffix(int len);

  /**
   * The lexer calls this function to indicate that it is done with the stream
   * and hence implementations can free any resources held by this class.
   * Again, the body of this function can be just empty and it will not
   * affect the lexer's operation.
   */
  void Done();

}
/* JavaCC - OriginalChecksum=d5d02d7f2852c9b712f39bed41ca22b5 (do not edit this line) */
Expand Up @@ -29,11 +29,10 @@ public class SyntaxTreeBuilder/*@bgen(jjtree)*/implements SyntaxTreeBuilderTreeC
public static ASTQueryContainer parseQuery(String query)
throws TokenMgrError, ParseException
{
SyntaxTreeBuilder stb = new SyntaxTreeBuilder( new StringReader(query) );
SyntaxTreeBuilder stb = new SyntaxTreeBuilder( new UnicodeEscapeStream(new StringReader(query), 1) );

// Set size of tab to 1 to force tokenmanager to report correct column
// index for substring splitting of service graph pattern.
stb.jj_input_stream.setTabSize(1);

ASTQueryContainer container = stb.QueryContainer();
container.setSourceString(query);
Expand All @@ -51,11 +50,10 @@ public static ASTQueryContainer parseQuery(String query)
public static ASTUpdateSequence parseUpdateSequence(String sequence)
throws TokenMgrError, ParseException
{
SyntaxTreeBuilder stb = new SyntaxTreeBuilder( new StringReader(sequence) );
SyntaxTreeBuilder stb = new SyntaxTreeBuilder( new UnicodeEscapeStream(new StringReader(sequence), 1) );

// Set size of tab to 1 to force tokenmanager to report correct column
// index for substring splitting of service graph pattern.
stb.jj_input_stream.setTabSize(1);

ASTUpdateSequence seq = stb.UpdateSequence();
seq.setSourceString(sequence);
Expand Down Expand Up @@ -8159,6 +8157,11 @@ private boolean jj_2_7(int xla) {
finally { jj_save(6, xla); }
}

/** Reports the outcome of scanning for an LBRACK token: true exactly when jj_scan_token(LBRACK) is true. */
private boolean jj_3R_67() {
  return jj_scan_token(LBRACK);
}

private boolean jj_3R_61() {
Token xsp;
xsp = jj_scanpos;
Expand Down Expand Up @@ -8648,14 +8651,8 @@ private boolean jj_3R_84() {
return false;
}

/** Reports the outcome of scanning for an LBRACK token: true exactly when jj_scan_token(LBRACK) is true. */
private boolean jj_3R_67() {
  return jj_scan_token(LBRACK);
}

/** Generated Token Manager. */
public SyntaxTreeBuilderTokenManager token_source;
JavaCharStream jj_input_stream;
/** Current token. */
public Token token;
/** Next token. */
Expand Down Expand Up @@ -8701,41 +8698,9 @@ private static void jj_la1_init_5() {
private boolean jj_rescan = false;
private int jj_gc = 0;

/**
 * Constructor with InputStream.
 * Delegates to the (InputStream, String) constructor, passing null as the encoding.
 *
 * @param stream the input stream forwarded to the two-argument constructor
 */
public SyntaxTreeBuilder(java.io.InputStream stream) {
this(stream, null);
}
/**
 * Constructor with InputStream and supplied encoding.
 *
 * @param stream   input handed to a newly created JavaCharStream
 * @param encoding encoding name handed to the same JavaCharStream
 * @throws RuntimeException wrapping the java.io.UnsupportedEncodingException
 *         raised while creating the JavaCharStream
 */
public SyntaxTreeBuilder(java.io.InputStream stream, String encoding) {
  try {
    jj_input_stream = new JavaCharStream(stream, encoding, 1, 1);
  }
  catch (java.io.UnsupportedEncodingException unsupported) {
    throw new RuntimeException(unsupported);
  }
  token_source = new SyntaxTreeBuilderTokenManager(jj_input_stream);
  token = new Token();
  jj_ntk = -1;
  jj_gen = 0;
  // Mark the first 174 jj_la1 slots with -1.
  for (int slot = 0; slot < 174; slot++) {
    jj_la1[slot] = -1;
  }
  // Give every jj_2_rtns slot a fresh JJCalls instance.
  for (int slot = 0; slot < jj_2_rtns.length; slot++) {
    jj_2_rtns[slot] = new JJCalls();
  }
}

/**
 * Reinitialise.
 * Delegates to ReInit(InputStream, String), passing null as the encoding.
 *
 * @param stream the input stream forwarded to the two-argument ReInit
 */
public void ReInit(java.io.InputStream stream) {
ReInit(stream, null);
}
/**
 * Reinitialise.
 *
 * @param stream   input passed to jj_input_stream.ReInit
 * @param encoding encoding name passed to jj_input_stream.ReInit
 * @throws RuntimeException wrapping the java.io.UnsupportedEncodingException
 *         raised while re-initialising jj_input_stream
 */
public void ReInit(java.io.InputStream stream, String encoding) {
  try {
    jj_input_stream.ReInit(stream, encoding, 1, 1);
  }
  catch (java.io.UnsupportedEncodingException unsupported) {
    throw new RuntimeException(unsupported);
  }
  token_source.ReInit(jj_input_stream);
  token = new Token();
  jj_ntk = -1;
  jjtree.reset();
  jj_gen = 0;
  // Mark the first 174 jj_la1 slots with -1.
  for (int slot = 0; slot < 174; slot++) {
    jj_la1[slot] = -1;
  }
  // Give every jj_2_rtns slot a fresh JJCalls instance.
  for (int slot = 0; slot < jj_2_rtns.length; slot++) {
    jj_2_rtns[slot] = new JJCalls();
  }
}

/** Constructor. */
public SyntaxTreeBuilder(java.io.Reader stream) {
jj_input_stream = new JavaCharStream(stream, 1, 1);
token_source = new SyntaxTreeBuilderTokenManager(jj_input_stream);
/** Constructor with user supplied CharStream. */
public SyntaxTreeBuilder(CharStream stream) {
token_source = new SyntaxTreeBuilderTokenManager(stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
Expand All @@ -8744,9 +8709,8 @@ public SyntaxTreeBuilder(java.io.Reader stream) {
}

/** Reinitialise. */
public void ReInit(java.io.Reader stream) {
jj_input_stream.ReInit(stream, 1, 1);
token_source.ReInit(jj_input_stream);
public void ReInit(CharStream stream) {
token_source.ReInit(stream);
token = new Token();
jj_ntk = -1;
jjtree.reset();
Expand Down Expand Up @@ -8865,18 +8829,21 @@ private void jj_add_error_token(int kind, int pos) {
for (int i = 0; i < jj_endpos; i++) {
jj_expentry[i] = jj_lasttokens[i];
}
jj_entries_loop: for (java.util.Iterator<?> it = jj_expentries.iterator(); it.hasNext();) {
boolean exists = false;
for (java.util.Iterator<?> it = jj_expentries.iterator(); it.hasNext();) {
exists = true;
int[] oldentry = (int[])(it.next());
if (oldentry.length == jj_expentry.length) {
for (int i = 0; i < jj_expentry.length; i++) {
if (oldentry[i] != jj_expentry[i]) {
continue jj_entries_loop;
exists = false;
break;
}
}
jj_expentries.add(jj_expentry);
break jj_entries_loop;
if (exists) break;
}
}
if (!exists) jj_expentries.add(jj_expentry);
if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind;
}
}
Expand Down
Expand Up @@ -3315,25 +3315,23 @@ private static final boolean jjCanMove_2(int hiByte, int i1, int i2, long l1, lo
static final long[] jjtoSpecial = {
0x8L, 0x0L, 0x0L,
};
protected JavaCharStream input_stream;
protected CharStream input_stream;
private final int[] jjrounds = new int[157];
private final int[] jjstateSet = new int[314];
protected char curChar;
/** Constructor. */
public SyntaxTreeBuilderTokenManager(JavaCharStream stream){
if (JavaCharStream.staticFlag)
throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer.");
public SyntaxTreeBuilderTokenManager(CharStream stream){
input_stream = stream;
}

/** Constructor. */
public SyntaxTreeBuilderTokenManager(JavaCharStream stream, int lexState){
public SyntaxTreeBuilderTokenManager(CharStream stream, int lexState){
this(stream);
SwitchTo(lexState);
}

/** Reinitialise parser. */
public void ReInit(JavaCharStream stream)
public void ReInit(CharStream stream)
{
jjmatchedPos = jjnewStateCnt = 0;
curLexState = defaultLexState;
Expand All @@ -3349,7 +3347,7 @@ private void ReInitRounds()
}

/** Reinitialise parser. */
public void ReInit(JavaCharStream stream, int lexState)
public void ReInit(CharStream stream, int lexState)
{
ReInit(stream);
SwitchTo(lexState);
Expand Down

0 comments on commit 893c4f2

Please sign in to comment.