Skip to content

Commit

Permalink
Use the antlr parser for parsing pg arrays.
Browse files Browse the repository at this point in the history
Such as `PgArray#decodeUTF8Text` contained at least
two known bugs related to parsing the `point` and
`varchar` Postgres types, and was hard to extend and reason
about, we decided to go with the Antlr parser to parse/decode
pg arrays.

This change fixes the above mentioned issues with insertion
of the `varchar` and `point` type arrays and introduces a new
approach to parse the pg arrays.
  • Loading branch information
kovrus authored and mergify[bot] committed Nov 28, 2019
1 parent 3091e26 commit 4154a6f
Show file tree
Hide file tree
Showing 16 changed files with 650 additions and 158 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,4 @@ out/

# Misc
sandbox/crate/data/
sql-parser/src/main/java/io/crate/sql/parser/antlr/
**/antlr/v4
1 change: 1 addition & 0 deletions app/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ dependencies {
compileNotTransitive project(':es:es-x-content')
compileNotTransitive project(':sql')
compileNotTransitive project(':sql-parser')
compileNotTransitive project(':pgwire')
compileNotTransitive project(':dex')
compileNotTransitive project(':shared')
compileNotTransitive project(':blob')
Expand Down
9 changes: 9 additions & 0 deletions docs/appendices/release-notes/unreleased.rst
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,15 @@ Changes
Fixes
=====

- Fixed issues that would prevent usage or lead to incorrect behaviour
of the client that use PostgreSQL Wire Protocol when inserting arrays
of certain types:

- Insertion of the PostgreSQL ``point`` arrays was not possible.

- Insertion of the PostgreSQL ``varchar`` arrays that contain the ``,``
in its items would have led to incorrect results.

- Improved performance of snapshot finalization as https://github.com/crate/crate/pull/9327
introduced a performance regression on the snapshot process.

Expand Down
4 changes: 2 additions & 2 deletions gradle/checkstyle/suppressions.xml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,6 @@
"http://www.puppycrawl.com/dtds/suppressions_1_1.dtd">

<suppressions>
<suppress files=".*[/\\]parser[/\\]antlr.v4[/\\].*" checks="[a-zA-Z0-9]*"/>
<suppress files=".*[/\\]antlr.v4[/\\].*" checks="[a-zA-Z0-9]*"/>
<suppress files=".*[/\\]es[/\\]upstream[/\\].*" checks="[a-zA-Z0-9]*"/>
</suppressions>
</suppressions>
54 changes: 54 additions & 0 deletions pgwire/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
apply from: "$rootDir/gradle/javaModule.gradle"

archivesBaseName = 'crate-pgwire'

ext.antlr = [
source : "src/main/antlr",
output : "src/main/java/io/crate/protocols/postgres/antlr/v4",
package: 'io.crate.protocols.postgres.antlr.v4'
]

configurations {
antlr4
}

dependencies {
antlr4 "org.antlr:antlr4:${versions.antlr}"

implementation "org.antlr:antlr4-runtime:${versions.antlr}"
implementation "com.carrotsearch:hppc:${versions.carrotsearch_hppc}"

testImplementation "org.hamcrest:hamcrest:${versions.hamcrest}"
testImplementation "junit:junit:${versions.junit}"
testImplementation "com.fasterxml.jackson.core:jackson-databind:${versions.jacksondatabind}"
}

task antlrOutputDir {
doLast {
mkdir(antlr.output)
}
}

task generateGrammarSource(dependsOn: antlrOutputDir, type: JavaExec) {
inputs.files(fileTree(antlr.source))
outputs.dir file(antlr.output)

def grammars = fileTree(antlr.source).include('**/*.g4')

main = 'org.antlr.v4.Tool'
classpath = configurations.antlr4
args = [
"-o", "${antlr.output}",
"-visitor", "-no-listener",
"-package", antlr.package,
grammars.files
].flatten()
}

tasks.withType(JavaCompile) {
it.dependsOn('generateGrammarSource')
}

clean {
delete antlr.output
}
71 changes: 71 additions & 0 deletions pgwire/src/main/antlr/PgArray.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
grammar PgArray;

/**
The grammar is used to parse PG array text
representations. E.g.:
numeric PG arrays:
{10, NULL, NULL, 20, 30}
{"10", NULL, NULL, "20", "30"}
multi-dimentsional PG arrays:
{{"10", "20"}, {"30", NULL, "40"}}
json PG arrays:
{"{\"x\": 10}", "{\"y\": 20}"}
{\"{\\\"x\\\": 10}\", \"{\\\"y\\\": 20}\"}
*/

array
: '{' item (',' item)* '}'
| '{' '}'
;

item
: STRING #value
| NUMBER #value
| bool #value
| array #noop
| NULL #null
;

bool
: 'true'
| 'false'
;

STRING
: '"' (ESC | ~["\\])* '"'
;
NUMBER
: '-'? DIGIT '.' DIGIT EXP?
| '-'? DIGIT EXP
| '-'? DIGIT
;
NULL: 'NULL';
fragment ESC
: '\\' (["\\/bfnrt] | UNICODE)
;
fragment UNICODE
: 'u' HEX HEX HEX HEX
;
fragment HEX
: [0-9a-fA-F]
;
fragment DIGIT
: '0' | [1-9] [0-9]*
;
fragment EXP
: [Ee] [+\-]? DIGIT
;
WS
: [ \t\n\r] + -> skip
;
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Licensed to Crate under one or more contributor license agreements.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership. Crate licenses this file
* to you under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*
* However, if you have executed another commercial license agreement
* with Crate these terms will supersede the license and you may use the
* software solely pursuant to the terms of the relevant commercial
* agreement.
*/

package io.crate.protocols.postgres.parser;

import com.carrotsearch.hppc.ByteArrayList;
import io.crate.protocols.postgres.antlr.v4.PgArrayBaseVisitor;
import io.crate.protocols.postgres.antlr.v4.PgArrayParser;

import java.util.ArrayList;
import java.util.function.Function;

import static java.nio.charset.StandardCharsets.UTF_8;

class PgArrayASTVisitor extends PgArrayBaseVisitor<Object> {

private final Function<byte[], Object> convert;

PgArrayASTVisitor(Function<byte[], Object> convert) {
this.convert = convert;
}

@Override
public Object visitArray(PgArrayParser.ArrayContext ctx) {
ArrayList<Object> array = new ArrayList<>();
for (var value : ctx.item()) {
array.add(visit(value));
}
return array;
}

@Override
public Object visitValue(PgArrayParser.ValueContext ctx) {
return convert.apply(processArrayItem(ctx.getText().getBytes(UTF_8)));
}

@Override
public Object visitNull(PgArrayParser.NullContext ctx) {
return null;
}

/**
* Processes an array's item by skipping escape characters
* and unquoting the item if it is needed.
*
* @param bytes {@code byte[]} that represent an array's item.
*/
private static byte[] processArrayItem(byte[] bytes) {
var itemBytes = new ByteArrayList();
int start = 0, end = bytes.length - 1;
if (bytes[start] == '"') {
start += 1;
end -= 1;
}

for (int i = start; i <= end; i++) {
if (i < end && (char) bytes[i] == '\\'
&& ((char) bytes[i + 1] == '\\' || (char) bytes[i + 1] == '\"')) {
i++;
}
itemBytes.add(bytes[i]);
}

return itemBytes.toArray();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Licensed to Crate under one or more contributor license agreements.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership. Crate licenses this file
* to you under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*
* However, if you have executed another commercial license agreement
* with Crate these terms will supersede the license and you may use the
* software solely pursuant to the terms of the relevant commercial
* agreement.
*/

package io.crate.protocols.postgres.parser;

import io.crate.protocols.postgres.antlr.v4.PgArrayLexer;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.atn.PredictionMode;
import org.antlr.v4.runtime.misc.ParseCancellationException;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.function.Function;

public class PgArrayParser {

private static final BaseErrorListener ERROR_LISTENER = new BaseErrorListener() {
@Override
public void syntaxError(Recognizer<?, ?> recognizer,
Object offendingSymbol,
int line,
int charPositionInLine,
String message,
RecognitionException e) {
throw new PgArrayParsingException(message, e, line, charPositionInLine);
}
};

private static final PgArrayParser INSTANCE = new PgArrayParser();

public static Object parse(byte[] bytes, Function<byte[], Object> convert) {
return INSTANCE.invokeParser(
new ByteArrayInputStream(bytes),
io.crate.protocols.postgres.antlr.v4.PgArrayParser::array,
convert);
}

private Object invokeParser(InputStream inputStream,
Function<io.crate.protocols.postgres.antlr.v4.PgArrayParser, ParserRuleContext> parseFunction,
Function<byte[], Object> convert) {
try {
var lexer = new PgArrayLexer(CharStreams.fromStream(
inputStream,
StandardCharsets.UTF_8));
var tokenStream = new CommonTokenStream(lexer);
var parser = new io.crate.protocols.postgres.antlr.v4.PgArrayParser(tokenStream);

lexer.removeErrorListeners();
lexer.addErrorListener(ERROR_LISTENER);

parser.removeErrorListeners();
parser.addErrorListener(ERROR_LISTENER);

ParserRuleContext tree;
try {
// first, try parsing with potentially faster SLL mode
parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
tree = parseFunction.apply(parser);
} catch (ParseCancellationException ex) {
// if we fail, parse with LL mode
tokenStream.seek(0); // rewind input stream
parser.reset();

parser.getInterpreter().setPredictionMode(PredictionMode.LL);
tree = parseFunction.apply(parser);
}

return new PgArrayASTVisitor(convert).visit(tree);
} catch (StackOverflowError e) {
throw new PgArrayParsingException("stack overflow while parsing: " + e.getLocalizedMessage());
} catch (IOException e) {
return new IllegalArgumentException(e);
}
}
}
Loading

0 comments on commit 4154a6f

Please sign in to comment.