Skip to content

Commit

Permalink
fix: like queries fail if using a special regex
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Beer committed Oct 22, 2015
1 parent ed2a3d2 commit 6c24c22
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 42 deletions.
3 changes: 3 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ Changes for Crate
Unreleased
==========

- Fixed an issue that caused LIKE queries to fail if the pattern contained a
special regex character

- Fixed an issue that caused the first configured node in the cluster to be
overloaded when multiple client connections were established

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import com.vividsolutions.jts.geom.Envelope;
import com.vividsolutions.jts.geom.Geometry;
import io.crate.analyze.WhereClause;
import io.crate.lucene.LuceneQueryBuilder;
import io.crate.operation.Input;
import io.crate.operation.operator.*;
import io.crate.operation.operator.any.*;
Expand Down Expand Up @@ -84,19 +85,6 @@ static final class Fields {
static final XContentBuilderString BOOST = new XContentBuilderString("boost");
}

/**
 * Converts a SQL LIKE pattern into a Lucene wildcard pattern.
 *
 * <p>SQL uses {@code %} (any string) and {@code _} (any single character),
 * Lucene uses {@code *} and {@code ?}. The backslash is the escape character
 * on both sides.
 *
 * <ul>
 *   <li>{@code %} becomes {@code *}, {@code _} becomes {@code ?}</li>
 *   <li>{@code \%} and {@code \_} become the plain literals {@code %} and {@code _}</li>
 *   <li>unescaped {@code *} and {@code ?} are escaped so Lucene treats them literally</li>
 *   <li>any other backslash pair (including {@code \\}) is copied through unchanged</li>
 * </ul>
 *
 * <p>The pattern is scanned once from left to right so that an escaped
 * backslash ({@code \\}) is never mistaken for the escape prefix of the
 * character that follows it.
 *
 * @param wildcardString the SQL LIKE pattern, must not be {@code null}
 * @return the equivalent Lucene wildcard pattern
 */
public static String convertWildcard(String wildcardString) {
    final int length = wildcardString.length();
    StringBuilder lucene = new StringBuilder(length + 4);
    for (int i = 0; i < length; i++) {
        char current = wildcardString.charAt(i);
        if (current == '\\' && i + 1 < length) {
            // consume the escape pair as one unit
            char escaped = wildcardString.charAt(++i);
            if (escaped == '%' || escaped == '_') {
                // % and _ have no special meaning for Lucene: drop the escape
                lucene.append(escaped);
            } else {
                // already a valid Lucene escape sequence (\*, \?, \\, ...)
                lucene.append('\\').append(escaped);
            }
            continue;
        }
        switch (current) {
            case '%':
                lucene.append('*');
                break;
            case '_':
                lucene.append('?');
                break;
            case '*':
            case '?':
                // literal in SQL, wildcard in Lucene: escape it
                lucene.append('\\').append(current);
                break;
            default:
                // includes a trailing lone backslash, which is kept as is
                lucene.append(current);
        }
    }
    return lucene.toString();
}

public static String convertWildcardToRegex(String wildcardString) {
// lucene uses * and ? as wildcard characters
// but via SQL they are used as % and _
Expand Down Expand Up @@ -694,7 +682,7 @@ static class LikeConverter extends CmpConverter {
/**
 * Writes a "wildcard" query object for a LIKE comparison to the context's builder.
 *
 * @param columnName name of the field the wildcard pattern is applied to
 * @param value the LIKE pattern; its {@code toString()} form is converted
 * @param context holds the builder the query object is written to
 * @return always {@code true}
 * @throws IOException declared for the builder calls
 */
@Override
public boolean buildESQuery(String columnName, Object value, Context context) throws IOException {
String like = value.toString();
// NOTE(review): the next two lines look like the removed and the added side of
// the same diff hunk (old local helper vs. the shared LuceneQueryBuilder helper).
// Presumably only one of them is meant to run; confirm against the committed file.
like = convertWildcard(like);
like = LuceneQueryBuilder.convertSqlLikeToLuceneWildcard(like);
// emitted as an object named "wildcard" with a single field: columnName -> pattern
context.builder.startObject("wildcard").field(columnName, like).endObject();
return true;
}
Expand Down
21 changes: 5 additions & 16 deletions sql/src/main/java/io/crate/lucene/LuceneQueryBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -184,20 +184,7 @@ public String unsupportedMessage(String field){
.build();
}

/**
 * Converts a SQL LIKE pattern into a Lucene regular expression.
 *
 * <p>SQL uses {@code %} (any string) and {@code _} (any single character);
 * these become {@code .*} and {@code .}. The backslash is the SQL escape
 * character: {@code \%} and {@code \_} become the literals {@code %} and
 * {@code _}. Any other backslash pair is copied through unchanged, which
 * Lucene reads as the escaped (literal) second character.
 *
 * <p>Every other character that is an operator in Lucene's regexp syntax
 * ({@code . ? + * | { } [ ] ( ) " \ # @ & < > ~}) is escaped, so a LIKE
 * pattern such as {@code a.b+c} only matches that literal text. The pattern
 * is scanned once from left to right so that an escaped backslash is never
 * mistaken for the escape prefix of the character that follows it.
 *
 * @param wildcardString the SQL LIKE pattern, must not be {@code null}
 * @return the equivalent Lucene regular expression
 */
public static String convertWildcardToRegex(String wildcardString) {
    // characters with a special meaning in Lucene regexps (including optional operators)
    final String reserved = ".?+*|{}[]()\"\\#@&<>~";
    final int length = wildcardString.length();
    StringBuilder regex = new StringBuilder(length + 8);
    for (int i = 0; i < length; i++) {
        char current = wildcardString.charAt(i);
        if (current == '\\' && i + 1 < length) {
            // consume the escape pair as one unit
            char escaped = wildcardString.charAt(++i);
            if (escaped == '%' || escaped == '_') {
                // % and _ are ordinary characters in a regexp: drop the escape
                regex.append(escaped);
            } else {
                // backslash + any character is a literal character for Lucene
                regex.append('\\').append(escaped);
            }
            continue;
        }
        if (current == '%') {
            regex.append(".*");
        } else if (current == '_') {
            regex.append('.');
        } else {
            // a trailing lone backslash also ends up here and is escaped,
            // because a dangling escape is not a valid regexp
            if (reserved.indexOf(current) >= 0) {
                regex.append('\\');
            }
            regex.append(current);
        }
    }
    return regex.toString();
}

public static String convertWildcard(String wildcardString) {
public static String convertSqlLikeToLuceneWildcard(String wildcardString) {
// lucene uses * and ? as wildcard characters
// but via SQL they are used as % and _
// here they are converted back.
Expand Down Expand Up @@ -356,8 +343,10 @@ static class AnyNotLikeQuery extends AbstractAnyQuery {

@Override
protected Query applyArrayReference(Reference arrayReference, Literal literal, Context context) throws IOException {
String notLike = negateWildcard(
convertWildcardToRegex(BytesRefs.toString(literal.value())));
String regexString = LikeOperator.patternToRegex(BytesRefs.toString(literal.value()), LikeOperator.DEFAULT_ESCAPE, false);
regexString = regexString.substring(1, regexString.length() - 1);
String notLike = negateWildcard(regexString);

return new RegexpQuery(new Term(
arrayReference.info().ident().columnIdent().fqn(),
notLike),
Expand Down
11 changes: 3 additions & 8 deletions sql/src/main/java/io/crate/lucene/QueryBuilderHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.lucene.search.*;
import org.elasticsearch.common.lucene.search.MatchNoDocsFilter;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.index.cache.filter.FilterCache;
import org.elasticsearch.index.mapper.ip.IpFieldMapper;

Expand Down Expand Up @@ -273,13 +274,7 @@ public Filter eqFilter(String columnName, Object value) {

/**
 * Builds the Lucene query for a LIKE comparison on {@code columnName}.
 *
 * @param columnName name of the field used for the query term
 * @param value the LIKE pattern; converted to a string via {@code BytesRefs.toString}
 * @param filterCache optional cache; only consulted by the filter-based path below
 * @return the query matching the converted pattern
 */
@Override
public Query like(String columnName, Object value, @Nullable FilterCache filterCache) {

// NOTE(review): this body looks like both sides of a diff hunk rendered together.
// The filter-based statements up to the first return appear to be the removed
// implementation, and the WildcardQuery return the added one; confirm against the
// committed file.
// Filter-based path: regexp filter on the converted pattern, cached when a cache
// is supplied, wrapped in a constant-score query.
Filter filter = new RegexpFilter(
new Term(columnName, LuceneQueryBuilder.convertWildcardToRegex(BytesRefs.toString(value))));
if (filterCache != null) {
filter = filterCache.cache(filter);
}
return new XConstantScoreQuery(filter);
// Wildcard path: the pattern is converted to Lucene wildcard syntax; filterCache is not used here.
return new WildcardQuery(new Term(columnName, LuceneQueryBuilder.convertSqlLikeToLuceneWildcard(BytesRefs.toString(value))));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ public static String patternToRegex(String patternString, char escapeChar, boole
case ']':
case '(':
case ')':
case '|':
case '+':
regex.append('\\');
}

Expand Down
17 changes: 13 additions & 4 deletions sql/src/test/java/io/crate/lucene/LuceneQueryBuilderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,7 @@ public void testAnyOnArrayLiteral() throws Exception {
for (int i = 0; i < 2; i++) {
// like --> WildcardQuery (before this commit: XConstantScoreQuery with regexp-filter)
Query filteredQuery = likeBQuery.clauses().get(i).getQuery();
assertThat(filteredQuery, instanceOf(XConstantScoreQuery.class));
assertThat(((XConstantScoreQuery)filteredQuery).getFilter(), instanceOf(RegexpFilter.class));
assertThat(filteredQuery, instanceOf(WildcardQuery.class));
}

// col not like any (1,2,3)
Expand All @@ -322,8 +321,7 @@ public void testAnyOnArrayLiteral() throws Exception {
assertThat(((BooleanQuery)clause.getQuery()).clauses(), hasSize(3));
for (BooleanClause innerClause : ((BooleanQuery)clause.getQuery()).clauses()) {
assertThat(innerClause.getOccur(), is(BooleanClause.Occur.MUST));
assertThat(innerClause.getQuery(), instanceOf(XConstantScoreQuery.class));
assertThat(((XConstantScoreQuery)innerClause.getQuery()).getFilter(), instanceOf(RegexpFilter.class));
assertThat(innerClause.getQuery(), instanceOf(WildcardQuery.class));
}


Expand All @@ -334,6 +332,17 @@ public void testAnyOnArrayLiteral() throws Exception {
assertThat(ltBQuery.toString(), is("(d:{* TO a} d:{* TO b} d:{* TO c})~1"));
}

/**
 * Checks the SQL LIKE to Lucene wildcard conversion for both SQL wildcards,
 * their escaped forms, and the Lucene wildcard characters that must be escaped.
 */
@Test
public void testSqlLikeToLuceneWildcard() throws Exception {
    // each row: { SQL LIKE pattern, expected Lucene wildcard pattern }
    String[][] expectations = {
        {"%me", "*me"},
        {"\\%me", "%me"},
        {"*me", "\\*me"},
        {"_me", "?me"},
        {"\\_me", "_me"},
        {"?me", "\\?me"},
    };
    for (String[] expectation : expectations) {
        String sqlPattern = expectation[0];
        String lucenePattern = expectation[1];
        assertThat(LuceneQueryBuilder.convertSqlLikeToLuceneWildcard(sqlPattern), is(lucenePattern));
    }
}

/**
 * Test helper: runs the given where clause through {@code builder} using the
 * test's {@code searchContext} and {@code indexCache}, and returns the
 * {@code query} field of the conversion result.
 *
 * @param clause the where clause to convert
 * @return the query produced by the builder
 */
private Query convert(WhereClause clause) {
return builder.convert(clause, searchContext, indexCache).query;
}
Expand Down

0 comments on commit 6c24c22

Please sign in to comment.