Skip to content

Commit

Permalink
PHOENIX-1287 Use the joni byte[] regex engine in place of j.u.regex (…
Browse files Browse the repository at this point in the history
…Shuxiong Ye)
  • Loading branch information
jtaylor-sfdc committed Apr 14, 2015
1 parent 7ef1718 commit 3f6b259
Show file tree
Hide file tree
Showing 43 changed files with 1,952 additions and 161 deletions.
2 changes: 2 additions & 0 deletions phoenix-assembly/src/build/components-major-client.xml
Expand Up @@ -49,6 +49,8 @@
<include>org.codehaus.jackson:jackson-core-asl</include> <include>org.codehaus.jackson:jackson-core-asl</include>
<include>commons-collections:commons-collections</include> <include>commons-collections:commons-collections</include>
<include>joda-time:joda-time</include> <include>joda-time:joda-time</include>
<include>org.jruby.joni:joni</include>
<include>org.jruby.jcodings:jcodings</include>
</includes> </includes>
</dependencySet> </dependencySet>
</dependencySets> </dependencySets>
Expand Down
5 changes: 5 additions & 0 deletions phoenix-core/pom.xml
Expand Up @@ -417,5 +417,10 @@
<groupId>org.apache.hadoop</groupId> <groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minicluster</artifactId> <artifactId>hadoop-minicluster</artifactId>
</dependency> </dependency>
<dependency>
<groupId>org.jruby.joni</groupId>
<artifactId>joni</artifactId>
<version>${joni.version}</version>
</dependency>
</dependencies> </dependencies>
</project> </project>
@@ -0,0 +1,88 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.phoenix.end2end;

import static org.apache.phoenix.util.TestUtil.closeStmtAndConn;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import org.junit.Before;
import org.junit.Test;

/**
 * End-to-end checks of the SQL {@code LIKE} operator against a small table whose
 * row keys differ in digit order and letter case.
 */
public class LikeExpressionIT extends BaseHBaseManagedTimeIT {

    /**
     * Creates {@code testTable} and loads six rows before each test.
     *
     * <p>The {@code i} values are distinct powers of two, so the sum of {@code i}
     * over a result set identifies exactly which rows were returned.
     *
     * @throws Exception if the table cannot be created or a row cannot be written
     */
    @Before
    public void doBeforeTestSetup() throws Exception {
        Connection conn = null;
        PreparedStatement stmt = null;
        try {
            conn = DriverManager.getConnection(getUrl());
            String ddl = "CREATE TABLE testTable (k VARCHAR NOT NULL PRIMARY KEY, i INTEGER)";
            stmt = conn.prepareStatement(ddl);
            stmt.execute();
            conn.commit();
            // Rows are written inside the try block so the connection is still
            // open; it is only released in the finally clause below.
            insertRow(conn, "123n7-app-2-", 1);
            insertRow(conn, "132n7-App-2-", 2);
            insertRow(conn, "213n7-app-2-", 4);
            insertRow(conn, "231n7-App-2-", 8);
            insertRow(conn, "312n7-app-2-", 16);
            insertRow(conn, "321n7-App-2-", 32);
        } finally {
            closeStmtAndConn(stmt, conn);
        }
    }

    /**
     * Upserts a single row into {@code testTable} and commits it.
     *
     * @param conn open connection used for the write
     * @param k row key
     * @param i integer payload stored alongside the key
     * @throws SQLException if the upsert or the commit fails
     */
    private void insertRow(Connection conn, String k, int i) throws SQLException {
        PreparedStatement stmt = conn.prepareStatement("UPSERT INTO testTable VALUES (?, ?)");
        try {
            stmt.setString(1, k);
            stmt.setInt(2, i);
            stmt.executeUpdate();
            conn.commit();
        } finally {
            stmt.close();
        }
    }

    /**
     * Runs {@code k LIKE likeStr} against {@code testTable} and verifies both the
     * number of rows returned and the sum of their {@code i} column.
     *
     * @param conn open connection used for the query
     * @param likeStr LIKE pattern, embedded verbatim as a SQL string literal
     * @param numResult exact number of rows the query must return
     * @param expectedSum expected sum of {@code i} over the returned rows
     * @throws Exception if the query fails
     */
    private void testLikeExpression(Connection conn, String likeStr, int numResult, int expectedSum)
            throws Exception {
        String cmd = "select k, i from testTable where k like '" + likeStr + "'";
        Statement stmt = conn.createStatement();
        try {
            ResultSet rs = stmt.executeQuery(cmd);
            int sum = 0;
            for (int i = 0; i < numResult; ++i) {
                assertTrue(rs.next());
                sum += rs.getInt("i");
            }
            assertFalse(rs.next());
            // JUnit's contract is (expected, actual); the order matters for the failure message.
            assertEquals(expectedSum, sum);
        } finally {
            // Closing the statement also closes the ResultSet it produced.
            stmt.close();
        }
    }

    /**
     * Exercises multi-wildcard matching and case-sensitive matching.
     *
     * @throws Exception if a query fails
     */
    @Test
    public void testLikeExpression() throws Exception {
        Connection conn = DriverManager.getConnection(getUrl());
        try {
            // wildcard
            testLikeExpression(conn, "%1%3%7%2%", 3, 7);
            // CaseSensitive
            testLikeExpression(conn, "%A%", 3, 42);
        } finally {
            conn.close();
        }
    }
}
@@ -0,0 +1,100 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.phoenix.end2end;

import static org.apache.phoenix.util.TestUtil.GROUPBYTEST_NAME;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import org.junit.Before;
import org.junit.Test;


/**
 * End-to-end checks of {@code REGEXP_REPLACE}, used both as a GROUP BY key and
 * inside a WHERE filter, with and without an explicit replacement string.
 */
public class RegexpReplaceFunctionIT extends BaseHBaseManagedTimeIT {

    /** Counter used to build the next row id ("id0", "id1", ...). */
    private int id;

    /**
     * Ensures the group-by test table exists and loads four rows, committing
     * them in a single batch.
     *
     * @throws Exception if the table cannot be created or a row cannot be written
     */
    @Before
    public void doBeforeTestSetup() throws Exception {
        ensureTableCreated(getUrl(), GROUPBYTEST_NAME);
        Connection conn = DriverManager.getConnection(getUrl());
        try {
            insertRow(conn, "Report11", 10);
            insertRow(conn, "Report11", 10);
            insertRow(conn, "Report22", 30);
            insertRow(conn, "Report33", 30);
            conn.commit();
        } finally {
            conn.close();
        }
    }

    /**
     * Upserts one row keyed by the current {@link #id} counter, then advances
     * the counter. The caller is responsible for committing.
     *
     * @param conn open connection used for the write
     * @param uri value for the {@code uri} column
     * @param appcpu value for the {@code appcpu} column
     * @throws SQLException if the upsert fails
     */
    private void insertRow(Connection conn, String uri, int appcpu) throws SQLException {
        PreparedStatement statement = conn.prepareStatement("UPSERT INTO " + GROUPBYTEST_NAME + "(id, uri, appcpu) values (?,?,?)");
        try {
            statement.setString(1, "id" + id);
            statement.setString(2, uri);
            statement.setInt(3, appcpu);
            statement.executeUpdate();
        } finally {
            statement.close();
        }
        id++;
    }

    /**
     * Groups by the result of {@code REGEXP_REPLACE}; every uri collapses to the
     * same key, so a single group carrying the total cpu is expected.
     *
     * @throws Exception if a query fails
     */
    @Test
    public void testGroupByScanWithRegexpReplace() throws Exception {
        Connection conn = DriverManager.getConnection(getUrl());
        try {
            Statement stmt = conn.createStatement();
            try {
                // Explicit replacement string.
                ResultSet rs = stmt.executeQuery("select REGEXP_REPLACE(uri, '[1-3]+', '*') suburi, sum(appcpu) sumcpu from " + GROUPBYTEST_NAME + " group by suburi");
                assertTrue(rs.next());
                assertEquals("Report*", rs.getString("suburi"));
                assertEquals(80, rs.getInt("sumcpu"));
                assertFalse(rs.next());
            } finally {
                stmt.close();
            }

            stmt = conn.createStatement();
            try {
                // Replacement argument omitted: the matched text is removed.
                ResultSet rs = stmt.executeQuery("select REGEXP_REPLACE(uri, '[1-3]+') suburi, sum(appcpu) sumcpu from " + GROUPBYTEST_NAME + " group by suburi");
                assertTrue(rs.next());
                assertEquals("Report", rs.getString("suburi"));
                assertEquals(80, rs.getInt("sumcpu"));
                assertFalse(rs.next());
            } finally {
                stmt.close();
            }
        } finally {
            conn.close();
        }
    }

    /**
     * Filters on the result of {@code REGEXP_REPLACE}; only the rows whose uri
     * ends in 2s or 3s ("id2" and "id3") are rewritten to the compared value.
     *
     * @throws Exception if a query fails
     */
    @Test
    public void testFilterWithRegexReplace() throws Exception {
        Connection conn = DriverManager.getConnection(getUrl());
        try {
            Statement stmt = conn.createStatement();
            try {
                // Explicit replacement string.
                ResultSet rs = stmt.executeQuery("select id from " + GROUPBYTEST_NAME + " where REGEXP_REPLACE(uri, '[2-3]+', '*') = 'Report*'");
                assertTrue(rs.next());
                assertEquals("id2", rs.getString(1));
                assertTrue(rs.next());
                assertEquals("id3", rs.getString(1));
                assertFalse(rs.next());
            } finally {
                stmt.close();
            }

            stmt = conn.createStatement();
            try {
                // Replacement argument omitted: the matched text is removed.
                ResultSet rs = stmt.executeQuery("select id from " + GROUPBYTEST_NAME + " where REGEXP_REPLACE(uri, '[2-3]+') = 'Report'");
                assertTrue(rs.next());
                assertEquals("id2", rs.getString(1));
                assertTrue(rs.next());
                assertEquals("id3", rs.getString(1));
                assertFalse(rs.next());
            } finally {
                stmt.close();
            }
        } finally {
            conn.close();
        }
    }

}
Expand Up @@ -58,35 +58,54 @@ private void insertRow(Connection conn, String uri, int appcpu) throws SQLExcept
id++; id++;
} }


@Test private void testGroupByScanWithRegexpSubstr(Connection conn, Integer offset, String exceptedSubstr) throws Exception {
public void testGroupByScanWithRegexpSubstr() throws Exception { String cmd = "select REGEXP_SUBSTR(uri, '[^\\\\?]+'" + ((offset == null) ? "" : ", " + offset.intValue()) +") suburi, sum(appcpu) sumcpu from " + GROUPBYTEST_NAME + " group by suburi";
Connection conn = DriverManager.getConnection(getUrl());
Statement stmt = conn.createStatement(); Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery("select REGEXP_SUBSTR(uri, '[^\\\\?]+') suburi, sum(appcpu) sumcpu from " + GROUPBYTEST_NAME ResultSet rs = stmt.executeQuery(cmd);
+ " group by suburi");
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals(rs.getString("suburi"), "Report1"); assertEquals(rs.getString("suburi"), exceptedSubstr + "1");
assertEquals(rs.getInt("sumcpu"), 20); assertEquals(rs.getInt("sumcpu"), 20);
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals(rs.getString("suburi"), "Report2"); assertEquals(rs.getString("suburi"), exceptedSubstr + "2");
assertEquals(rs.getInt("sumcpu"), 30); assertEquals(rs.getInt("sumcpu"), 30);
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals(rs.getString("suburi"), "Report3"); assertEquals(rs.getString("suburi"), exceptedSubstr + "3");
assertEquals(rs.getInt("sumcpu"), 30); assertEquals(rs.getInt("sumcpu"), 30);
assertFalse(rs.next()); assertFalse(rs.next());
conn.close();
} }


@Test @Test
public void testFilterWithRegexSubstr() throws Exception { public void testGroupByScanWithRegexpSubstr() throws Exception {
Connection conn = DriverManager.getConnection(getUrl()); Connection conn = DriverManager.getConnection(getUrl());
ResultSet rs = conn.createStatement().executeQuery( // Default offset
"select id from " + GROUPBYTEST_NAME + " where REGEXP_SUBSTR(uri, '[^\\\\?]+') = 'Report1'"); testGroupByScanWithRegexpSubstr(conn, null, "Report");
// Positive offset
testGroupByScanWithRegexpSubstr(conn, Integer.valueOf(2), "eport");
// Negative offset
testGroupByScanWithRegexpSubstr(conn, Integer.valueOf(-5), "rt");
conn.close();
}

private void testFilterWithRegexSubstr(Connection conn, Integer offset, String exceptedSubstr) throws Exception {
String cmd = "select id from " + GROUPBYTEST_NAME + " where REGEXP_SUBSTR(uri, '[^\\\\?]+'"+ ((offset == null) ? "" : ", " + offset.intValue()) +") = '" + exceptedSubstr + "1'";
ResultSet rs = conn.createStatement().executeQuery(cmd);
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals("id0", rs.getString(1)); assertEquals("id0", rs.getString(1));
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals("id1", rs.getString(1)); assertEquals("id1", rs.getString(1));
assertFalse(rs.next()); assertFalse(rs.next());
} }


@Test
public void testFilterWithRegexSubstr() throws Exception {
Connection conn = DriverManager.getConnection(getUrl());
// Default offset
testFilterWithRegexSubstr(conn, null, "Report");
// Positive offset
testFilterWithRegexSubstr(conn, Integer.valueOf(2), "eport");
// Negative offset
testFilterWithRegexSubstr(conn, Integer.valueOf(-5), "rt");
conn.close();
}

} }
Expand Up @@ -32,6 +32,7 @@
import org.apache.phoenix.exception.SQLExceptionInfo; import org.apache.phoenix.exception.SQLExceptionInfo;
import org.apache.phoenix.expression.AndExpression; import org.apache.phoenix.expression.AndExpression;
import org.apache.phoenix.expression.ArrayConstructorExpression; import org.apache.phoenix.expression.ArrayConstructorExpression;
import org.apache.phoenix.expression.ByteBasedLikeExpression;
import org.apache.phoenix.expression.CaseExpression; import org.apache.phoenix.expression.CaseExpression;
import org.apache.phoenix.expression.CoerceExpression; import org.apache.phoenix.expression.CoerceExpression;
import org.apache.phoenix.expression.ComparisonExpression; import org.apache.phoenix.expression.ComparisonExpression;
Expand Down Expand Up @@ -60,6 +61,7 @@
import org.apache.phoenix.expression.OrExpression; import org.apache.phoenix.expression.OrExpression;
import org.apache.phoenix.expression.RowKeyColumnExpression; import org.apache.phoenix.expression.RowKeyColumnExpression;
import org.apache.phoenix.expression.RowValueConstructorExpression; import org.apache.phoenix.expression.RowValueConstructorExpression;
import org.apache.phoenix.expression.StringBasedLikeExpression;
import org.apache.phoenix.expression.StringConcatExpression; import org.apache.phoenix.expression.StringConcatExpression;
import org.apache.phoenix.expression.TimestampAddExpression; import org.apache.phoenix.expression.TimestampAddExpression;
import org.apache.phoenix.expression.TimestampSubtractExpression; import org.apache.phoenix.expression.TimestampSubtractExpression;
Expand Down Expand Up @@ -100,6 +102,8 @@
import org.apache.phoenix.parse.SubqueryParseNode; import org.apache.phoenix.parse.SubqueryParseNode;
import org.apache.phoenix.parse.SubtractParseNode; import org.apache.phoenix.parse.SubtractParseNode;
import org.apache.phoenix.parse.UnsupportedAllParseNodeVisitor; import org.apache.phoenix.parse.UnsupportedAllParseNodeVisitor;
import org.apache.phoenix.query.QueryServices;
import org.apache.phoenix.query.QueryServicesOptions;
import org.apache.phoenix.schema.ColumnFamilyNotFoundException; import org.apache.phoenix.schema.ColumnFamilyNotFoundException;
import org.apache.phoenix.schema.ColumnNotFoundException; import org.apache.phoenix.schema.ColumnNotFoundException;
import org.apache.phoenix.schema.ColumnRef; import org.apache.phoenix.schema.ColumnRef;
Expand Down Expand Up @@ -497,7 +501,16 @@ public Expression visitLeave(LikeParseNode node, List<Expression> children) thro
} }
} }
} }
Expression expression = LikeExpression.create(children, node.getLikeType()); QueryServices services = context.getConnection().getQueryServices();
boolean useByteBasedRegex =
services.getProps().getBoolean(QueryServices.USE_BYTE_BASED_REGEX_ATTRIB,
QueryServicesOptions.DEFAULT_USE_BYTE_BASED_REGEX);
Expression expression;
if (useByteBasedRegex) {
expression = ByteBasedLikeExpression.create(children, node.getLikeType());
} else {
expression = StringBasedLikeExpression.create(children, node.getLikeType());
}
if (ExpressionUtil.isConstant(expression)) { if (ExpressionUtil.isConstant(expression)) {
ImmutableBytesWritable ptr = context.getTempPtr(); ImmutableBytesWritable ptr = context.getTempPtr();
if (!expression.evaluate(null, ptr)) { if (!expression.evaluate(null, ptr)) {
Expand Down
@@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.phoenix.expression;

import java.util.List;

import org.apache.phoenix.expression.util.regex.AbstractBasePattern;
import org.apache.phoenix.expression.util.regex.JONIPattern;
import org.apache.phoenix.parse.LikeParseNode.LikeType;

/**
 * A {@link LikeExpression} whose pattern specs are compiled into
 * {@link JONIPattern} instances.
 */
public class ByteBasedLikeExpression extends LikeExpression {

    /**
     * Builds a new instance from the given operands plus the like type.
     *
     * @param children operand expressions of the LIKE
     * @param likeType like type to attach via {@code addLikeTypeChild}
     * @return a new {@code ByteBasedLikeExpression}
     */
    public static LikeExpression create(List<Expression> children, LikeType likeType) {
        List<Expression> withLikeType = addLikeTypeChild(children, likeType);
        return new ByteBasedLikeExpression(withLikeType);
    }

    /**
     * No-argument constructor.
     * NOTE(review): presumably required for reflective instantiation during
     * deserialization - confirm against LikeExpression.
     */
    public ByteBasedLikeExpression() {
    }

    /**
     * @param children child expressions handed straight to the superclass
     */
    public ByteBasedLikeExpression(List<Expression> children) {
        super(children);
    }

    /**
     * Returns a fresh expression of this same class over the supplied children.
     */
    @Override
    public LikeExpression clone(List<Expression> children) {
        LikeExpression copy = new ByteBasedLikeExpression(children);
        return copy;
    }

    /**
     * Wraps the pattern spec in a {@link JONIPattern}.
     */
    @Override
    protected AbstractBasePattern compilePatternSpec(String value) {
        AbstractBasePattern compiled = new JONIPattern(value);
        return compiled;
    }
}

0 comments on commit 3f6b259

Please sign in to comment.