-
Notifications
You must be signed in to change notification settings - Fork 1k
PHOENIX-4594: Perform binary search on guideposts during query compilation #347
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -823,12 +823,189 @@ private void testSelectQueriesWithFilters(boolean useStatsForParallelization) th | |
| assertEquals(100 + i, rs.getInt(1)); | ||
| i++; | ||
| } | ||
| assertEquals(numRows, i); | ||
| info = getByteRowEstimates(conn, sql, binds); | ||
| // Depending on the guidepost boundary, this estimate | ||
| // can be slightly off. It's called estimate for a reason. | ||
| assertEquals((Long) 3L, info.getEstimatedRows()); | ||
| assertEquals((Long) 160L, info.getEstimatedBytes()); | ||
| assertTrue(info.getEstimateInfoTs() > 0); | ||
| // Query whose start key is between and end key is after data. | ||
| sql = "SELECT a FROM " + tableName + " WHERE K <= 120 AND K >= 100"; | ||
| rs = conn.createStatement().executeQuery(sql); | ||
| i = 0; | ||
| numRows = 10; | ||
| while (rs.next()) { | ||
| assertEquals(100 + i, rs.getInt(1)); | ||
| i++; | ||
| } | ||
| assertEquals(numRows, i); | ||
| info = getByteRowEstimates(conn, sql, binds); | ||
| // Depending on the guidepost boundary, this estimate | ||
| // can be slightly off. It's called estimate for a reason. | ||
| assertEquals((Long) 10L, info.getEstimatedRows()); | ||
| assertEquals((Long) 720L, info.getEstimatedBytes()); | ||
| assertTrue(info.getEstimateInfoTs() > 0); | ||
| // Query whose start key and end key are both between data. | ||
| sql = "SELECT a FROM " + tableName + " WHERE K <= 109 AND K >= 100"; | ||
| rs = conn.createStatement().executeQuery(sql); | ||
| i = 0; | ||
| numRows = 10; | ||
| while (rs.next()) { | ||
| assertEquals(100 + i, rs.getInt(1)); | ||
| i++; | ||
| } | ||
| assertEquals(numRows, i); | ||
| info = getByteRowEstimates(conn, sql, binds); | ||
| // Depending on the guidepost boundary, this estimate | ||
| // can be slightly off. It's called estimate for a reason. | ||
| assertEquals((Long) 10L, info.getEstimatedRows()); | ||
| assertEquals((Long) 720L, info.getEstimatedBytes()); | ||
| assertTrue(info.getEstimateInfoTs() > 0); | ||
| } | ||
| } | ||
|
|
||
| @Test | ||
| public void testSelectQueriesWithGuidePostMovingWindows() throws Exception { | ||
| testSelectQueriesWithGuidePostMovingWindows(0); | ||
| testSelectQueriesWithGuidePostMovingWindows(1); | ||
| testSelectQueriesWithGuidePostMovingWindows(2); | ||
| testSelectQueriesWithGuidePostMovingWindows(10); | ||
| testSelectQueriesWithGuidePostMovingWindows(256); | ||
| } | ||
|
|
||
| private void testSelectQueriesWithGuidePostMovingWindows(int movingWindowSize) throws Exception { | ||
| String tableName = generateUniqueName(); | ||
| try (Connection conn = DriverManager.getConnection(getUrl())) { | ||
| int guidePostWidth = 20; | ||
| String ddl = | ||
| "CREATE TABLE " + tableName + " (k INTEGER PRIMARY KEY, a bigint, b bigint)" | ||
| + " GUIDE_POSTS_WIDTH=" + guidePostWidth | ||
| + ", USE_STATS_FOR_PARALLELIZATION=true" + " SPLIT ON (102, 105, 108)"; | ||
| conn.createStatement().execute(ddl); | ||
| conn.createStatement().execute("upsert into " + tableName + " values (100,100,3)"); | ||
| conn.createStatement().execute("upsert into " + tableName + " values (101,101,4)"); | ||
| conn.createStatement().execute("upsert into " + tableName + " values (102,102,4)"); | ||
| conn.createStatement().execute("upsert into " + tableName + " values (103,103,4)"); | ||
| conn.createStatement().execute("upsert into " + tableName + " values (104,104,4)"); | ||
| conn.createStatement().execute("upsert into " + tableName + " values (105,105,4)"); | ||
| conn.createStatement().execute("upsert into " + tableName + " values (106,106,4)"); | ||
| conn.createStatement().execute("upsert into " + tableName + " values (107,107,4)"); | ||
| conn.createStatement().execute("upsert into " + tableName + " values (108,108,4)"); | ||
| conn.createStatement().execute("upsert into " + tableName + " values (109,109,4)"); | ||
| conn.commit(); | ||
| conn.createStatement().execute("UPDATE STATISTICS " + tableName + ""); | ||
| } | ||
| List<Object> binds = Lists.newArrayList(); | ||
| try (Connection conn = DriverManager.getConnection(getUrl())) { | ||
| int defaultGuidePostsMovingWindowSize = | ||
| PhoenixConfigurationUtil.getGuidePostsMovingWindowSize(conn.unwrap(PhoenixConnection.class)); | ||
| PhoenixConfigurationUtil.setGuidePostsMovingWindowSize( | ||
| conn.unwrap(PhoenixConnection.class), movingWindowSize); | ||
|
|
||
| // Query whose start key is before any data | ||
| String sql = "SELECT a FROM " + tableName + " WHERE K >= 99"; | ||
| ResultSet rs = conn.createStatement().executeQuery(sql); | ||
| int i = 0; | ||
| int numRows = 10; | ||
| while (rs.next()) { | ||
| assertEquals(100 + i, rs.getInt(1)); | ||
| i++; | ||
| } | ||
| assertEquals(numRows, i); | ||
| Estimate info = getByteRowEstimates(conn, sql, binds); | ||
| assertEquals((Long) 10L, info.getEstimatedRows()); | ||
| assertEquals((Long) 720L, info.getEstimatedBytes()); | ||
| assertTrue(info.getEstimateInfoTs() > 0); | ||
|
|
||
| // Query whose start key is after any data | ||
| sql = "SELECT a FROM " + tableName + " WHERE K >= 110"; | ||
| rs = conn.createStatement().executeQuery(sql); | ||
| assertFalse(rs.next()); | ||
| info = getByteRowEstimates(conn, sql, binds); | ||
| assertEquals((Long) 0L, info.getEstimatedRows()); | ||
| assertEquals((Long) 0L, info.getEstimatedBytes()); | ||
| assertTrue(info.getEstimateInfoTs() > 0); | ||
|
|
||
| // Point lookup query. | ||
| sql = "SELECT a FROM " + tableName + " WHERE K = 101"; | ||
| rs = conn.createStatement().executeQuery(sql); | ||
| i = 0; | ||
| numRows = 1; | ||
| while (rs.next()) { | ||
| assertEquals(101, rs.getInt(1)); | ||
| i++; | ||
| } | ||
| assertEquals(numRows, i); | ||
| info = getByteRowEstimates(conn, sql, binds); | ||
| assertEquals((Long) 1L, info.getEstimatedRows()); | ||
| assertEquals((Long) 97L, info.getEstimatedBytes()); | ||
| assertTrue(info.getEstimateInfoTs() == 0); | ||
|
|
||
| // Query whose end key is before any data | ||
| sql = "SELECT a FROM " + tableName + " WHERE K <= 98"; | ||
| rs = conn.createStatement().executeQuery(sql); | ||
| assertFalse(rs.next()); | ||
| info = getByteRowEstimates(conn, sql, binds); | ||
| assertEquals((Long) 0L, info.getEstimatedRows()); | ||
| assertEquals((Long) 0L, info.getEstimatedBytes()); | ||
| assertTrue(info.getEstimateInfoTs() > 0); | ||
|
|
||
| // Query whose end key is after any data. | ||
| // In this case, we return the estimate as scanning all the guide posts. | ||
| sql = "SELECT a FROM " + tableName + " WHERE K <= 110"; | ||
| rs = conn.createStatement().executeQuery(sql); | ||
| i = 0; | ||
| numRows = 10; | ||
| while (rs.next()) { | ||
| assertEquals(100 + i, rs.getInt(1)); | ||
| i++; | ||
| } | ||
| assertEquals(numRows, i); | ||
| info = getByteRowEstimates(conn, sql, binds); | ||
| assertEquals((Long) 10L, info.getEstimatedRows()); | ||
| assertEquals((Long) 720L, info.getEstimatedBytes()); | ||
| assertTrue(info.getEstimateInfoTs() > 0); | ||
|
|
||
| // Query whose start key and end key is before any data. | ||
| // In this case, we return the estimate as scanning the first guide post | ||
| sql = "SELECT a FROM " + tableName + " WHERE K <= 90 AND K >= 80"; | ||
| rs = conn.createStatement().executeQuery(sql); | ||
| assertFalse(rs.next()); | ||
| info = getByteRowEstimates(conn, sql, binds); | ||
| assertEquals((Long) 0L, info.getEstimatedRows()); | ||
| assertEquals((Long) 0L, info.getEstimatedBytes()); | ||
| assertTrue(info.getEstimateInfoTs() > 0); | ||
|
|
||
| // Query whose start key and end key is after any data. | ||
| // In this case, we return the estimate as scanning no guide post | ||
| sql = "SELECT a FROM " + tableName + " WHERE K <= 130 AND K >= 120"; | ||
| rs = conn.createStatement().executeQuery(sql); | ||
| assertFalse(rs.next()); | ||
| info = getByteRowEstimates(conn, sql, binds); | ||
| assertEquals((Long) 0L, info.getEstimatedRows()); | ||
| assertEquals((Long) 0L, info.getEstimatedBytes()); | ||
| assertTrue(info.getEstimateInfoTs() > 0); | ||
|
|
||
| // Query whose start key is before and end key is between data. | ||
| // In this case, we return the estimate as scanning the guide posts | ||
| // between the first key and end ky | ||
| sql = "SELECT a FROM " + tableName + " WHERE K <= 102 AND K >= 90"; | ||
| rs = conn.createStatement().executeQuery(sql); | ||
| i = 0; | ||
| numRows = 3; | ||
| while (rs.next()) { | ||
| assertEquals(100 + i, rs.getInt(1)); | ||
| i++; | ||
| } | ||
| assertEquals(numRows, i); | ||
| info = getByteRowEstimates(conn, sql, binds); | ||
| // Depending on the guidepost boundary, this estimate | ||
| // can be slightly off. It's called estimate for a reason. | ||
| assertEquals((Long) 3L, info.getEstimatedRows()); | ||
| assertEquals((Long) 160L, info.getEstimatedBytes()); | ||
| assertTrue(info.getEstimateInfoTs() > 0); | ||
|
|
||
| // Query whose start key is between and end key is after data. | ||
| sql = "SELECT a FROM " + tableName + " WHERE K <= 120 AND K >= 100"; | ||
| rs = conn.createStatement().executeQuery(sql); | ||
|
|
@@ -838,12 +1015,14 @@ private void testSelectQueriesWithFilters(boolean useStatsForParallelization) th | |
| assertEquals(100 + i, rs.getInt(1)); | ||
| i++; | ||
| } | ||
| assertEquals(numRows, i); | ||
| info = getByteRowEstimates(conn, sql, binds); | ||
| // Depending on the guidepost boundary, this estimate | ||
| // can be slightly off. It's called estimate for a reason. | ||
| assertEquals((Long) 10L, info.getEstimatedRows()); | ||
| assertEquals((Long) 720L, info.getEstimatedBytes()); | ||
| assertTrue(info.getEstimateInfoTs() > 0); | ||
|
|
||
| // Query whose start key and end key are both between data. | ||
| sql = "SELECT a FROM " + tableName + " WHERE K <= 109 AND K >= 100"; | ||
| rs = conn.createStatement().executeQuery(sql); | ||
|
|
@@ -853,12 +1032,52 @@ private void testSelectQueriesWithFilters(boolean useStatsForParallelization) th | |
| assertEquals(100 + i, rs.getInt(1)); | ||
| i++; | ||
| } | ||
| assertEquals(numRows, i); | ||
| info = getByteRowEstimates(conn, sql, binds); | ||
| // Depending on the guidepost boundary, this estimate | ||
| // can be slightly off. It's called estimate for a reason. | ||
| assertEquals((Long) 10L, info.getEstimatedRows()); | ||
| assertEquals((Long) 720L, info.getEstimatedBytes()); | ||
| assertTrue(info.getEstimateInfoTs() > 0); | ||
|
|
||
| // Query with multiple scan ranges, and each range's start key and end key are both between data | ||
| sql = "SELECT a FROM " + tableName + " WHERE K <= 103 AND K >= 101 OR K <= 108 AND K >= 106"; | ||
| rs = conn.createStatement().executeQuery(sql); | ||
| i = 0; | ||
| numRows = 6; | ||
| int[] result = new int[] { 101, 102, 103, 106, 107, 108 }; | ||
| while (rs.next()) { | ||
| assertEquals(result[i++], rs.getInt(1)); | ||
| } | ||
| assertEquals(numRows, i); | ||
| info = getByteRowEstimates(conn, sql, binds); | ||
| // Depending on the guidepost boundary, this estimate | ||
| // can be slightly off. It's called estimate for a reason. | ||
| assertEquals((Long) 6L, info.getEstimatedRows()); | ||
| assertEquals((Long) 460L, info.getEstimatedBytes()); | ||
| // TODO: the original code before this change will hit the following assertion. Need to investigate it. | ||
| // assertTrue(info.getEstimateInfoTs() > 0); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why does this assert fail now?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As the "TODO" in the comment said, this is a new test case I wanted to add in this change, but it failed. So I reverted all the changes except this new test case, but it still failed, which means there is problem in the current code base without any of my change. I commented out the assertion here, but I opened another JIRA PHOENIX-4914 to track the original problem in the code base. |
||
|
|
||
| // Query with multiple scan ranges which start with the min key and end with the max key | ||
| sql = "SELECT a FROM " + tableName + " WHERE K <= 102 AND K >= 100 OR K <= 109 AND K >= 107"; | ||
| rs = conn.createStatement().executeQuery(sql); | ||
| i = 0; | ||
| numRows = 6; | ||
| result = new int[] { 100, 101, 102, 107, 108, 109 }; | ||
| while (rs.next()) { | ||
| assertEquals(result[i++], rs.getInt(1)); | ||
| } | ||
| assertEquals(numRows, i); | ||
| info = getByteRowEstimates(conn, sql, binds); | ||
| // Depending on the guidepost boundary, this estimate | ||
| // can be slightly off. It's called estimate for a reason. | ||
| assertEquals((Long) 6L, info.getEstimatedRows()); | ||
| assertEquals((Long) 390L, info.getEstimatedBytes()); | ||
| assertTrue(info.getEstimateInfoTs() > 0); | ||
|
|
||
| // Restore the default moving window size | ||
| PhoenixConfigurationUtil.setGuidePostsMovingWindowSize( | ||
| conn.unwrap(PhoenixConnection.class), defaultGuidePostsMovingWindowSize); | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
refactor the common init code in initDataAndStats and reuse it here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll hold this change as discussed in PHOENIX-4594.