From 9291fcf67898d986898ac27ce8f068ec153a11c7 Mon Sep 17 00:00:00 2001 From: Bin Shi Date: Tue, 18 Sep 2018 12:19:51 -0700 Subject: [PATCH] PHOENIX-4008: UPDATE STATISTIC should run raw scan with all versions of cells. --- .../schema/stats/StatsCollectorIT.java | 72 +++++++++++++------ .../apache/phoenix/schema/MetaDataClient.java | 1 + 2 files changed, 53 insertions(+), 20 deletions(-) diff --git a/phoenix-core/src/it/java/org/apache/phoenix/schema/stats/StatsCollectorIT.java b/phoenix-core/src/it/java/org/apache/phoenix/schema/stats/StatsCollectorIT.java index 3af0d090e76..41e39e365f0 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/schema/stats/StatsCollectorIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/schema/stats/StatsCollectorIT.java @@ -46,7 +46,6 @@ import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; -import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment; import org.apache.hadoop.hbase.util.Bytes; import org.apache.phoenix.coprocessor.UngroupedAggregateRegionObserver; @@ -698,6 +697,25 @@ public void testRowCountWhenNumKVsExceedCompactionScannerThreshold() throws Exce assertEquals("Number of expected rows in stats table after major compaction didn't match", numRows, rs.getInt(1)); } } + + private void verifyGuidePostGenerated(ConnectionQueryServices queryServices, + String tableName, String[] familyNames, + long guidePostWidth, boolean emptyGuidePostExpected) throws Exception { + try (Table statsHTable = + queryServices.getTable( + SchemaUtil.getPhysicalName(PhoenixDatabaseMetaData.SYSTEM_STATS_NAME_BYTES, + queryServices.getProps()).getName())) { + for (String familyName : familyNames) { + GuidePostsInfo gps = + StatisticsUtil.readStatistics(statsHTable, + new GuidePostsKey(Bytes.toBytes(tableName), Bytes.toBytes(familyName)), + HConstants.LATEST_TIMESTAMP); + 
assertTrue(emptyGuidePostExpected ? gps.isEmptyGuidePost() : !gps.isEmptyGuidePost()); + assertTrue(gps.getByteCounts()[0] >= guidePostWidth); + assertTrue(gps.getGuidePostTimestamps()[0] > 0); + } + } + } @Test public void testEmptyGuidePostGeneratedWhenDataSizeLessThanGPWidth() throws Exception { @@ -714,25 +732,39 @@ public void testEmptyGuidePostGeneratedWhenDataSizeLessThanGPWidth() throws Exce conn.createStatement().execute("UPDATE STATISTICS " + tableName); ConnectionQueryServices queryServices = conn.unwrap(PhoenixConnection.class).getQueryServices(); - try (Table statsHTable = - queryServices.getTable( - SchemaUtil.getPhysicalName(PhoenixDatabaseMetaData.SYSTEM_STATS_NAME_BYTES, - queryServices.getProps()).getName())) { - GuidePostsInfo gps = - StatisticsUtil.readStatistics(statsHTable, - new GuidePostsKey(Bytes.toBytes(tableName), Bytes.toBytes("C1")), - HConstants.LATEST_TIMESTAMP); - assertTrue(gps.isEmptyGuidePost()); - assertEquals(guidePostWidth, gps.getByteCounts()[0]); - assertTrue(gps.getGuidePostTimestamps()[0] > 0); - gps = - StatisticsUtil.readStatistics(statsHTable, - new GuidePostsKey(Bytes.toBytes(tableName), Bytes.toBytes("C2")), - HConstants.LATEST_TIMESTAMP); - assertTrue(gps.isEmptyGuidePost()); - assertEquals(guidePostWidth, gps.getByteCounts()[0]); - assertTrue(gps.getGuidePostTimestamps()[0] > 0); - } + verifyGuidePostGenerated(queryServices, tableName, new String[] {"C1", "C2"}, guidePostWidth, true); + } + } + + @Test + public void testCollectingAllVersionsOfCells() throws Exception { + String tableName = generateUniqueName(); + try (Connection conn = DriverManager.getConnection(getUrl())) { + long guidePostWidth = 70; + String ddl = + "CREATE TABLE " + tableName + " (k INTEGER PRIMARY KEY, c1.a bigint, c2.b bigint)" + + " GUIDE_POSTS_WIDTH=" + guidePostWidth + + ", USE_STATS_FOR_PARALLELIZATION=true" + ", VERSIONS=3"; + conn.createStatement().execute(ddl); + conn.createStatement().execute("upsert into " + tableName + " values 
(100,100,3)"); + conn.commit(); + conn.createStatement().execute("UPDATE STATISTICS " + tableName); + + ConnectionQueryServices queryServices = + conn.unwrap(PhoenixConnection.class).getQueryServices(); + + // The table only has one row. All cells have just one version, and the data size of the row + // is less than the guide post width, so we generate an empty guide post. + verifyGuidePostGenerated(queryServices, tableName, new String[] {"C1", "C2"}, guidePostWidth, true); + + + conn.createStatement().execute("upsert into " + tableName + " values (100,101,4)"); + conn.commit(); + conn.createStatement().execute("UPDATE STATISTICS " + tableName); + + // We updated the row. Now each cell has two versions, and the data size of the row + // is >= the guide post width, so we generate a non-empty guide post. + verifyGuidePostGenerated(queryServices, tableName, new String[] {"C1", "C2"}, guidePostWidth, false); } } diff --git a/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java b/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java index cdbf234089e..5c7960342a4 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java @@ -1279,6 +1279,7 @@ public TransactionFactory.Provider getTransactionProvider() { MutationPlan plan = compiler.compile(Collections.singletonList(tableRef), null, cfs, null, clientTimeStamp); Scan scan = plan.getContext().getScan(); scan.setCacheBlocks(false); + scan.readAllVersions(); scan.setAttribute(ANALYZE_TABLE, TRUE_BYTES); boolean runUpdateStatsAsync = props.getBoolean(QueryServices.RUN_UPDATE_STATS_ASYNC, DEFAULT_RUN_UPDATE_STATS_ASYNC); scan.setAttribute(RUN_UPDATE_STATS_ASYNC_ATTRIB, runUpdateStatsAsync ? TRUE_BYTES : FALSE_BYTES);