Commit

fix comment

jackylk committed Dec 24, 2019
1 parent 08d4279 commit f0900a6

Showing 15 changed files with 16 additions and 92 deletions.
@@ -1503,7 +1503,7 @@ private CarbonCommonConstants() {

/**
* Whether load/insert command is fired internally or by the user.
- * Used to block load/insert on pre-aggregate if fired by user
+ * Used to block load/insert on MV if fired by user
*/
@CarbonProperty
public static final String IS_INTERNAL_LOAD_CALL = "is.internal.load.call";
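A minimal Scala sketch of how a flag like this is typically consulted, using the standard CarbonProperties accessor; the "false" default and the surrounding check are illustrative assumptions, not code from this commit:

```scala
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties

// Read the flag; defaulting to "false" is an assumption for this sketch
val isInternalLoad = CarbonProperties.getInstance()
  .getProperty(CarbonCommonConstants.IS_INTERNAL_LOAD_CALL, "false")
  .toBoolean

if (!isInternalLoad) {
  // A user-fired load/insert targeting an MV table would be rejected here
  println("user-fired load: block if the target is an MV table")
}
```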
@@ -64,8 +64,6 @@
*
* <br>Currently CarbonData supports following provider:
* <ol>
- * <li> preaggregate: pre-aggregate table of single table </li>
- * <li> timeseries: pre-aggregate table based on time dimension of the table </li>
* <li> lucene: index backed by Apache Lucene </li>
* <li> bloomfilter: index backed by Bloom Filter </li>
* </ol>
16 changes: 0 additions & 16 deletions core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -3316,22 +3316,6 @@ public static String generateUUID() {
return UUID.randomUUID().toString();
}

-  /**
-   * Below method will be used to get the datamap schema name from datamap table name
-   * it will split name based on character '_' and get the last name
-   * This is only for pre aggregate and timeseries tables
-   *
-   * @param tableName
-   * @return datamapschema name
-   */
-  public static String getDatamapNameFromTableName(String tableName) {
-    int i = tableName.lastIndexOf('_');
-    if (i != -1) {
-      return tableName.substring(i + 1, tableName.length());
-    }
-    return null;
-  }

public static String getIndexServerTempPath(String tablePath, String queryId) {
String tempFolderPath = CarbonProperties.getInstance()
.getProperty(CarbonCommonConstants.CARBON_INDEX_SERVER_TEMP_PATH);
10 changes: 1 addition & 9 deletions docs/introduction.md
@@ -71,14 +71,6 @@ CarbonData has rich set of features to support various use cases in Big Data analytics

### DataMaps

-- ##### Pre-Aggregate
-
-  CarbonData has concept of datamaps to assist in pruning of data while querying so that performance is faster.Pre Aggregate tables are kind of datamaps which can improve the query performance by order of magnitude.CarbonData will automatically pre-aggregate the incremental data and re-write the query to automatically fetch from the most appropriate pre-aggregate table to serve the query faster.
-
-- ##### Time Series
-
-  CarbonData has built in understanding of time order(Year, month,day,hour, minute,second). Time series is a pre-aggregate table which can automatically roll-up the data to the desired level during incremental load and serve the query from the most appropriate pre-aggregate table.

- ##### Bloom filter

CarbonData supports bloom filter as a datamap in order to quickly and efficiently prune the data for scanning and achieve faster query performance.
@@ -89,7 +81,7 @@ CarbonData has rich set of features to support various use cases in Big Data analytics

- ##### MV (Materialized Views)

-  MVs are kind of pre-aggregate tables which can support efficent query re-write and processing.CarbonData provides MV which can rewrite query to fetch from any table(including non-carbondata tables). Typical usecase is to store the aggregated data of a non-carbondata fact table into carbondata and use mv to rewrite the query to fetch from carbondata.
+  MVs are pre-aggregate and pre-join tables which support efficient query rewrite and processing. CarbonData's MV can rewrite a query to fetch from any table (including non-carbondata tables). A typical use case is to store the aggregated data of a non-carbondata fact table in carbondata and use the MV to rewrite the query to fetch from carbondata.

### Streaming

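The MV rewrite described above can be sketched end to end. A minimal sketch, assuming the pre-2.0 `CREATE DATAMAP ... USING 'mv'` syntax and hypothetical table and column names:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("MVRewriteSketch").getOrCreate()

// Hypothetical fact table
spark.sql("CREATE TABLE sales(city STRING, amount INT) STORED AS carbondata")

// MV storing pre-aggregated data; WITH DEFERRED REBUILD plus an explicit
// REBUILD follows the 1.6.x DataMap management docs and is an assumption here
spark.sql("CREATE DATAMAP agg_sales USING 'mv' WITH DEFERRED REBUILD AS " +
  "SELECT city, sum(amount) FROM sales GROUP BY city")
spark.sql("REBUILD DATAMAP agg_sales")

// Expected to be rewritten to read from the MV's backing table
spark.sql("SELECT city, sum(amount) FROM sales GROUP BY city").show()
```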
2 changes: 1 addition & 1 deletion docs/presto-guide.md
@@ -294,5 +294,5 @@ carbondata files.
### Supported features of presto carbon
Presto carbon only supports reading the carbon table which is written by spark carbon or carbon SDK.
During reading, it supports the non-distributed datamaps like block datamap and bloom datamap.
-It doesn't support MV datamap and Pre-aggregate datamap as it needs query plan to be changed and presto does not allow it.
+It doesn't support Materialized Views, as they require the query plan to be changed, which Presto does not allow.
Also Presto carbon supports streaming segment read from streaming table created by spark.
4 changes: 2 additions & 2 deletions docs/streaming-guide.md
@@ -273,8 +273,8 @@ ALTER TABLE streaming_table COMPACT 'close_streaming'
## Constraint
1. reject set streaming property from true to false.
2. reject UPDATE/DELETE command on the streaming table.
-3. reject create pre-aggregation DataMap on the streaming table.
-4. reject add the streaming property on the table with pre-aggregation DataMap.
+3. reject creating an MV on the streaming table.
+4. reject adding the streaming property on a table with an MV DataMap.
5. if the table has dictionary columns, it will not support concurrent data loading.
6. block delete "streaming" segment while the streaming ingestion is running.
7. block drop the streaming table while the streaming ingestion is running.
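Constraints 3 and 4 above can be illustrated directly. A sketch with hypothetical table names; the exact error messages are not asserted:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("StreamingConstraintSketch").getOrCreate()

// Constraint 3: creating an MV on a streaming table should be rejected
spark.sql("CREATE TABLE stream_src(id INT, v INT) STORED AS carbondata " +
  "TBLPROPERTIES('streaming'='true')")
try {
  spark.sql("CREATE DATAMAP agg_stream USING 'mv' AS " +
    "SELECT id, sum(v) FROM stream_src GROUP BY id")
} catch {
  case e: Exception => println(s"rejected as expected: ${e.getMessage}")
}

// Constraint 4: adding the streaming property to a table with an MV should be rejected
spark.sql("CREATE TABLE plain_src(id INT, v INT) STORED AS carbondata")
spark.sql("CREATE DATAMAP agg_plain USING 'mv' AS " +
  "SELECT id, sum(v) FROM plain_src GROUP BY id")
try {
  spark.sql("ALTER TABLE plain_src SET TBLPROPERTIES('streaming'='true')")
} catch {
  case e: Exception => println(s"rejected as expected: ${e.getMessage}")
}
```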
3 changes: 1 addition & 2 deletions docs/usecases.md
@@ -189,8 +189,7 @@ Concurrent queries can be more due to the BI dashboard

- Use table block size as 128MB so that pruning is more effective
- Use global sort mode so that the data to be fetched are grouped together
-- Create pre-aggregate tables for non timestamp based group by queries
-- For queries containing group by date, create timeseries based Datamap(pre-aggregate) tables so that the data is rolled up during creation and fetch is faster
+- Create Materialized Views for aggregation queries
- Reduce the Spark shuffle partitions.(In our configuration on 14 node cluster, it was reduced to 35 from default of 200)
- Enable global dictionary for columns which have low cardinality. Aggregation can be done on encoded data, thereby improving the performance
- For columns whose cardinality is high, enable the local dictionary so that store size is less and can take dictionary benefit for scan
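The tunings above translate into table properties at create time. A minimal sketch with hypothetical table and column names; the property names follow the CarbonData DDL docs:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("UsecaseTuningSketch").getOrCreate()

// Table tuned per the list above: 128 MB blocks, global sort, local dictionary
spark.sql(
  """CREATE TABLE bi_fact(dt DATE, region STRING, metric INT)
    |STORED AS carbondata
    |TBLPROPERTIES(
    |  'table_blocksize'='128',
    |  'sort_scope'='global_sort',
    |  'sort_columns'='dt,region',
    |  'local_dictionary_enable'='true')""".stripMargin)

// Materialized View serving the recurring aggregation queries
spark.sql("CREATE DATAMAP agg_bi USING 'mv' AS " +
  "SELECT dt, region, sum(metric) FROM bi_fact GROUP BY dt, region")
```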
@@ -25,7 +25,7 @@ import org.apache.spark.sql.SparkSession
import org.apache.carbondata.examples.util.ExampleUtils

/**
- * This example is for pre-aggregate tables.
+ * This example is for Materialized View.
*/

object MVDataMapExample {
@@ -42,7 +42,6 @@ object MVDataMapExample {
+ "../../../..").getCanonicalPath
val testData = s"$rootPath/integration/spark-common-test/src/test/resources/sample.csv"

-    // 1. simple usage for Pre-aggregate tables creation and query
spark.sql("DROP TABLE IF EXISTS mainTable")
spark.sql("DROP TABLE IF EXISTS dimtable")
spark.sql(
5 changes: 4 additions & 1 deletion format/src/main/thrift/schema.thrift
@@ -105,6 +105,7 @@ struct ColumnSchema{
/**
* Used when this column is part of an aggregate table.
*/
+  /** Deprecated */
11: optional string aggregate_function;

12: optional binary default_value;
@@ -129,6 +130,7 @@ struct ColumnSchema{
* to maintain the column relation with parent table.
* will be useful in case of pre-aggregate
**/
+  /** Deprecated */
17: optional list<ParentColumnTableRelation> parentColumnTableRelations;
}

@@ -204,10 +206,11 @@ struct DataMapSchema {
// stores properties of select query, query type like groupby, join in
// case of preaggregate/timeseries
3: optional map<string, string> properties;
-  // relation identifier of a table which stores data of datamaps like preaggregate/timeseries.
+  // relation identifier of a table which stores data of Materialized View.
4: optional RelationIdentifier childTableIdentifier;
// in case of preaggregate/timeseries datamap it will be used to maintain the child schema
// which will be useful in case of query and data load
+  // Deprecated
5: optional TableSchema childTableSchema;
}

@@ -176,28 +176,6 @@ public List<InputSplit> getSplits(JobContext job) throws IOException {
* @throws IOException
*/

-  /**
-   * Below method will be used to get the filter segments when query is fired on pre Aggregate
-   * and main table in case of streaming.
-   * For Pre Aggregate rules it will set all the valid segments for both streaming and
-   * and normal for fact table, so if any handoff happened in between it will
-   * select only new hand off segments segments for fact.
-   * @param validSegments
-   * @param segmentsToAccess
-   * @return
-   */
-  private List<Segment> getFilteredNormalSegments(List<Segment> validSegments,
-      Segment[] segmentsToAccess) {
-    List<Segment> segmentToAccessSet = Arrays.asList(segmentsToAccess);
-    List<Segment> filteredSegment = new ArrayList<>();
-    for (Segment seg : validSegments) {
-      if (!segmentToAccessSet.contains(seg)) {
-        filteredSegment.add(seg);
-      }
-    }
-    return filteredSegment;
-  }

/**
* Return segment list after filtering out valid segments and segments set by user by
* `INPUT_SEGMENT_NUMBERS` in job configuration
@@ -33,7 +33,7 @@ case class CreateTablePreExecutionEvent(

/**
* Class for handling operations after data load completion and before final
- * commit of load operation. Example usage: For loading pre-aggregate tables
+ * commit of load operation.
*/
case class CreateTablePostExecutionEvent(sparkSession: SparkSession,
identifier: AbsoluteTableIdentifier) extends Event with TableEventInfo
@@ -86,7 +86,7 @@ case class CarbonAddLoadCommand(
"Unsupported operation on global dictionary columns table")
}
if (carbonTable.isChildTableForMV) {
-      throw new MalformedCarbonCommandException("Unsupported operation on MV/Pre-aggrergated table")
+      throw new MalformedCarbonCommandException("Unsupported operation on MV table")
}
// if insert overwrite in progress, do not allow add segment
if (SegmentStatusManager.isOverwriteInProgressInTable(carbonTable)) {
@@ -91,35 +91,6 @@ case class CarbonDropTableCommand(
val relationIdentifiers = carbonTable.getTableInfo.getParentRelationIdentifiers
if (relationIdentifiers != null && !relationIdentifiers.isEmpty) {
var ignoreParentTableCheck = false
-      if (carbonTable.getTableInfo.getParentRelationIdentifiers.size() == 1) {
-        /**
-         * below handling in case when pre aggregation creation failed in scenario
-         * while creating a pre aggregate data map it created pre aggregate table and registered
-         * in hive, but failed to register in main table because of some exception.
-         * in this case if it will not allow user to drop datamap and data map table
-         * for this if user run drop table command for pre aggregate it should allow user to drop
-         * the same
-         */
-        val parentDbName =
-          carbonTable.getTableInfo.getParentRelationIdentifiers.get(0).getDatabaseName
-        val parentTableName =
-          carbonTable.getTableInfo.getParentRelationIdentifiers.get(0).getTableName
-        val parentCarbonTable = try {
-          Some(CarbonEnv.getCarbonTable(Some(parentDbName), parentTableName)(sparkSession))
-        } catch {
-          case _: Exception => None
-        }
-        if (parentCarbonTable.isDefined) {
-          val dataMapSchemaName = CarbonUtil.getDatamapNameFromTableName(carbonTable.getTableName)
-          if (null != dataMapSchemaName) {
-            val dataMapSchema = parentCarbonTable.get.getDataMapSchema(dataMapSchemaName)
-            if (null == dataMapSchema) {
-              LOGGER.info(s"Force dropping datamap ${carbonTable.getTableName}")
-              ignoreParentTableCheck = true
-            }
-          }
-        }
-      }
if (!ignoreParentTableCheck && !dropChildTable) {
if (!ifExistsSet) {
throwMetadataException(dbName, tableName,
@@ -32,7 +32,7 @@ import org.apache.carbondata.core.metadata.schema.datamap.DataMapClassProvider.M
import org.apache.carbondata.core.metadata.schema.table.CarbonTable

/**
- * Utility class for keeping all the utility methods common for pre-aggregate and mv datamap
+ * Utility class for keeping all the utility methods common for MV
*/
object DataMapUtil {

@@ -50,7 +50,7 @@ public CarbonLoadModel getCarbonLoadModel() {

/**
* Class for handling operations after data load completion and before final
* commit of load operation. Example usage: For loading pre-aggregate tables
* commit of load operation. Example usage: For loading MV
*/

public static class LoadTablePostExecutionEvent extends Event {
@@ -74,7 +74,7 @@ public CarbonLoadModel getCarbonLoadModel() {

/**
* Event for handling operations after data load completion and before final
- * commit of load operation. Example usage: For loading pre-aggregate tables
+ * commit of load operation. Example usage: For loading MV
*/

public static class LoadTablePreStatusUpdateEvent extends Event {
