Skip to content

Commit

Permalink
Extend the PARTITION BY clause to accept string literals for the time…
Browse files Browse the repository at this point in the history
… partitioning (#15836)


This PR contains a portion of the changes from the inactive draft PR #13686 (from @paul-rogers) for integrating the catalog with the Calcite planner. It extends the `PARTITIONED BY` clause to accept string literals for the time partitioning.
  • Loading branch information
zachjsh committed Feb 9, 2024
1 parent 6e9eee4 commit f9ee2c3
Show file tree
Hide file tree
Showing 21 changed files with 424 additions and 116 deletions.
27 changes: 27 additions & 0 deletions docs/multi-stage-query/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,33 @@ The following ISO 8601 periods are supported for `TIME_FLOOR` and the string con
- P3M
- P1Y

The string constant can also be one of the following keywords:

- `HOUR` - Same as `'PT1H'`
- `DAY` - Same as `'P1D'`
- `MONTH` - Same as `'P1M'`
- `YEAR` - Same as `'P1Y'`
- `ALL TIME` - Partitions data into a single time chunk spanning all time
- `ALL` - Alias for `ALL TIME`

The `WEEK` granularity is deprecated and not supported in MSQ.

Examples:

```SQL
-- Keyword
PARTITIONED BY HOUR

-- String literal
PARTITIONED BY 'HOUR'

-- ISO 8601 period
PARTITIONED BY 'PT1H'

-- TIME_FLOOR function
PARTITIONED BY TIME_FLOOR(__time, 'PT1H')
```

For more information about partitioning, see [Partitioning](concepts.md#partitioning-by-time). <br /><br />
*Avoid partitioning by week, `P1W`, because weeks don't align neatly with months and years, making it difficult to partition by coarser granularities later.

Expand Down
1 change: 1 addition & 0 deletions sql/src/main/codegen/config.fmpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ data: {
"org.apache.calcite.sql.SqlNodeList"
"org.apache.calcite.sql.SqlBasicCall"
"org.apache.druid.java.util.common.granularity.Granularity"
"org.apache.druid.java.util.common.granularity.GranularityType"
"org.apache.druid.java.util.common.granularity.Granularities"
"org.apache.druid.sql.calcite.parser.DruidSqlInsert"
"org.apache.druid.sql.calcite.parser.DruidSqlParserUtils"
Expand Down
17 changes: 8 additions & 9 deletions sql/src/main/codegen/includes/common.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@
* under the License.
*/

// Using fully qualified name for Pair class, since Calcite also has a same class name being used in the Parser.jj
org.apache.druid.java.util.common.Pair<Granularity, String> PartitionGranularity() :
SqlGranularityLiteral PartitionGranularity() :
{
SqlNode e;
Granularity granularity;
Expand Down Expand Up @@ -52,14 +51,14 @@ org.apache.druid.java.util.common.Pair<Granularity, String> PartitionGranularity
|
<ALL>
{
granularity = Granularities.ALL;
unparseString = "ALL";
granularity = Granularities.ALL;
unparseString = "ALL";
}
[
<TIME>
{
unparseString += " TIME";
}
<TIME>
{
unparseString += " TIME";
}
]
|
e = Expression(ExprContext.ACCEPT_SUB_QUERY)
Expand All @@ -69,7 +68,7 @@ org.apache.druid.java.util.common.Pair<Granularity, String> PartitionGranularity
}
)
{
return new org.apache.druid.java.util.common.Pair(granularity, unparseString);
return new SqlGranularityLiteral(granularity, unparseString, getPos());
}
}

Expand Down
6 changes: 3 additions & 3 deletions sql/src/main/codegen/includes/insert.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ SqlNode DruidSqlInsertEof() :
final SqlNodeList columnList;
final Span s;
final Pair<SqlNodeList, SqlNodeList> p;
org.apache.druid.java.util.common.Pair<Granularity, String> partitionedBy = new org.apache.druid.java.util.common.Pair(null, null);
SqlGranularityLiteral partitionedBy = null;
SqlNodeList clusteredBy = null;
String exportFileFormat = null;
}
Expand Down Expand Up @@ -93,7 +93,7 @@ SqlNode DruidSqlInsertEof() :
clusteredBy = ClusteredBy()
]
{
if (clusteredBy != null && partitionedBy.lhs == null) {
if (clusteredBy != null && partitionedBy == null) {
throw org.apache.druid.sql.calcite.parser.DruidSqlParserUtils.problemParsing(
"CLUSTERED BY found before PARTITIONED BY, CLUSTERED BY must come after the PARTITIONED BY clause"
);
Expand All @@ -112,6 +112,6 @@ SqlNode DruidSqlInsertEof() :
return insertNode;
}
SqlInsert sqlInsert = (SqlInsert) insertNode;
return new DruidSqlInsert(sqlInsert, partitionedBy.lhs, partitionedBy.rhs, clusteredBy, exportFileFormat);
return DruidSqlInsert.create(sqlInsert, partitionedBy, clusteredBy, exportFileFormat);
}
}
7 changes: 3 additions & 4 deletions sql/src/main/codegen/includes/replace.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ SqlNode DruidSqlReplaceEof() :
final Span s;
SqlNode tableRef = null;
SqlInsert sqlInsert;
// Using fully qualified name for Pair class, since Calcite also has a same class name being used in the Parser.jj
org.apache.druid.java.util.common.Pair<Granularity, String> partitionedBy = new org.apache.druid.java.util.common.Pair(null, null);
SqlGranularityLiteral partitionedBy = null;
SqlNodeList clusteredBy = null;
final Pair<SqlNodeList, SqlNodeList> p;
SqlNode replaceTimeQuery = null;
Expand Down Expand Up @@ -78,7 +77,7 @@ SqlNode DruidSqlReplaceEof() :
clusteredBy = ClusteredBy()
]
{
if (clusteredBy != null && partitionedBy.lhs == null) {
if (clusteredBy != null && partitionedBy == null) {
throw org.apache.druid.sql.calcite.parser.DruidSqlParserUtils.problemParsing(
"CLUSTERED BY found before PARTITIONED BY, CLUSTERED BY must come after the PARTITIONED BY clause"
);
Expand All @@ -91,7 +90,7 @@ SqlNode DruidSqlReplaceEof() :
<EOF>
{
sqlInsert = new SqlInsert(s.end(source), SqlNodeList.EMPTY, destination, source, columnList);
return new DruidSqlReplace(sqlInsert, partitionedBy.lhs, partitionedBy.rhs, clusteredBy, replaceTimeQuery, exportFileFormat);
return DruidSqlReplace.create(sqlInsert, partitionedBy, clusteredBy, replaceTimeQuery, exportFileFormat);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlNodeList;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.calcite.util.ImmutableNullableList;

import javax.annotation.Nullable;
import java.util.List;

/**
* Common base class to the two Druid "ingest" statements: INSERT and REPLACE.
Expand All @@ -37,10 +38,7 @@ public abstract class DruidSqlIngest extends SqlInsert
public static final String SQL_EXPORT_FILE_FORMAT = "__exportFileFormat";

@Nullable
protected final Granularity partitionedBy;

// Used in the unparse function to generate the original query since we convert the string to an enum
protected final String partitionedByStringForUnparse;
protected final SqlGranularityLiteral partitionedBy;

@Nullable
protected final SqlNodeList clusteredBy;
Expand All @@ -53,22 +51,20 @@ public DruidSqlIngest(
SqlNode targetTable,
SqlNode source,
SqlNodeList columnList,
@Nullable Granularity partitionedBy,
@Nullable String partitionedByStringForUnparse,
@Nullable SqlGranularityLiteral partitionedBy,
@Nullable SqlNodeList clusteredBy,
@Nullable String exportFileFormat
)
{
super(pos, keywords, targetTable, source, columnList);

this.partitionedByStringForUnparse = partitionedByStringForUnparse;
this.partitionedBy = partitionedBy;
this.clusteredBy = clusteredBy;
this.exportFileFormat = exportFileFormat;
}

@Nullable
public Granularity getPartitionedBy()
public SqlGranularityLiteral getPartitionedBy()
{
return partitionedBy;
}
Expand All @@ -84,4 +80,14 @@ public String getExportFileFormat()
{
return exportFileFormat;
}

@Override
public List<SqlNode> getOperandList()
{
return ImmutableNullableList.<SqlNode>builder()
.addAll(super.getOperandList())
.add(partitionedBy)
.add(clusteredBy)
.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import org.apache.calcite.sql.SqlNodeList;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.SqlWriter;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.calcite.sql.parser.SqlParserPos;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
Expand All @@ -41,28 +41,49 @@ public class DruidSqlInsert extends DruidSqlIngest
// This allows reusing super.unparse
public static final SqlOperator OPERATOR = SqlInsert.OPERATOR;

public static DruidSqlInsert create(
@Nonnull SqlInsert insertNode,
@Nullable SqlGranularityLiteral partitionedBy,
@Nullable SqlNodeList clusteredBy,
@Nullable String exportFileFormat
)
{
return new DruidSqlInsert(
insertNode.getParserPosition(),
(SqlNodeList) insertNode.getOperandList().get(0), // No better getter to extract this
insertNode.getTargetTable(),
insertNode.getSource(),
insertNode.getTargetColumnList(),
partitionedBy,
clusteredBy,
exportFileFormat
);
}

/**
* While partitionedBy and partitionedByStringForUnparse can be null as arguments to the constructor, this is
* While partitionedBy can be null as arguments to the constructor, this is
* disallowed (semantically) and the constructor performs checks to ensure that. This helps in producing friendly
* errors when the PARTITIONED BY custom clause is not present, and keeps its error separate from JavaCC/Calcite's
* custom errors which can be cryptic when someone accidentally forgets to explicitly specify the PARTITIONED BY clause
*/
public DruidSqlInsert(
@Nonnull SqlInsert insertNode,
@Nullable Granularity partitionedBy,
@Nullable String partitionedByStringForUnparse,
SqlParserPos pos,
SqlNodeList keywords,
SqlNode targetTable,
SqlNode source,
SqlNodeList columnList,
@Nullable SqlGranularityLiteral partitionedBy,
@Nullable SqlNodeList clusteredBy,
@Nullable String exportFileFormat
)
{
super(
insertNode.getParserPosition(),
(SqlNodeList) insertNode.getOperandList().get(0), // No better getter to extract this
insertNode.getTargetTable(),
insertNode.getSource(),
insertNode.getTargetColumnList(),
pos,
keywords,
targetTable,
source,
columnList,
partitionedBy,
partitionedByStringForUnparse,
clusteredBy,
exportFileFormat
);
Expand Down Expand Up @@ -95,9 +116,9 @@ public void unparse(SqlWriter writer, int leftPrec, int rightPrec)
getSource().unparse(writer, 0, 0);
writer.newlineAndIndent();

if (partitionedByStringForUnparse != null) {
if (getPartitionedBy() != null) {
writer.keyword("PARTITIONED BY");
writer.keyword(partitionedByStringForUnparse);
getPartitionedBy().unparse(writer, leftPrec, rightPrec);
}

if (getClusteredBy() != null) {
Expand Down
Loading

0 comments on commit f9ee2c3

Please sign in to comment.