Skip to content

Commit

Permalink
Upgrade calcite to 1.37.0 (#16504)
Browse files Browse the repository at this point in the history
* contains Make a full copy of the parser and apply our modifications to it #16503
* some minor api changes pair/entry
* some unnecessary aggregation was removed from a set of queries in `CalciteSubqueryTest`
* `AliasedOperatorConversion` was detecting `CHAR_LENGTH` as not a function ; I've removed the check
  * the field it was using doesn't look maintained that much
  * the `kind` is passed for the created `SqlFunction` so I don't think this check is actually needed
* some decoupled test cases became broken - will be fixed later
* some aggregate related changes: due to the fact that SUM() and COUNT() of no inputs are different
* upgrade avatica to 1.25.0
* `CalciteQueryTest#testExactCountDistinctWithFilter` is now executable

Close #16503
  • Loading branch information
kgyrtkirk committed Jun 13, 2024
1 parent d5a25a9 commit ac19b14
Show file tree
Hide file tree
Showing 99 changed files with 10,532 additions and 1,287 deletions.
72 changes: 72 additions & 0 deletions dev/upgrade-calcite-parser
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#--------------------------------------------------------------------

# Adopts base Calcite parser changes
#
# Establishes a git friendly merge situation:
#
# Creates a commit which matches the original state of the calcite parser;
# from that point it creates two alternate branches:
# * one with the local customizations
# * another with all the upstream updates
# and merges the two branches to obtain the upgraded state.
#

[ $# -ne 2 ] && echo -e "updates base parser sources.\n usage: $0 <old_calcite_version> <new_calcite_version>" && exit 1

CALCITE_OLD="$1"
CALCITE_NEW="$2"

set -e
set -x

# Name of the currently checked-out branch; the upgrade clone starts from it.
BRANCH=$(git name-rev --name-only HEAD)

# Scratch clone inside .git/ so temporary branches never pollute the main repo;
# --reference shares objects with the main repo to keep the clone cheap.
REPO=.git/calcite-upgrade
rm -rf "$REPO"
git clone "$PWD" --reference "$PWD" --branch "$BRANCH" "$REPO"

cd "$REPO"
git checkout -b curr-changes

# Regenerate the base parser sources from the OLD calcite version, commit them,
# then revert that commit: HEAD is the same as before, but its parent now
# records the pristine (un-customized) base calcite parser state.
mvn -q generate-sources -pl sql -Dcalcite.version="$CALCITE_OLD" -Pskip-static-checks
cp -r sql/target/calcite-base-parser/codegen/./ sql/src/main/codegen/./
git commit -m 'current reverse' -a
git revert --no-edit HEAD
# HEAD is now the same as before; but the parent holds the base calcite changes

# base-changes branches off the pristine base-parser state.
git branch base-changes curr-changes^
git checkout base-changes
git show | patch -p0 -R # undo temporarily to ensure maven runs

# Regenerate the base parser sources from the NEW calcite version.
mvn -q generate-sources -pl sql -Dcalcite.version="$CALCITE_NEW" -Pskip-static-checks
cp -r sql/target/calcite-base-parser/codegen/./ sql/src/main/codegen/./

git commit --allow-empty -m base-changes -a
git checkout -b new-state
# Merge the local customizations (curr-changes) with the upstream updates.
git merge --no-edit curr-changes

echo ok
cd -

# Expose the scratch clone as a remote so the result can be reviewed/merged;
# ignore failure when the remote does not exist yet.
git remote remove calcite-upgrade &>/dev/null || echo -n
git remote add -f calcite-upgrade "$REPO"


echo "merge branch calcite-upgrade/curr-changes if satisfied with those changes"

Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import com.google.common.base.Preconditions;
import org.apache.calcite.runtime.Hook;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.Pair;
import org.apache.druid.common.guava.FutureUtils;
import org.apache.druid.error.DruidException;
import org.apache.druid.error.InvalidInput;
Expand Down Expand Up @@ -78,6 +77,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.stream.Collectors;

Expand All @@ -91,15 +91,15 @@ public class MSQTaskQueryMaker implements QueryMaker
private final OverlordClient overlordClient;
private final PlannerContext plannerContext;
private final ObjectMapper jsonMapper;
private final List<Pair<Integer, String>> fieldMapping;
private final List<Entry<Integer, String>> fieldMapping;


MSQTaskQueryMaker(
@Nullable final IngestDestination targetDataSource,
final OverlordClient overlordClient,
final PlannerContext plannerContext,
final ObjectMapper jsonMapper,
final List<Pair<Integer, String>> fieldMapping
final List<Entry<Integer, String>> fieldMapping
)
{
this.targetDataSource = targetDataSource;
Expand Down Expand Up @@ -193,7 +193,7 @@ public QueryResponse<Object[]> runQuery(final DruidQuery druidQuery)
final List<ColumnType> columnTypeList = new ArrayList<>();
final List<ColumnMapping> columnMappings = QueryUtils.buildColumnMappings(fieldMapping, druidQuery);

for (final Pair<Integer, String> entry : fieldMapping) {
for (final Entry<Integer, String> entry : fieldMapping) {
final String queryColumn = druidQuery.getOutputRowSignature().getColumnName(entry.getKey());

final SqlTypeName sqlTypeName;
Expand Down Expand Up @@ -238,7 +238,7 @@ public QueryResponse<Object[]> runQuery(final DruidQuery druidQuery)

MSQTaskQueryMakerUtils.validateSegmentSortOrder(
segmentSortOrder,
fieldMapping.stream().map(f -> f.right).collect(Collectors.toList())
fieldMapping.stream().map(f -> f.getValue()).collect(Collectors.toList())
);

final DataSourceMSQDestination dataSourceMSQDestination = new DataSourceMSQDestination(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
import org.apache.calcite.schema.Table;
import org.apache.calcite.sql.dialect.CalciteSqlDialect;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.Pair;
import org.apache.druid.error.DruidException;
import org.apache.druid.error.InvalidInput;
import org.apache.druid.error.InvalidSqlInput;
Expand Down Expand Up @@ -65,6 +64,7 @@
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

public class MSQTaskSqlEngine implements SqlEngine
Expand Down Expand Up @@ -222,7 +222,7 @@ private static void validateSelect(final PlannerContext plannerContext)
*/
private static void validateInsert(
final RelNode rootRel,
final List<Pair<Integer, String>> fieldMappings,
final List<Entry<Integer, String>> fieldMappings,
@Nullable Table targetTable,
final PlannerContext plannerContext
)
Expand All @@ -241,13 +241,13 @@ private static void validateInsert(
* SQL allows multiple output columns with the same name. However, we don't allow this for INSERT or REPLACE
* queries, because we use these output names to generate columns in segments. They must be unique.
*/
private static void validateNoDuplicateAliases(final List<Pair<Integer, String>> fieldMappings)
private static void validateNoDuplicateAliases(final List<Entry<Integer, String>> fieldMappings)
{
final Set<String> aliasesSeen = new HashSet<>();

for (final Pair<Integer, String> field : fieldMappings) {
if (!aliasesSeen.add(field.right)) {
throw InvalidSqlInput.exception("Duplicate field in SELECT: [%s]", field.right);
for (final Entry<Integer, String> field : fieldMappings) {
if (!aliasesSeen.add(field.getValue())) {
throw InvalidSqlInput.exception("Duplicate field in SELECT: [%s]", field.getValue());
}
}
}
Expand Down Expand Up @@ -353,7 +353,7 @@ private static void validateLimitAndOffset(final RelNode rootRel, final boolean
*/
private static void validateTypeChanges(
final RelNode rootRel,
final List<Pair<Integer, String>> fieldMappings,
final List<Entry<Integer, String>> fieldMappings,
@Nullable final Table targetTable,
final PlannerContext plannerContext
)
Expand All @@ -366,9 +366,9 @@ private static void validateTypeChanges(
MultiStageQueryContext.getColumnsExcludedFromTypeVerification(plannerContext.queryContext());
final ArrayIngestMode arrayIngestMode = MultiStageQueryContext.getArrayIngestMode(plannerContext.queryContext());

for (Pair<Integer, String> fieldMapping : fieldMappings) {
final int columnIndex = fieldMapping.left;
final String columnName = fieldMapping.right;
for (Entry<Integer, String> fieldMapping : fieldMappings) {
final int columnIndex = fieldMapping.getKey();
final String columnName = fieldMapping.getValue();
final RelDataTypeField oldSqlTypeField =
targetTable.getRowType(DruidTypeSystem.TYPE_FACTORY).getField(columnName, true, false);

Expand Down Expand Up @@ -427,11 +427,11 @@ private static void validateTypeChanges(
*
* Returns -1 if the list does not contain a time column.
*/
private static int getTimeColumnIndex(final List<Pair<Integer, String>> fieldMappings)
private static int getTimeColumnIndex(final List<Entry<Integer, String>> fieldMappings)
{
for (final Pair<Integer, String> field : fieldMappings) {
if (field.right.equals(ColumnHolder.TIME_COLUMN_NAME)) {
return field.left;
for (final Entry<Integer, String> field : fieldMappings) {
if (field.getValue().equals(ColumnHolder.TIME_COLUMN_NAME)) {
return field.getKey();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,14 @@ public void testExactCountDistinctWithFilter()

}

// Overrides the inherited test case with an empty body and marks it
// @Disabled, so the parent implementation never runs here.
// NOTE(review): presumably this query shape is not (yet) supported by
// this engine after the Calcite 1.37.0 upgrade — TODO confirm and
// document the reason, or re-enable once supported.
@Disabled
@Override
@Test
public void testExactCountDistinctWithFilter2()
{

}

@Disabled
@Override
@Test
Expand Down
4 changes: 2 additions & 2 deletions licenses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1616,7 +1616,7 @@ name: Apache Calcite
license_category: binary
module: java-core
license_name: Apache License version 2.0
version: 1.35.0
version: 1.37.0
libraries:
- org.apache.calcite: calcite-core
- org.apache.calcite: calcite-linq4j
Expand All @@ -1634,7 +1634,7 @@ name: Apache Calcite Avatica
license_category: binary
module: java-core
license_name: Apache License version 2.0
version: 1.23.0
version: 1.25.0
libraries:
- org.apache.calcite.avatica: avatica-core
- org.apache.calcite.avatica: avatica-metrics
Expand Down
10 changes: 5 additions & 5 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,12 @@
<apache.ranger.version>2.4.0</apache.ranger.version>
<gson.version>2.10.1</gson.version>
<scala.library.version>2.13.14</scala.library.version>
<avatica.version>1.23.0</avatica.version>
<avatica.version>1.25.0</avatica.version>
<avro.version>1.11.3</avro.version>
<!-- When updating Calcite, also propagate updates to these files which we've copied and modified:
default_config.fmpp
sql/src/main/codegen/includes/*
<!--
The base calcite parser was copied into the project; when updating Calcite run dev/upgrade-calcite-parser to adopt upstream changes
-->
<calcite.version>1.35.0</calcite.version>
<calcite.version>1.37.0</calcite.version>
<confluent.version>6.2.12</confluent.version>
<datasketches.version>4.2.0</datasketches.version>
<datasketches.memory.version>2.2.0</datasketches.memory.version>
Expand Down Expand Up @@ -2189,6 +2188,7 @@
<exclude>**/.classpath</exclude> <!-- Eclipse -->
<exclude>**/.project</exclude> <!-- Eclipse -->
<exclude>**/*.iq</exclude> <!-- quidem testfiles -->
<exclude>**/*.iq.out</exclude> <!-- quidem testfiles -->
</excludes>
</configuration>
</plugin>
Expand Down
3 changes: 3 additions & 0 deletions processing/src/test/resources/log4j2.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,8 @@
<Logger level="info" name="org.apache.druid" additivity="false">
<AppenderRef ref="Console"/>
</Logger>
<Logger level="debug" name="org.apache.calcite.plan.AbstractRelOptPlanner.rule_execution_summary" additivity="false">
<AppenderRef ref="Console"/>
</Logger>
</Loggers>
</Configuration>
80 changes: 7 additions & 73 deletions sql/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,8 @@
</executions>
</plugin>

<!-- Following plugins and their configurations are used to generate the custom Calcite's SQL parser for Druid -->
<!-- Extracts the Parser.jj from Calcite to ${project.build.directory}, where all the Freemarker templates are -->
<!-- Extracts the base parser from the calcite-core artifact. -->
<!-- This is provided to have the base available - to compare changes to -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
Expand All @@ -353,50 +353,16 @@
<version>${calcite.version}</version>
<type>jar</type>
<overWrite>true</overWrite>
<outputDirectory>${project.build.directory}/</outputDirectory>
<includes>**/Parser.jj</includes>
</artifactItem>
<artifactItem>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-core</artifactId>
<version>${calcite.version}</version>
<type>jar</type>
<overWrite>true</overWrite>
<outputDirectory>${project.build.directory}/</outputDirectory>
<includes>**/default_config.fmpp</includes>
<outputDirectory>${project.build.directory}/calcite-base-parser</outputDirectory>
<includes>codegen/**</includes>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>

<!-- Copy the templates present in the codegen directory of druid-sql containing custom SQL rules to
${project.build.directory}/codegen -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<executions>
<execution>
<id>copy-fmpp-resources</id>
<phase>generate-sources</phase>
<goals>
<goal>copy-resources</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/codegen</outputDirectory>
<resources>
<resource>
<directory>src/main/codegen</directory>
<filtering>false</filtering>
</resource>
</resources>
</configuration>
</execution>
</executions>
</plugin>

<!-- "Plugs in" the Calcite's Parser.jj with the variables present in config.fmpp. These contain the custom rules
as well as the class to which the custom implementation will get generated -->
<!-- Applies the fmpp templates to produce the final Parser.jj -->
<plugin>
<groupId>com.googlecode.fmpp-maven-plugin</groupId>
<artifactId>fmpp-maven-plugin</artifactId>
Expand All @@ -408,9 +374,9 @@
<goal>generate</goal>
</goals>
<configuration>
<cfgFile>${project.build.directory}/codegen/config.fmpp</cfgFile>
<cfgFile>src/main/codegen/config.fmpp</cfgFile>
<outputDirectory>${project.build.directory}/generated-sources</outputDirectory>
<templateDirectory>${project.build.directory}/codegen/templates</templateDirectory>
<templateDirectory>src/main/codegen/templates</templateDirectory>
</configuration>
</execution>
</executions>
Expand Down Expand Up @@ -440,38 +406,6 @@
</executions>
</plugin>

<!-- This plugin is used to replace the production rule for FROM clause in the calcite grammar.
It is done by a search and replace since calcite doesn't allow to override production rules generally
in its grammar (override is possible only if there's a hook for it in the grammar). And the FROM clause
doesn't contain any hook for extension. For the custom changes done in
extension, please check from.ftl file in sql module.
-->
<plugin>
<groupId>com.google.code.maven-replacer-plugin</groupId>
<artifactId>replacer</artifactId>
<version>1.5.3</version>
<executions>
<execution>
<phase>generate-sources</phase>
<goals>
<goal>replace</goal>
</goals>
</execution>
</executions>
<configuration>
<basedir>${project.build.directory}/generated-sources/org/apache/druid/sql/calcite/parser</basedir>
<includes>
<include>**/DruidSqlParserImpl.java</include>
</includes>
<replacements>
<replacement>
<token>fromClause = FromClause</token>
<value>fromClause = DruidFromClause</value>
</replacement>
</replacements>
</configuration>
</plugin>

<!-- Adds the path of the generated parser to the build path -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
Expand Down

0 comments on commit ac19b14

Please sign in to comment.