Skip to content

Commit

Permalink
Upgrade calcite to 1.37.0 (#16504)
Browse files Browse the repository at this point in the history
* contains Make a full copy of the parser and apply our modifications to it #16503
* some minor api changes pair/entry
* some unnecessary aggregation was removed from a set of queries in `CalciteSubqueryTest`
* `AliasedOperatorConversion` was detecting `CHAR_LENGTH` as not a function ; I've removed the check
  * the field it was using doesn't look maintained that much
  * the `kind` is passed for the created `SqlFunction` so I don't think this check is actually needed
* some decoupled test cases became broken - will be fixed later
* some aggregate related changes: due to the fact that SUM() and COUNT() of no inputs are different
* upgrade avatica to 1.25.0
* `CalciteQueryTest#testExactCountDistinctWithFilter` is now executable

Close #16503
  • Loading branch information
kgyrtkirk committed Jun 13, 2024
1 parent d5a25a9 commit ac19b14
Show file tree
Hide file tree
Showing 99 changed files with 10,532 additions and 1,287 deletions.
72 changes: 72 additions & 0 deletions dev/upgrade-calcite-parser
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#--------------------------------------------------------------------

# Adopts base Calcite parser changes
#
# Establishes a git friendly merge situation:
#
# Creates a commit which matches the original state of the calcite parser;
# from that point it creates two alternate branches:
# * one with the local customizations
# * another with all the upstream updates
# and merges the two branches to obtain the upgraded state.
#

[ $# -ne 2 ] && echo -e "updates base parser sources.\n usage: $0 <old_calcite_version> <new_calcite_version>" && exit 1

CALCITE_OLD="$1"
CALCITE_NEW="$2"

set -e
set -x

# Name of the currently checked-out branch; the upgrade clone starts from it.
BRANCH=$(git name-rev --name-only HEAD)

# Scratch clone inside .git/ so temporary branches never pollute the main repo;
# --reference shares objects with the main repo to keep the clone cheap.
REPO=.git/calcite-upgrade
rm -rf "$REPO"
git clone "$PWD" --reference "$PWD" --branch "$BRANCH" "$REPO"

cd "$REPO"
git checkout -b curr-changes

# Regenerate the base parser sources from the OLD calcite version, commit them,
# then revert that commit: HEAD is the same as before, but its parent now
# records the pristine (un-customized) base calcite parser state.
mvn -q generate-sources -pl sql -Dcalcite.version="$CALCITE_OLD" -Pskip-static-checks
cp -r sql/target/calcite-base-parser/codegen/./ sql/src/main/codegen/./
git commit -m 'current reverse' -a
git revert --no-edit HEAD
# HEAD is now the same as before; but the parent holds the base calcite changes

# base-changes branches off the pristine base-parser state.
git branch base-changes curr-changes^
git checkout base-changes
git show | patch -p0 -R # undo temporarily to ensure maven runs

# Regenerate the base parser sources from the NEW calcite version.
mvn -q generate-sources -pl sql -Dcalcite.version="$CALCITE_NEW" -Pskip-static-checks
cp -r sql/target/calcite-base-parser/codegen/./ sql/src/main/codegen/./

git commit --allow-empty -m base-changes -a
git checkout -b new-state
# Merge the local customizations (curr-changes) with the upstream updates.
git merge --no-edit curr-changes

echo ok
cd -

# Expose the scratch clone as a remote so the result can be reviewed/merged;
# ignore failure when the remote does not exist yet.
git remote remove calcite-upgrade &>/dev/null || echo -n
git remote add -f calcite-upgrade "$REPO"


echo "merge branch calcite-upgrade/curr-changes if satisfied with those changes"

Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import com.google.common.base.Preconditions;
import org.apache.calcite.runtime.Hook;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.Pair;
import org.apache.druid.common.guava.FutureUtils;
import org.apache.druid.error.DruidException;
import org.apache.druid.error.InvalidInput;
Expand Down Expand Up @@ -78,6 +77,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.stream.Collectors;

Expand All @@ -91,15 +91,15 @@ public class MSQTaskQueryMaker implements QueryMaker
private final OverlordClient overlordClient;
private final PlannerContext plannerContext;
private final ObjectMapper jsonMapper;
private final List<Pair<Integer, String>> fieldMapping;
private final List<Entry<Integer, String>> fieldMapping;


MSQTaskQueryMaker(
@Nullable final IngestDestination targetDataSource,
final OverlordClient overlordClient,
final PlannerContext plannerContext,
final ObjectMapper jsonMapper,
final List<Pair<Integer, String>> fieldMapping
final List<Entry<Integer, String>> fieldMapping
)
{
this.targetDataSource = targetDataSource;
Expand Down Expand Up @@ -193,7 +193,7 @@ public QueryResponse<Object[]> runQuery(final DruidQuery druidQuery)
final List<ColumnType> columnTypeList = new ArrayList<>();
final List<ColumnMapping> columnMappings = QueryUtils.buildColumnMappings(fieldMapping, druidQuery);

for (final Pair<Integer, String> entry : fieldMapping) {
for (final Entry<Integer, String> entry : fieldMapping) {
final String queryColumn = druidQuery.getOutputRowSignature().getColumnName(entry.getKey());

final SqlTypeName sqlTypeName;
Expand Down Expand Up @@ -238,7 +238,7 @@ public QueryResponse<Object[]> runQuery(final DruidQuery druidQuery)

MSQTaskQueryMakerUtils.validateSegmentSortOrder(
segmentSortOrder,
fieldMapping.stream().map(f -> f.right).collect(Collectors.toList())
fieldMapping.stream().map(f -> f.getValue()).collect(Collectors.toList())
);

final DataSourceMSQDestination dataSourceMSQDestination = new DataSourceMSQDestination(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
import org.apache.calcite.schema.Table;
import org.apache.calcite.sql.dialect.CalciteSqlDialect;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.Pair;
import org.apache.druid.error.DruidException;
import org.apache.druid.error.InvalidInput;
import org.apache.druid.error.InvalidSqlInput;
Expand Down Expand Up @@ -65,6 +64,7 @@
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

public class MSQTaskSqlEngine implements SqlEngine
Expand Down Expand Up @@ -222,7 +222,7 @@ private static void validateSelect(final PlannerContext plannerContext)
*/
private static void validateInsert(
final RelNode rootRel,
final List<Pair<Integer, String>> fieldMappings,
final List<Entry<Integer, String>> fieldMappings,
@Nullable Table targetTable,
final PlannerContext plannerContext
)
Expand All @@ -241,13 +241,13 @@ private static void validateInsert(
* SQL allows multiple output columns with the same name. However, we don't allow this for INSERT or REPLACE
* queries, because we use these output names to generate columns in segments. They must be unique.
*/
private static void validateNoDuplicateAliases(final List<Pair<Integer, String>> fieldMappings)
private static void validateNoDuplicateAliases(final List<Entry<Integer, String>> fieldMappings)
{
final Set<String> aliasesSeen = new HashSet<>();

for (final Pair<Integer, String> field : fieldMappings) {
if (!aliasesSeen.add(field.right)) {
throw InvalidSqlInput.exception("Duplicate field in SELECT: [%s]", field.right);
for (final Entry<Integer, String> field : fieldMappings) {
if (!aliasesSeen.add(field.getValue())) {
throw InvalidSqlInput.exception("Duplicate field in SELECT: [%s]", field.getValue());
}
}
}
Expand Down Expand Up @@ -353,7 +353,7 @@ private static void validateLimitAndOffset(final RelNode rootRel, final boolean
*/
private static void validateTypeChanges(
final RelNode rootRel,
final List<Pair<Integer, String>> fieldMappings,
final List<Entry<Integer, String>> fieldMappings,
@Nullable final Table targetTable,
final PlannerContext plannerContext
)
Expand All @@ -366,9 +366,9 @@ private static void validateTypeChanges(
MultiStageQueryContext.getColumnsExcludedFromTypeVerification(plannerContext.queryContext());
final ArrayIngestMode arrayIngestMode = MultiStageQueryContext.getArrayIngestMode(plannerContext.queryContext());

for (Pair<Integer, String> fieldMapping : fieldMappings) {
final int columnIndex = fieldMapping.left;
final String columnName = fieldMapping.right;
for (Entry<Integer, String> fieldMapping : fieldMappings) {
final int columnIndex = fieldMapping.getKey();
final String columnName = fieldMapping.getValue();
final RelDataTypeField oldSqlTypeField =
targetTable.getRowType(DruidTypeSystem.TYPE_FACTORY).getField(columnName, true, false);

Expand Down Expand Up @@ -427,11 +427,11 @@ private static void validateTypeChanges(
*
* Returns -1 if the list does not contain a time column.
*/
private static int getTimeColumnIndex(final List<Pair<Integer, String>> fieldMappings)
private static int getTimeColumnIndex(final List<Entry<Integer, String>> fieldMappings)
{
for (final Pair<Integer, String> field : fieldMappings) {
if (field.right.equals(ColumnHolder.TIME_COLUMN_NAME)) {
return field.left;
for (final Entry<Integer, String> field : fieldMappings) {
if (field.getValue().equals(ColumnHolder.TIME_COLUMN_NAME)) {
return field.getKey();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,14 @@ public void testExactCountDistinctWithFilter()

}

// Overrides the inherited test case with an empty body and marks it
// @Disabled, so the parent implementation never runs here.
// NOTE(review): presumably this query shape is not (yet) supported by
// this engine after the Calcite 1.37.0 upgrade — TODO confirm and
// document the reason, or re-enable once supported.
@Disabled
@Override
@Test
public void testExactCountDistinctWithFilter2()
{

}

@Disabled
@Override
@Test
Expand Down
4 changes: 2 additions & 2 deletions licenses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1616,7 +1616,7 @@ name: Apache Calcite
license_category: binary
module: java-core
license_name: Apache License version 2.0
version: 1.35.0
version: 1.37.0
libraries:
- org.apache.calcite: calcite-core
- org.apache.calcite: calcite-linq4j
Expand All @@ -1634,7 +1634,7 @@ name: Apache Calcite Avatica
license_category: binary
module: java-core
license_name: Apache License version 2.0
version: 1.23.0
version: 1.25.0
libraries:
- org.apache.calcite.avatica: avatica-core
- org.apache.calcite.avatica: avatica-metrics
Expand Down
10 changes: 5 additions & 5 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,12 @@
<apache.ranger.version>2.4.0</apache.ranger.version>
<gson.version>2.10.1</gson.version>
<scala.library.version>2.13.14</scala.library.version>
<avatica.version>1.23.0</avatica.version>
<avatica.version>1.25.0</avatica.version>
<avro.version>1.11.3</avro.version>
<!-- When updating Calcite, also propagate updates to these files which we've copied and modified:
default_config.fmpp
sql/src/main/codegen/includes/*
<!--
The base calcite parser was copied into the project; when updating Calcite run dev/upgrade-calcite-parser to adopt upstream changes
-->
<calcite.version>1.35.0</calcite.version>
<calcite.version>1.37.0</calcite.version>
<confluent.version>6.2.12</confluent.version>
<datasketches.version>4.2.0</datasketches.version>
<datasketches.memory.version>2.2.0</datasketches.memory.version>
Expand Down Expand Up @@ -2189,6 +2188,7 @@
<exclude>**/.classpath</exclude> <!-- Eclipse -->
<exclude>**/.project</exclude> <!-- Eclipse -->
<exclude>**/*.iq</exclude> <!-- quidem testfiles -->
<exclude>**/*.iq.out</exclude> <!-- quidem testfiles -->
</excludes>
</configuration>
</plugin>
Expand Down
3 changes: 3 additions & 0 deletions processing/src/test/resources/log4j2.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,8 @@
<Logger level="info" name="org.apache.druid" additivity="false">
<AppenderRef ref="Console"/>
</Logger>
<Logger level="debug" name="org.apache.calcite.plan.AbstractRelOptPlanner.rule_execution_summary" additivity="false">
<AppenderRef ref="Console"/>
</Logger>
</Loggers>
</Configuration>
80 changes: 7 additions & 73 deletions sql/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,8 @@
</executions>
</plugin>

<!-- Following plugins and their configurations are used to generate the custom Calcite's SQL parser for Druid -->
<!-- Extracts the Parser.jj from Calcite to ${project.build.directory}, where all the Freemarker templates are -->
<!-- Extracts the base parser from the calcite-core artifact. -->
<!-- This is provided to have the base available - to compare changes to -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
Expand All @@ -353,50 +353,16 @@
<version>${calcite.version}</version>
<type>jar</type>
<overWrite>true</overWrite>
<outputDirectory>${project.build.directory}/</outputDirectory>
<includes>**/Parser.jj</includes>
</artifactItem>
<artifactItem>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-core</artifactId>
<version>${calcite.version}</version>
<type>jar</type>
<overWrite>true</overWrite>
<outputDirectory>${project.build.directory}/</outputDirectory>
<includes>**/default_config.fmpp</includes>
<outputDirectory>${project.build.directory}/calcite-base-parser</outputDirectory>
<includes>codegen/**</includes>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>

<!-- Copy the templates present in the codegen directory of druid-sql containing custom SQL rules to
${project.build.directory}/codegen -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<executions>
<execution>
<id>copy-fmpp-resources</id>
<phase>generate-sources</phase>
<goals>
<goal>copy-resources</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/codegen</outputDirectory>
<resources>
<resource>
<directory>src/main/codegen</directory>
<filtering>false</filtering>
</resource>
</resources>
</configuration>
</execution>
</executions>
</plugin>

<!-- "Plugs in" the Calcite's Parser.jj with the variables present in config.fmpp. These contain the custom rules
as well as the class to which the custom implementation will get generated -->
<!-- Applies the fmpp templates to produce the final Parser.jj -->
<plugin>
<groupId>com.googlecode.fmpp-maven-plugin</groupId>
<artifactId>fmpp-maven-plugin</artifactId>
Expand All @@ -408,9 +374,9 @@
<goal>generate</goal>
</goals>
<configuration>
<cfgFile>${project.build.directory}/codegen/config.fmpp</cfgFile>
<cfgFile>src/main/codegen/config.fmpp</cfgFile>
<outputDirectory>${project.build.directory}/generated-sources</outputDirectory>
<templateDirectory>${project.build.directory}/codegen/templates</templateDirectory>
<templateDirectory>src/main/codegen/templates</templateDirectory>
</configuration>
</execution>
</executions>
Expand Down Expand Up @@ -440,38 +406,6 @@
</executions>
</plugin>

<!-- This plugin is used to replace the production rule for FROM clause in the calcite grammar.
It is done by a search and replace since calcite doesn't allow to override production rules generally
in its grammar (override is possible only if there's a hook for it in the grammar). And the FROM clause
doesn't contain any hook for extension. For the custom changes done in
extension, please check from.ftl file in sql module.
-->
<plugin>
<groupId>com.google.code.maven-replacer-plugin</groupId>
<artifactId>replacer</artifactId>
<version>1.5.3</version>
<executions>
<execution>
<phase>generate-sources</phase>
<goals>
<goal>replace</goal>
</goals>
</execution>
</executions>
<configuration>
<basedir>${project.build.directory}/generated-sources/org/apache/druid/sql/calcite/parser</basedir>
<includes>
<include>**/DruidSqlParserImpl.java</include>
</includes>
<replacements>
<replacement>
<token>fromClause = FromClause</token>
<value>fromClause = DruidFromClause</value>
</replacement>
</replacements>
</configuration>
</plugin>

<!-- Adds the path of the generated parser to the build path -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
Expand Down

0 comments on commit ac19b14

Please sign in to comment.