Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adjust joins cost estimation [HZ-2658, HZ-2494] #25249

Closed
Closed
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
57b9c02
Adjust joins cost estimation
Fly-Style Aug 17, 2023
7d4410f
Fix test
Fly-Style Aug 18, 2023
187c05d
Introduce QueryPlanListener for further optimizer tests
Fly-Style Aug 18, 2023
1775447
Adjust cost model
Fly-Style Aug 18, 2023
48abe08
Brackets
Fly-Style Aug 18, 2023
98ad736
Revert test change back, and actually it is correct
Fly-Style Aug 18, 2023
d1a1b16
Address review comments
Fly-Style Sep 4, 2023
c4bd3ff
Divide processed and produced rows
Fly-Style Sep 5, 2023
83dc9b5
Apply suggestions from code review
Fly-Style Sep 7, 2023
dc4e5dc
Merge branch 'master' into fix/5.4/hash-join-cost-adj
Fly-Style Sep 7, 2023
d003fc7
Adjust costs after the discussion
Fly-Style Sep 8, 2023
f2b9e19
Merge branch 'master' into fix/5.4/hash-join-cost-adj
Fly-Style Sep 8, 2023
bd9efeb
Remove selectivity from cpu calculation
Fly-Style Sep 18, 2023
50fee00
Fix algorithm even better
Fly-Style Sep 18, 2023
6044a69
Update comment
Fly-Style Sep 18, 2023
516ab7b
Fix HazelcastTable ctor workflow
Fly-Style Sep 18, 2023
05ec305
Update hazelcast-sql/src/main/java/com/hazelcast/jet/sql/impl/opt/phy…
Fly-Style Sep 19, 2023
68def60
Multiply
Fly-Style Sep 28, 2023
b06e92b
Cost readjustment
Fly-Style Sep 28, 2023
4933580
Merge branch 'master' into fix/5.4/hash-join-cost-adj
Fly-Style Oct 4, 2023
fbd5492
Docs and constant adjustments
Fly-Style Oct 4, 2023
ae0bd63
Typos
Fly-Style Oct 4, 2023
f65293f
Revert back rows estimation of IMap; add broadcast factor for hash LE…
Fly-Style Oct 19, 2023
491e5fd
Clarify doc for hash join
Fly-Style Jan 12, 2024
a716a94
Checkstyle
Fly-Style Jan 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ public SqlPlan prepare(OptimizationTask task) {
int memberCount = nodeEngine.getClusterService().getSize(MemberSelectors.DATA_MEMBER_SELECTOR);

OptimizerContext context = OptimizerContext.create(
nodeEngine.getHazelcastInstance(),
task.getSchema(),
task.getSearchPaths(),
task.getArguments(),
Expand Down Expand Up @@ -804,8 +805,7 @@ private PhysicalRel optimize(
logger.fine("After physical opt:\n" + RelOptUtil.toString(physicalRel));
}

PhysicalRel finalPhysicalRel = physicalRel;
queryPlanListeners.forEach(l -> l.onQueryPlanBuilt(finalPhysicalRel));
// TODO[sasha]: capture final physical rel for listeners here.
Fly-Style marked this conversation as resolved.
Show resolved Hide resolved
return physicalRel;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package com.hazelcast.jet.sql.impl;

import com.google.common.collect.ImmutableList;
import com.hazelcast.core.HazelcastInstance;
import com.hazelcast.jet.sql.impl.opt.cost.CostFactory;
import com.hazelcast.jet.sql.impl.opt.metadata.HazelcastRelMdBoundedness;
import com.hazelcast.jet.sql.impl.opt.metadata.HazelcastRelMdPrunability;
Expand Down Expand Up @@ -107,14 +108,15 @@ private OptimizerContext(
* @return Context.
*/
public static OptimizerContext create(
        HazelcastInstance hz,
        SqlCatalog schema,
        List<List<String>> searchPaths,
        List<Object> arguments,
        int memberCount,
        IMapResolver iMapResolver
) {
    // Resolve all catalog tables into a Calcite root schema. The Hazelcast
    // instance is forwarded so IMap-backed tables can derive their row-count
    // statistic from the live map size.
    HazelcastSchema rootSchema = HazelcastSchemaUtils.createRootSchema(hz, schema);
    return create(rootSchema, searchPaths, arguments, memberCount, iMapResolver);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,21 @@ public class Cost implements RelOptCost {
public static final Cost HUGE = new Cost(Double.MAX_VALUE / 100, Double.MAX_VALUE / 100, Double.MAX_VALUE / 100);
public static final Cost INFINITY = new Cost(Double.MAX_VALUE, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY);

/**
* Multiplier representing the per-row CPU cost of hash table building actions:
* - row hash computation; (estimate - 3 ops, the process itself is heavier from CPU ops POV)
* - probe hash table; (estimate - 1 op)
* - walk through hash chain (estimate - 1 op, assuming hash collision may happen)
* - and compare with each element; (estimate - 1 op, assuming hash collision may happen)
* - add the k-v to the table. (estimate - 1 op).
*/
public static final double HASH_JOIN_MULTIPLIER = 7;
Fly-Style marked this conversation as resolved.
Show resolved Hide resolved

/**
* Multiplier representing the CPU cost of a single row comparison.
*/
public static final double JOIN_ROW_CMP_MULTIPLIER = 2;

private final double rows;
private final double cpu;
private final double network;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package com.hazelcast.jet.sql.impl.opt.physical;

import com.hazelcast.jet.sql.impl.opt.cost.Cost;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptCost;
import org.apache.calcite.plan.RelOptPlanner;
Expand All @@ -28,7 +29,6 @@
import org.checkerframework.checker.nullness.qual.Nullable;

public class JoinHashPhysicalRel extends JoinPhysicalRel {
private static final double COST_FACTOR = 1.1;

JoinHashPhysicalRel(
RelOptCluster cluster,
Expand Down Expand Up @@ -58,9 +58,27 @@ public Join copy(
return new JoinHashPhysicalRel(getCluster(), traitSet, left, right, conditionExpr, joinType);
}

/**
* Cost calculation of Hash Join relation. It does not rely on children cost.
* <p>
* Hash Join is a more advanced join algorithm: it builds a hash table from the left
* row set and then compares each row from the right side against it. Cost estimation is the following: <ol>
* <li> Produced row count is L * R * (join selectivity).
* <li> Processed row count is L + R because we traverse both sides once per join.
* <li> CPU is L * (hash table build cost) + R * (row comparison cost). </ol>
* <p>
* A perfect estimation must also include memory (occupied by the hash table) and IO costs.
*/
@Override
@Nullable
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
return super.computeSelfCost(planner, mq).multiplyBy(COST_FACTOR);
double leftRowCount = mq.getRowCount(getLeft());
double rightRowCount = mq.getRowCount(getRight());

double producedRowCount = mq.getRowCount(this);
double cpu = leftRowCount * Cost.HASH_JOIN_MULTIPLIER
Copy link
Contributor

@k-jamroz k-jamroz Oct 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in case of outer join one of the sides is broadcast. This requires more processing. Shouldn't we include that in cost calculation (I don't mean network - however that would nice too, but CPU)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we include that in cost calculation

Yes, makes sense.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not what I meant: in case of outer non-equi join, outer side is broadcast. So we do rightCount*memberCount lookups which directly translates to CPU cost of lookups. However, they are also different than in case of equi-join. Eg. for query like select * from m1 left join m2 on m1.__key<>m2.__key hashmap in SqlHashJoinP degenerates to single key, and we iterate over all rows (this is a de-facto nested loop with spooling left side in memory)

Additionally, those rows have to be sent over the network, but AFAIR we do not add network cost to CPU cost. We have a separate network cost, which has its own multiplier but is generally 0.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hashmap in SqlHashJoinP degenerates to single key

If I see correctly in case of non-equi hash join the hashmap is always degenerate. So we:

  1. build memberCount single-unique-key hash maps (multimaps to be precise) each containing rightCount items
  2. then we do leftCount lookups (in single-key hash map) and iterate over all found rows (ie. rightCount rows). This gives leftCount lookups (are they constant time with regard to rightCount?) and leftCount*rightCount comparisons for iterating over elements found in multimap (ie. all right side rows).
  3. then projection of matching rows

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

equi-join case is somewhere on the continuum between "hashmap lookup gives always single right row" and "hashmap lookup gives all right rows" depending on selectivity of equijoin columns (which we cannot estimate because we do not have column histograms).
also in equi-join hash table is partitioned, but might be skewed or even degenerate for the same reason

+ rightRowCount * Cost.JOIN_ROW_CMP_MULTIPLIER;

return planner.getCostFactory().makeCost(producedRowCount, cpu, 0.);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,24 @@
import com.google.common.collect.ImmutableList;
import com.hazelcast.jet.sql.impl.HazelcastPhysicalScan;
import com.hazelcast.jet.sql.impl.opt.OptUtils;
import com.hazelcast.jet.sql.impl.opt.cost.Cost;
import com.hazelcast.jet.sql.impl.schema.HazelcastTable;
import com.hazelcast.jet.sql.impl.validate.HazelcastSqlOperatorTable;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptCost;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinInfo;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexShuttle;
import org.apache.calcite.util.ImmutableIntList;
import org.checkerframework.checker.nullness.qual.Nullable;

import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -109,6 +114,46 @@ public <V> V accept(CreateDagVisitor<V> visitor) {
return visitor.onNestedLoopJoin(this);
}

/**
* Cost calculation of Nested Loop Join relation.
* <p>
* Nested Loop Join algorithm is a simple join algorithm, where for each left row,
* we are traversing the whole right row set. Cost estimation is the following: <ol>
* <li> Produced row count is L * R * (join selectivity).
* <li> Processed row count is L * k * R, where k is 1 for non-equi-join,
* (join selectivity) ≤ k ≤ 1 for equi-join and 1/R for key lookup.
* <li> CPU is L * (join selectivity) * R * (row comparison cost) assuming k
* converges to the join selectivity on average. </ol>
* <p>
* A perfect estimation must also include memory and IO costs.
*/
@Override
@Nullable
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
final double leftRowCount = mq.getRowCount(left);
final double rightRowCount = mq.getRowCount(right);
if (Double.isInfinite(leftRowCount) || Double.isInfinite(rightRowCount)) {
Fly-Style marked this conversation as resolved.
Show resolved Hide resolved
return planner.getCostFactory().makeInfiniteCost();
}

RelOptCost rightCost = planner.getCost(getRight(), mq);
if (rightCost == null) {
return planner.getCostFactory().makeInfiniteCost();
}

Double selectivity = mq.getSelectivity(this, condition);
if (selectivity == null) {
selectivity = 1.;
}

// TODO: introduce selectivity estimator, but ATM we taking the worst case scenario : selectivity = 1.0.
double producedRows = mq.getRowCount(this);
double processedRowsEstimate = leftRowCount * selectivity * rightRowCount;
double cpuEstimate = Math.max(1.0, processedRowsEstimate - 1) * Cost.JOIN_ROW_CMP_MULTIPLIER;

return planner.getCostFactory().makeCost(producedRows, cpuEstimate, 0);
}

@Override
public Join copy(
RelTraitSet traitSet,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@

package com.hazelcast.jet.sql.impl.schema;

import com.hazelcast.core.HazelcastInstance;
import com.hazelcast.sql.impl.QueryUtils;
import com.hazelcast.sql.impl.schema.SqlCatalog;
import com.hazelcast.sql.impl.schema.Table;
import com.hazelcast.sql.impl.schema.map.PartitionedMapTable;
import org.apache.calcite.schema.Schema;
import org.apache.calcite.schema.Statistic;

Expand Down Expand Up @@ -56,7 +58,7 @@ public static HazelcastSchema createCatalog(Schema schema) {
*
* @return Top-level schema.
*/
public static HazelcastSchema createRootSchema(SqlCatalog catalog) {
public static HazelcastSchema createRootSchema(HazelcastInstance hz, SqlCatalog catalog) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we do not need hz anymore

// Create schemas.
Map<String, Schema> schemaMap = new HashMap<>();

Expand All @@ -68,11 +70,9 @@ public static HazelcastSchema createRootSchema(SqlCatalog catalog) {
for (Map.Entry<String, Table> tableEntry : currentSchemaEntry.getValue().entrySet()) {
String tableName = tableEntry.getKey();
Table table = tableEntry.getValue();

HazelcastTable convertedTable = new HazelcastTable(
table,
createTableStatistic(table)
);
HazelcastTable convertedTable = table instanceof PartitionedMapTable
? new HazelcastTable(table, hz)
k-jamroz marked this conversation as resolved.
Show resolved Hide resolved
: new HazelcastTable(table, createTableStatistic(table));

schemaTables.put(tableName, convertedTable);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package com.hazelcast.jet.sql.impl.schema;

import com.hazelcast.core.HazelcastInstance;
import com.hazelcast.jet.sql.impl.opt.OptUtils;
import com.hazelcast.jet.sql.impl.opt.common.CalcIntoScanRule;
import com.hazelcast.jet.sql.impl.opt.cost.CostUtils;
Expand Down Expand Up @@ -46,6 +47,7 @@
import java.util.Objects;
import java.util.Set;
import java.util.StringJoiner;
import java.util.function.Supplier;

import static java.util.stream.Collectors.joining;

Expand Down Expand Up @@ -83,30 +85,33 @@
* properties, thus making further optimization more complex.
*/
public class HazelcastTable extends AbstractTable {

private final Table target;
private final Statistic statistic;
private final Supplier<Statistic> statisticSupplier;
private final RexNode filter;
private List<RexNode> projects;

private RelDataType rowType;
private final Set<String> hiddenFieldNames = new HashSet<>();

/**
 * Creates a table whose row-count statistic is derived from the current size
 * of the map named by the table's SQL name on the given instance.
 */
public HazelcastTable(Table target, HazelcastInstance instance) {
    this(target, createTableStatistic(target, instance));
}

/**
 * Creates a table backed by a fixed, precomputed statistic.
 */
public HazelcastTable(Table target, Statistic statistic) {
    this.target = target;
    this.statisticSupplier = () -> statistic;
    this.filter = null;
}

private HazelcastTable(
Table target,
Statistic statistic,
@Nonnull List<RexNode> projects,
Supplier<Statistic> statisticSupplier,
List<RexNode> projects,
@Nullable RelDataType rowType,
@Nullable RexNode filter
) {
this.target = target;
this.statistic = statistic;
this.statisticSupplier = statisticSupplier;
this.projects = projects;
this.rowType = rowType == null ? computeRowType(projects) : rowType;
this.filter = filter;
Expand All @@ -127,11 +132,11 @@ private void initRowType() {
}

/** Returns a copy of this table with the given projection (and optional precomputed row type). */
public HazelcastTable withProject(List<RexNode> projects, @Nullable RelDataType rowType) {
    return new HazelcastTable(target, statisticSupplier, projects, rowType, filter);
}

/** Returns a copy of this table with the given filter applied. */
public HazelcastTable withFilter(RexNode filter) {
    return new HazelcastTable(target, statisticSupplier, projects, rowType, filter);
}

@Nonnull
Expand All @@ -158,6 +163,7 @@ public RelDataType getRowType(RelDataTypeFactory typeFactory) {

@Override
public Statistic getStatistic() {
Statistic statistic = statisticSupplier.get();
if (filter == null) {
return statistic;
} else {
Expand All @@ -167,8 +173,9 @@ public Statistic getStatistic() {
}
}

@SuppressWarnings("DataFlowIssue")
public double getTotalRowCount() {
    // Unfiltered row count taken straight from the supplied statistic.
    final Statistic stats = statisticSupplier.get();
    return stats.getRowCount();
}

public boolean isHidden(String fieldName) {
Expand Down Expand Up @@ -212,12 +219,17 @@ private RelDataType computeRowType(List<RexNode> projects) {
return new RelRecordType(StructKind.PEEK_FIELDS, typeFields, false);
}

/** Builds a statistic whose row count is the current size of the table's backing map. */
private static Statistic createTableStatistic(Table table, HazelcastInstance instance) {
    final int rowCount = instance.getMap(table.getSqlName()).size();
    return new HazelcastTableStatistic(rowCount);
}

/**
* Statistics that takes into account the row count after the filter is applied.
*/
private final class AdjustedStatistic implements Statistic {

private final Double rowCount;
private final Statistic statistic = statisticSupplier.get();

private AdjustedStatistic(Double rowCount) {
this.rowCount = rowCount;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -314,12 +314,7 @@ protected static PlanRow parse(String input) {

@Override
public String toString() {
    // Indent by `level` spaces (guarding against a negative level), then append the node text.
    final String indent = " ".repeat(Math.max(0, level));
    return indent + node;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ private static OptimizerContext createContext() {
List<List<String>> searchPaths = QueryUtils.prepareSearchPaths(emptyList(), tableResolvers);

return OptimizerContext.create(
instance(),
new SqlCatalog(tableResolvers),
searchPaths,
emptyList(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ private static OptimizerContext createContext() {
List<List<String>> searchPaths = QueryUtils.prepareSearchPaths(emptyList(), tableResolvers);

return OptimizerContext.create(
instance(),
new SqlCatalog(tableResolvers),
searchPaths,
emptyList(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ private void checkQuery(String query) {

private static OptimizerContext createContext() {
return OptimizerContext.create(
instance(),
new SqlCatalog(emptyList()),
emptyList(),
emptyList(),
Expand Down