From 78fab67cddae6496f75c5b60e8947d95126b2332 Mon Sep 17 00:00:00 2001
From: Dave Birdsall <dbirdsall@apache.org>
Date: Mon, 26 Jun 2017 21:38:53 +0000
Subject: [PATCH 1/2] [TRAFODION-2661] Improve MDAM costing for RangeSpecs

---
 core/sql/optimizer/ScanOptimizer.cpp | 52 ++--------------------------
 1 file changed, 2 insertions(+), 50 deletions(-)

diff --git a/core/sql/optimizer/ScanOptimizer.cpp b/core/sql/optimizer/ScanOptimizer.cpp
index 8edc8b37c2..652891383c 100644
--- a/core/sql/optimizer/ScanOptimizer.cpp
+++ b/core/sql/optimizer/ScanOptimizer.cpp
@@ -9788,45 +9788,8 @@ void MDAMOptimalDisjunctPrefixWA::updateMinPrefix()
       if (CmpCommon::getDefault(SIMPLE_COST_MODEL) == DF_ON)
       {
 	DCMPASSERT(scmCost != NULL);
-        // When RangeSpec feture transforms old mdam disjuncts in to "Range in"
-        // types, we apply all disjuncts at one shot. The old way of taking Min(prefixCost) does
-        // not make sense and leads to gross underestimation of MDAM plan.
-        // For example, consider a predicate like this "A in (10, 20) and B in (100, 200)"
-        // without RangeSpec: we have 4 disjuncts Like this:
-        //     dis0 : "A = 10 and B = 100", cost C1 = MIN (CostA, CostB) for values (10,100)
-        //     dis1 : "A = 10 and B = 200", cost C2 = MIN (CostA, CostB) for values (10,200)
-        //     dis2 : "A = 20 and B = 100", cost C3 = MIN (CostA, CostB) for values (20,100)
-        //     dis3 : "A = 20 and B = 200", cost C4 = MIN (CostA, CostB) for values (20,200)
-        //     Total Mdam Cost = C1 + C2 + C3 + C4
-        // with RangeSpec: we have 1 disjunct like this:
-        //     dis0 : "A = 10 or A = 20" and "B = 100 or B = 200"
-        // we should not take MIN (CostA, CostB) for the second case, it should be the cost to apply 
-        // last key predicate. 
-        //     Total Mdam Cost = CostB
-        //
-        //  This is a heuristics in that we unconditionally include the last key column 
-        //  with IN list (OR preds) predicate without going through the cost comparison 
-        //  step.
-        //
-        // Updated comments: The commentary above is incorrect but I don't know quite
-        // what to do with it yet. MDAM at run time is a recursive algorithm. In the 
-        // example above, it will materialize values in the A column, and for each one,
-        // do a subset access on the second column. So the cost is a sum of the
-        // materialization cost on the first column and the subset access on the second.
-        // If there is a third key column C with no predicates on it, it would be 
-        // inefficient to go MDAM to the last column position; rather it would be better
-        // to use B as the stop column. That is, do subsets on each distinct value of (A,B),
-        // rather than do subsets on each distinct (A,B,C). The larger the UEC of C, the
-        // more gross the inefficiency. Unfortunately, the code below will cause us to 
-        // go MDAM to column C. In reference to the comments above, we need to devise
-        // a better way to estimate cost in the presence of RangeSpecs.
-        if ( (CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) == DF_ON ) &&
-              optimizer_.getDisjuncts().containsOrPredsInRanges() &&
-              prefixColumnPosition_ == (lastColumnPosition_ - 1) 
-           ) {
-          newMinimumFound = TRUE;
-        } else
-          newMinimumFound = (pMinCost_ == NULL) ? TRUE :
+   
+        newMinimumFound = (pMinCost_ == NULL) ? TRUE :
             (scmCost->scmCompareCosts(*pMinCost_) == LESS);
       }
       else
@@ -9868,17 +9831,6 @@ void MDAMOptimalDisjunctPrefixWA::updateMinPrefix()
       optSeqKBRead_ = prefixKBRead_;
       optKeyPreds_.insert(prefixKeyPreds_); // is a copy more efficient?
 
-      // Note: Formerly there was code here that would set stopColumn_
-      // to the last column position if the CQD RANGESPEC_TRANSFORMATION
-      // was on. This is incorrect; it would cause us to use MDAM to 
-      // traverse through all columns always, even though it may be
-      // grossly inefficient to do so. (See commentary earlier in this
-      // method.) As it stands now, so long as there are no RangeSpec
-      // key predicates, this code will correctly pick the stop column.
-      // If there are RangeSpec predicates, code earlier in this method
-      // may cause us to only consider MDAM traversing on all columns.
-      // We can improve this later by improving how RangeSpec predicates
-      // are costed for MDAM.
       stopColumn_ = prefixColumnPosition_;
 
       prevColChosen_ = TRUE;

From e3c7edce74fc260e0ebc9f2d7cb6ad8c2d02ccee Mon Sep 17 00:00:00 2001
From: Dave Birdsall <dbirdsall@apache.org>
Date: Mon, 3 Jul 2017 21:21:13 +0000
Subject: [PATCH 2/2] Rework for [TRAFODION-2661]

This rework fixes a bug in the pre-code-gen time code. The bug was
that in the presence of partitioning key predicates, logic that
communicates stop column information to the generator was being
bypassed. This would result in traversal to the last key predicate
at run time, with poor performance if the stop column was prior to
the last key predicate.

Also included in this rework are some tweaks to the run-time debug
code for MDAM (the code had become non-functional from not being used
for a long time), and a fix to a run-time memory leak bug that shows
up as an assert when that debug code is present.
---
 core/sql/common/str.cpp           | 10 ++++++----
 core/sql/common/str.h             |  6 ++++--
 core/sql/executor/MdamPoint.cpp   |  2 +-
 core/sql/executor/MdamPoint.h     |  3 ---
 core/sql/executor/ex_mdam.cpp     |  2 ++
 core/sql/nskgmake/Makerules.linux |  3 +++
 core/sql/optimizer/mdam.cpp       | 22 +++++++++++++++-------
 7 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/core/sql/common/str.cpp b/core/sql/common/str.cpp
index 4b301041d4..8e1195bf69 100644
--- a/core/sql/common/str.cpp
+++ b/core/sql/common/str.cpp
@@ -1620,9 +1620,11 @@ Int32 str_convertToHexAscii(const char * src,               // in
 //    char * dataPointer = getDataPointer();
 //    Lng32 len = tupp_.getAllocatedSize();
 //
-//    printBrief(dataPointer, len) 
+//    printBrief(dataPointer, len) if you want an end of line
+//
+//    printBrief(dataPointer, len, FALSE) if you don't 
 //    
-void printBrief(char* dataPointer, Lng32 len) 
+void printBrief(char* dataPointer, Lng32 len, NABoolean endLine) 
 {
    // We don't know what the data type is, but we do know how
    // long the field is. So we will guess the data type.
@@ -1771,7 +1773,7 @@ void printBrief(char* dataPointer, Lng32 len)
        }
      }    
    cout << local;
-   // cout << *(Lng32 *)dataPointer;
-   // End test change
+   if (endLine)
+     cout << endl;
  }
 
diff --git a/core/sql/common/str.h b/core/sql/common/str.h
index 12e039c0a5..eb60ae0d36 100644
--- a/core/sql/common/str.h
+++ b/core/sql/common/str.h
@@ -461,8 +461,10 @@ Int32 str_convertToHexAscii(const char * src,                   // in
 //    char * dataPointer = getDataPointer();
 //    Lng32 len = tupp_.getAllocatedSize();
 //
-//    printBrief(dataPointer, len) 
+//    printBrief(dataPointer, len) if you want an end of line
+//
+//    printBrief(dataPointer, len, FALSE) if you don't 
 //    
 NA_EIDPROC
-void printBrief(char* dataPointer, Lng32 keyLen); 
+void printBrief(char* dataPointer, Lng32 keyLen, NABoolean endLine = TRUE); 
 #endif // STR_H
diff --git a/core/sql/executor/MdamPoint.cpp b/core/sql/executor/MdamPoint.cpp
index 29a4867b6e..1ecc00c5ce 100644
--- a/core/sql/executor/MdamPoint.cpp
+++ b/core/sql/executor/MdamPoint.cpp
@@ -138,6 +138,6 @@ void MdamPoint::printBrief() const
   char * dataPointer = getDataPointer();
   Lng32 keyLen = tupp_.getAllocatedSize();
 
-  printBrief(dataPointer, keyLen);
+  ::printBrief(dataPointer, keyLen, FALSE /* no end of line wanted */);
 }
 #endif
diff --git a/core/sql/executor/MdamPoint.h b/core/sql/executor/MdamPoint.h
index 7eb47426fd..2dcdf0e3ce 100644
--- a/core/sql/executor/MdamPoint.h
+++ b/core/sql/executor/MdamPoint.h
@@ -103,9 +103,6 @@ class MdamPoint
   NA_EIDPROC void printBrief() const;
   #endif /* NA_MDAM_EXECUTOR_DEBUG */
   
-  #ifdef _DEBUG
-  NA_EIDPROC static void printBrief(char* ptr, Lng32 len) ;
-  #endif 
 private:
 
   // The point's value.
diff --git a/core/sql/executor/ex_mdam.cpp b/core/sql/executor/ex_mdam.cpp
index f238911395..9f6e35a4a5 100644
--- a/core/sql/executor/ex_mdam.cpp
+++ b/core/sql/executor/ex_mdam.cpp
@@ -762,6 +762,8 @@ while ((predIterator.positionToNextOr(&currentPred_)) &&
                                             columnGenInfo_->getLength(),
                                             mdamIntervalHeap,
                                             mdamRefListEntryHeap);
+                or_temp1.deleteAllIntervals(mdamIntervalHeap,
+                                            mdamRefListEntryHeap);
               }
           }
       }
diff --git a/core/sql/nskgmake/Makerules.linux b/core/sql/nskgmake/Makerules.linux
index 802f670126..9d90ebe1c5 100755
--- a/core/sql/nskgmake/Makerules.linux
+++ b/core/sql/nskgmake/Makerules.linux
@@ -216,6 +216,9 @@ ifeq ($(FLAVOR),debug)
    SYS_CXXFLAGS += -O0
    SYS_DEFS += -D_DEBUG
 
+   # uncomment the next line if you want the MDAM run-time debugging code
+   #SYS_CXXFLAGS += -DNA_MDAM_EXECUTOR_DEBUG
+
    # for coverage checking support
    ifeq ($(SQ_COVERAGE),1)
       SYS_CXXFLAGS += --coverage
diff --git a/core/sql/optimizer/mdam.cpp b/core/sql/optimizer/mdam.cpp
index 3a0a82cb8a..01d99f4fc0 100644
--- a/core/sql/optimizer/mdam.cpp
+++ b/core/sql/optimizer/mdam.cpp
@@ -1680,20 +1680,28 @@ void MdamKey::preCodeGen(ValueIdSet& executorPredicates,
   // might have wanted).  This code *does not* print any
   // *official* warnings (but it prints a warning to the console).
   // --------------------------------------------------------------------
-  // if the mdamkey has more than one disjunct and if one
-  // disjunct does not have key  predicates:
 
-  // Now, tell each element of the columnOrderListArray up to
-  // which column they contain valid key predicates:
+  // Note: The "for" loop below does two very important things:
+  // 1. Detects empty disjuncts (as discussed above) 
+  //
+  // and, while it is doing that,
+  //
+  // 2. Sets the stop column for each disjunct.
+  //
+  // Note that if we don't set the stop column, then MDAM will traverse
+  // *all* of the key columns, whether they have predicates or not, and
+  // performance will be terrible.
+
   NABoolean isEmpty = FALSE;
-  if (!partKeyPredsAdded)
+
     for (i=0; i < curDisjuncts->entries(); i++)
       {
         (*columnOrderListPtrArrayPtr_)[i]->setStopColumn(getStopColumn(i));
 
         // Find out if the disjunct has key preds:
         const ColumnOrderList& coList= *((*columnOrderListPtrArrayPtr_)[i]);
-        isEmpty = TRUE; // assume it is empty
+        if (!partKeyPredsAdded)
+          isEmpty = TRUE; // assume it is empty
         for (CollIndex j=0; j < coList.entries(); j++)
           {
             if (coList[j] AND (NOT coList[j]->isEmpty()))
@@ -1707,7 +1715,7 @@ void MdamKey::preCodeGen(ValueIdSet& executorPredicates,
             FSOWARNING("Empty disjunct");
             break;
           }
-      } // find out whether there is an empty disjunct
+      } // set stop columns and also find out whether there is an empty disjunct
 
   if (isEmpty)
     {