ad-freiburg · joka921 · May 6, 2021 · May 8, 2020 · May 11, 2020 · May 19, 2020
diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
@@ -18,23 +18,26 @@ jobs:
     # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: false
       matrix:
-        compiler: [g++, clang++]
+        compiler: [g++-10, clang++-11]
         os: [ ubuntu-latest ]
+        warnings: [ "", -Werror ]
 
 
     steps:
     - uses: actions/checkout@v2
     - name: Install dependencies
-      run:  sudo apt-get update && sudo apt-get install -y libsparsehash-dev libicu-dev tzdata clang
+      run:  sudo apt-get update && sudo apt-get install -y libsparsehash-dev libicu-dev tzdata clang gcc-10 clang-11
+
     - name: Checkout submodules
       run: git submodule update --init --recursive
 
 
     - name: Configure CMake
       # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
       # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
-      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{matrix.compiler}} -DADDITIONAL_COMPILER_FLAGS=-Werror
+      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{matrix.compiler}} -DADDITIONAL_COMPILER_FLAGS=${{matrix.warnings}}
 
     - name: Build
       # Build your program with the given configuration

diff --git a/e2e/scientists_queries.yaml b/e2e/scientists_queries.yaml
@@ -381,6 +381,20 @@ queries:
       - contains_row: ["<Albert_Einstein>", "<Nobel_Prize_in_Physics>"]
       - contains_row: ["<Albert_Fert>", "<Wolf_Prize_in_Physics>"]
       - contains_row: ["<Albert_Overhauser>", "<National_Medal_of_Science_for_Physical_Science>"]
+  - query : having-height
+    type: no-text
+    sparql: |
+      SELECT (COUNT(?profession) as ?count) ?height WHERE {
+        ?x <Profession> ?profession .
+        ?x <Height> ?height
+      }
+      GROUP BY ?height
+      HAVING (?height > 1.7)
+    checks:
+      - num_rows: 32
+      - num_cols: 2
+      - selected: ["?count", "?height"]
+      - contains_row: ["5", "1.803"]
   - query : having-predicate-religion
     type: no-text
     sparql: |
@@ -720,20 +734,6 @@ queries:
       - num_rows: 1
       - selected: ["?a", "?b"]
       - contains_row: ["<Charles,_Prince_of_Wales>", "<Lord_of_the_Isles>"]
-  - query : having-height
-    type: no-text
-    sparql: |
-      SELECT (COUNT(?profession) as ?count) ?height WHERE {
-        ?x <Profession> ?profession .
-        ?x <Height> ?height
-      }
-      GROUP BY ?height
-      HAVING (?height > 1.7)
-    checks:
-      - num_rows: 32
-      - num_cols: 2
-      - selected: ["?count", "?height"]
-      - contains_row: ["5", "1.803"]
   - query : filter-depending-on-last-optional
     type: no-text
     sparql: |
@@ -764,6 +764,21 @@ queries:
       - contains_row: ["<Albert_Einstein>", "<Nobel_Prize_in_Physics>"]
       - contains_row: ["<Al_Gore>", "<Nobel_Peace_Prize>"]
       - contains_row: ["<Dennis_Gabor>", "<Nobel_Prize_in_Physics>"]
+  # End-to-end check for SPARQL MINUS.
+  # NOTE(review): despite the name "minus-biologists", the query below selects
+  # all entrepreneurs that are not also engineers -- confirm the intended name.
+  - query : minus-biologists 
+    type: no-text
+    sparql: |
+      SELECT ?a WHERE {
+        ?a <is-a> <Entrepreneur> .
+        MINUS {
+          ?a <is-a> <Engineer>
+        }
+      }
+    checks:
+      - num_rows: 152 
+      - num_cols: 1
+      - selected: ["?a"]
+      - contains_row: ["<Barney_Pell>"]
+      - contains_row: ["<Duc_Pham>"]
   - query : prefix-filter-disjunction
     type: no-text
     sparql: |

diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt
@@ -32,6 +32,8 @@ add_library(engine
         Values.cpp Values.h
         Bind.cpp Bind.h
         IdTable.h
-        ../util/Random.h)
+        ../util/Random.h
+        Minus.h Minus.cpp
+        )
 
 target_link_libraries(engine index parser SortPerformanceEstimator absl::flat_hash_set)
diff --git a/src/engine/Minus.cpp b/src/engine/Minus.cpp
@@ -0,0 +1,230 @@
+// Copyright 2018, University of Freiburg,
+// Chair of Algorithms and Data Structures.
+// Author: Florian Kramer (florian.kramer@netpun.uni-freiburg.de)
+
+#include "Minus.h"
+
+#include "../util/Exception.h"
+#include "CallFixedSize.h"
+
+using std::string;
+
+// _____________________________________________________________________________
+// Construct a MINUS operation over the two subtrees `left` and `right`.
+// Each entry of `matchedColumns` is a pair {column index in the result of
+// `left`, column index in the result of `right`}.
+// The constructor asserts (via AD_CHECK) that the sort columns reported by
+// each subtree start with the corresponding matched columns, in the same order
+// as they appear in `matchedColumns`.
+Minus::Minus(QueryExecutionContext* qec,
+             std::shared_ptr<QueryExecutionTree> left,
+             std::shared_ptr<QueryExecutionTree> right,
+             std::vector<array<size_t, 2>> matchedColumns)
+    : Operation{qec},
+      _left{std::move(left)},
+      _right{std::move(right)},
+      _matchedColumns{std::move(matchedColumns)} {
+  // Check that the invariant (inputs are sorted on the matched columns) holds.
+  auto l = _left->resultSortedOn();
+  auto r = _right->resultSortedOn();
+  // Each input must be sorted on at least as many columns as are matched,
+  // otherwise the index accesses l[i] / r[i] below would be out of range.
+  AD_CHECK(_matchedColumns.size() <= l.size());
+  AD_CHECK(_matchedColumns.size() <= r.size());
+  // The i-th matched column pair must be the i-th sort column of each input.
+  for (size_t i = 0; i < _matchedColumns.size(); ++i) {
+    AD_CHECK(_matchedColumns[i][0] == l[i]);
+    AD_CHECK(_matchedColumns[i][1] == r[i]);
+  }
+}
+
+// _____________________________________________________________________________
+// Build a textual representation of this operation: `indent` spaces, the
+// keyword "MINUS", and then the string representations of the left and the
+// right subtree (each requested with the same `indent`).
+string Minus::asString(size_t indent) const {
+  std::ostringstream stream;
+  // Emit the whole indentation in one go.
+  stream << string(indent, ' ');
+  stream << "MINUS\n";
+  stream << _left->asString(indent);
+  stream << "\n";
+  stream << _right->asString(indent);
+  stream << " ";
+  return stream.str();
+}
+
+// _____________________________________________________________________________
+// Return the fixed label "Minus" for this operation.
+string Minus::getDescriptor() const {
+  return string{"Minus"};
+}
+
+// _____________________________________________________________________________
+// Evaluate both subtrees and fill `result` with the rows of the left input
+// that remain after the MINUS. `result` must not be null.
+void Minus::computeResult(ResultTable* result) {
+  AD_CHECK(result);
+  LOG(DEBUG) << "Minus result computation..." << endl;
+
+  RuntimeInformation& info = getRuntimeInfo();
+  // Sort order and width of the output are taken from this operation's own
+  // accessors, which forward to the left input.
+  result->_sortedBy = resultSortedOn();
+  result->_data.setCols(getResultWidth());
+
+  // Obtain the results of the left and the right subtree.
+  const auto lhs = _left->getResult();
+  const auto rhs = _right->getResult();
+
+  // Attach the runtime information of both children to our own.
+  info.addChild(_left->getRootOperation()->getRuntimeInfo());
+  info.addChild(_right->getRootOperation()->getRuntimeInfo());
+
+  LOG(DEBUG) << "Minus subresult computation done." << std::endl;
+
+  // The output columns are exactly the columns of the left input, hence the
+  // column types are the same as well.
+  result->_resultTypes = lhs->_resultTypes;
+
+  LOG(DEBUG) << "Computing minus of results of size " << lhs->size()
+             << " and " << rhs->size() << endl;
+
+  // Dispatch to the instantiation of computeMinus that matches the widths of
+  // the two inputs.
+  int widthOfLeft = lhs->_data.cols();
+  int widthOfRight = rhs->_data.cols();
+  CALL_FIXED_SIZE_2(widthOfLeft, widthOfRight, computeMinus, lhs->_data,
+                    rhs->_data, _matchedColumns, &result->_data);
+  LOG(DEBUG) << "Minus result computation done." << endl;
+}
+
+// _____________________________________________________________________________
+// The variable-to-column mapping of the result is the one of the left input.
+ad_utility::HashMap<string, size_t> Minus::getVariableColumns() const {
+  auto columnsOfLeft = _left->getVariableColumns();
+  return columnsOfLeft;
+}
+
+// _____________________________________________________________________________
+// The result has as many columns as the left input.
+size_t Minus::getResultWidth() const {
+  const size_t widthOfLeft = _left->getResultWidth();
+  return widthOfLeft;
+}
+
+// _____________________________________________________________________________
+// The result reports the same sort columns as the left input.
+vector<size_t> Minus::resultSortedOn() const {
+  vector<size_t> sortColumnsOfLeft = _left->resultSortedOn();
+  return sortColumnsOfLeft;
+}
+
+// _____________________________________________________________________________
+// Return the multiplicity of column `col` of the left input. Because this
+// operation may delete an arbitrary number of rows, the value is only an
+// upper bound for the multiplicity in the actual result.
+float Minus::getMultiplicity(size_t col) {
+  const float multiplicityOfLeft = _left->getMultiplicity(col);
+  return multiplicityOfLeft;
+}
+
+// _____________________________________________________________________________
+// Return the size estimate of the left input. Because this operation may
+// delete an arbitrary number of rows, the value is only an upper bound for
+// the size of the actual result.
+size_t Minus::getSizeEstimate() {
+  const size_t sizeOfLeft = _left->getSizeEstimate();
+  return sizeOfLeft;
+}
+
+// _____________________________________________________________________________
+// Estimated cost: the cost of computing both inputs plus the sum of their
+// estimated sizes for the MINUS itself.
+size_t Minus::getCostEstimate() {
+  const size_t ownCost = _left->getSizeEstimate() + _right->getSizeEstimate();
+  const size_t costOfInputs =
+      _left->getCostEstimate() + _right->getCostEstimate();
+  return costOfInputs + ownCost;
+}
+
+// _____________________________________________________________________________
+// Subtract `dynB` from `dynA` (SPARQL MINUS) and store the outcome in
+// `*dynResult`. The result consists of all rows mu of `dynA` for which every
+// row mu' of `dynB` is either not compatible with mu (one value for a
+// variable defined in both differs) or has a domain disjoint from that of mu
+// (mu' defines no solution for any variable for which mu defines a solution).
+//
+// A_WIDTH, B_WIDTH: static widths of the two inputs, chosen by the caller
+//                   (see the CALL_FIXED_SIZE_2 dispatch in computeResult).
+// dynA, dynB:       the two inputs. Both are walked exactly once from front
+//                   to back, so the algorithm relies on them being sorted by
+//                   the join columns.
+// joinColumns:      pairs {column in dynA, column in dynB}; entry 0 holds the
+//                   primary sort columns.
+// dynResult:        output table; the surviving rows of dynA end up here.
+template <int A_WIDTH, int B_WIDTH>
+void Minus::computeMinus(const IdTable& dynA, const IdTable& dynB,
+                         const vector<array<Id, 2>>& joinColumns,
+                         IdTable* dynResult) const {
+  // The output always has the same width as the left input.
+  constexpr int OUT_WIDTH = A_WIDTH;
+
+  // Trivial case: nothing to subtract from, the result stays as it is.
+  if (dynA.size() == 0) {
+    return;
+  }
+
+  // Trivial case: either B is the empty set of solution mappings, or the
+  // inputs have no column in common (all domains are disjoint). In both cases
+  // no row of A is removed, so the result is a plain copy of A.
+  if (dynB.size() == 0 || joinColumns.empty()) {
+    *dynResult = dynA;
+    return;
+  }
+
+  IdTableView<A_WIDTH> a = dynA.asStaticView<A_WIDTH>();
+  IdTableView<B_WIDTH> b = dynB.asStaticView<B_WIDTH>();
+  IdTableStatic<OUT_WIDTH> result = dynResult->moveToStatic<OUT_WIDTH>();
+
+  /**
+   * @brief A function to copy a row from a to the end of result.
+   * @param ia The index of the row in a.
+   */
+  auto writeResult = [&result, &a](size_t ia) { result.push_back(a[ia]); };
+
+  auto checkTimeout = checkTimeoutAfterNCallsFactory();
+
+  // Zipper-style merge: `ia` and `ib` are the current rows in a and b.
+  size_t ia = 0;
+  size_t ib = 0;
+  while (ia < a.size() && ib < b.size()) {
+    // Join columns 0 are the primary sort columns.
+    // Rows of a that are smaller than the current row of b cannot be matched
+    // by any remaining row of b, so they are part of the result.
+    while (a(ia, joinColumns[0][0]) < b(ib, joinColumns[0][1])) {
+      writeResult(ia);
+      ia++;
+      checkTimeout();
+      if (ia >= a.size()) {
+        goto finish;
+      }
+    }
+    // Rows of b that are smaller than the current row of a cannot match any
+    // remaining row of a and are skipped.
+    while (b(ib, joinColumns[0][1]) < a(ia, joinColumns[0][0])) {
+      ib++;
+      checkTimeout();
+      if (ib >= b.size()) {
+        goto finish;
+      }
+    }
+
+    // The primary join column agrees; the remaining join columns decide.
+    while (b(ib, joinColumns[0][1]) == a(ia, joinColumns[0][0])) {
+      // check if the rest of the join columns also match
+      RowComparison rowEq = isRowEqSkipFirst(a, b, ia, ib, joinColumns);
+      switch (rowEq) {
+        case RowComparison::EQUAL: {
+          // ib matches ia on all join columns, so ia is discarded. ib is
+          // kept because it may also discard the following rows of a.
+          ia++;
+          if (ia >= a.size()) {
+            goto finish;
+          }
+        } break;
+        case RowComparison::LEFT_SMALLER: {
+          // ib does not discard ia, and there can not be another ib that
+          // would discard ia.
+          writeResult(ia);
+          ia++;
+          if (ia >= a.size()) {
+            goto finish;
+          }
+        } break;
+        case RowComparison::RIGHT_SMALLER: {
+          // ib is too small to discard ia or any later row of a.
+          ib++;
+          if (ib >= b.size()) {
+            goto finish;
+          }
+        } break;
+        default:
+          AD_CHECK(false);
+      }
+      checkTimeout();
+    }
+  }
+finish:
+  // b is exhausted (or a is): all remaining rows of a belong to the result.
+  result.reserve(result.size() + (a.size() - ia));
+  while (ia < a.size()) {
+    writeResult(ia);
+    ia++;
+  }
+  *dynResult = result.moveToDynamic();
+}
+
+// Compare row `ia` of `a` with row `ib` of `b` on all join columns except the
+// first one (index 0 of `joinColumns`), in the order given by `joinColumns`.
+// Returns LEFT_SMALLER / RIGHT_SMALLER according to the first column pair in
+// which the two rows differ, and EQUAL if they agree on all compared columns
+// (in particular if there is at most one join column).
+template <int A_WIDTH, int B_WIDTH>
+Minus::RowComparison Minus::isRowEqSkipFirst(
+    const IdTableView<A_WIDTH>& a, const IdTableView<B_WIDTH>& b, size_t ia,
+    size_t ib, const vector<array<size_t, 2>>& joinColumns) {
+  const size_t numJoinColumns = joinColumns.size();
+  // Start at 1: the first join column is deliberately not compared here.
+  size_t position = 1;
+  while (position < numJoinColumns) {
+    const auto& columnPair = joinColumns[position];
+    const Id valueLeft{a(ia, columnPair[0])};
+    const Id valueRight{b(ib, columnPair[1])};
+    if (valueLeft < valueRight) {
+      return RowComparison::LEFT_SMALLER;
+    } else if (valueLeft > valueRight) {
+      return RowComparison::RIGHT_SMALLER;
+    }
+    ++position;
+  }
+  return RowComparison::EQUAL;
+}