ad-freiburg · niklas88 · Aug 6, 2019 · Jul 10, 2019 · Jul 11, 2019 · Jul 16, 2019
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -78,7 +78,7 @@ set(USE_OPENMP OFF CACHE BOOL "Don't use OPENMP as default" FORCE)
 add_subdirectory(third_party/stxxl)
 # apply STXXL CXXFLAGS
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${STXXL_CXX_FLAGS}")
-include_directories(${STXXL_INCLUDE_DIRS})
+include_directories(SYSTEM ${STXXL_INCLUDE_DIRS})
 
 ################################
 # RE2
@@ -91,7 +91,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
 
 set(RE2_BUILD_TESTING OFF CACHE BOOL "enable testing for RE2" FORCE)
 add_subdirectory(third_party/re2)
-include_directories(third_party/re2)
+include_directories(SYSTEM third_party/re2)
 
 # reinstate original flags including all warnings
 set(CMAKE_CXX_FLAGS "${LOCAL_CXX_BACKUP_FLAGS}")

diff --git a/e2e/scientists_queries.yaml b/e2e/scientists_queries.yaml
@@ -403,7 +403,7 @@ queries:
         ?x ql:has-predicate ?predicate .
       }
       GROUP BY ?predicate
-      HAVING (?predicate < <Z) (?predicate = <Religion>)
+      HAVING (?predicate < "<Z") (?predicate = <Religion>)
     checks:
       - num_rows: 1
       - num_cols: 2
@@ -535,7 +535,7 @@ queries:
               ?subject <Profession> ?object
       }
       GROUP BY ?object
-      ORDER BY DESC((COUNT(?object) AS ?count))
+      ORDER BY DESC(?count)
     checks:
       - num_rows: 836 
       - num_cols: 2

diff --git a/src/SparqlEngineMain.cpp b/src/SparqlEngineMain.cpp
@@ -192,8 +192,8 @@ int main(int argc, char** argv) {
 void processQuery(QueryExecutionContext& qec, const string& query) {
   ad_utility::Timer t;
   t.start();
-  SparqlParser sp;
-  ParsedQuery pq = sp.parse(query);
+  SparqlParser sp(query);
+  ParsedQuery pq = sp.parse();
   pq.expandPrefixes();
   QueryPlanner qp(&qec);
   ad_utility::Timer timer;

diff --git a/src/WriteIndexListsMain.cpp b/src/WriteIndexListsMain.cpp
@@ -90,11 +90,12 @@ int main(int argc, char** argv) {
     QueryExecutionContext qec(index, engine);
     ParsedQuery q;
     if (!freebase) {
-      q = SparqlParser::parse("SELECT ?x WHERE {?x <is-a> <Scientist>}");
+      q = SparqlParser("SELECT ?x WHERE {?x <is-a> <Scientist>}").parse();
     } else {
-      q = SparqlParser::parse(
-          "PREFIX fb: <http://rdf.freebase.com/ns/> SELECT ?p WHERE {?p "
-          "fb:people.person.profession fb:m.06q2q}");
+      q = SparqlParser(
+              "PREFIX fb: <http://rdf.freebase.com/ns/> SELECT ?p WHERE {?p "
+              "fb:people.person.profession fb:m.06q2q}")
+              .parse();
       q.expandPrefixes();
     }
     QueryPlanner queryPlanner(&qec);

diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp
@@ -24,7 +24,8 @@ Filter::Filter(QueryExecutionContext* qec,
       _type(type),
       _lhs(lhs),
       _rhs(rhs),
-      _regexIgnoreCase(false) {}
+      _regexIgnoreCase(false),
+      _lhsAsString(false) {}
 
 // _____________________________________________________________________________
 string Filter::asString(size_t indent) const {
@@ -510,6 +511,11 @@ void Filter::computeResultFixedValue(
   IdTableStatic<WIDTH> result = resultTable->_data.moveToStatic<WIDTH>();
   const IdTableStatic<WIDTH> input = subRes->_data.asStaticView<WIDTH>();
 
+  if (_lhsAsString) {
+    AD_THROW(ad_semsearch::Exception::NOT_YET_IMPLEMENTED,
+             "The str function is not yet supported within filters.");
+  }
+
   // interpret the filters right hand side
   size_t lhs = _subtree->getVariableColumn(_lhs);
   Id rhs;
@@ -521,6 +527,19 @@ void Filter::computeResultFixedValue(
       } else if (ad_utility::isNumeric(_rhs)) {
         rhs_string = ad_utility::convertNumericToIndexWord(rhs_string);
       } else {
+        // TODO: This is not standard-conformant, but is currently required
+        // because our vocabulary stores IRIs with their angle brackets and
+        // literals with their quotation marks.
+        if (rhs_string.size() > 2 && rhs_string[1] == '<' &&
+            rhs_string[0] == '"' && rhs_string.back() == '"') {
+          // Remove the quotation marks surrounding the string.
+          rhs_string = rhs_string.substr(1, rhs_string.size() - 2);
+        } else if (std::count(rhs_string.begin(), rhs_string.end(), '"') > 2 &&
+                   rhs_string.back() == '"') {
+          // Remove the quotation marks surrounding the string.
+          rhs_string = rhs_string.substr(1, rhs_string.size() - 2);
+        }
+
         if (getIndex().getVocab().isCaseInsensitiveOrdering()) {
           // We have to move to the correct end of the
           // "same letters but different case" - range
@@ -544,7 +563,7 @@ void Filter::computeResultFixedValue(
         }
       }
       if (_type == SparqlFilter::EQ || _type == SparqlFilter::NE) {
-        if (!getIndex().getVocab().getId(_rhs, &rhs)) {
+        if (!getIndex().getVocab().getId(rhs_string, &rhs)) {
           rhs = std::numeric_limits<size_t>::max() - 1;
         }
       } else if (_type == SparqlFilter::GE) {

diff --git a/src/engine/Filter.h b/src/engine/Filter.h
@@ -71,6 +71,7 @@ class Filter : public Operation {
   }
 
   void setRegexIgnoreCase(bool i) { _regexIgnoreCase = i; }
+  void setLhsAsString(bool i) { _lhsAsString = i; }
 
   std::shared_ptr<QueryExecutionTree> getSubtree() const { return _subtree; };
 
@@ -93,6 +94,7 @@ class Filter : public Operation {
   string _lhs;
   string _rhs;
   bool _regexIgnoreCase;
+  bool _lhsAsString;
 
   /**
    * @brief Uses the result type and the filter type (_type) to apply the filter

diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp
@@ -174,7 +174,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart,
                   const Index& index,
                   ad_utility::HashSet<size_t>& distinctHashSet) {
   switch (a._type) {
-    case GroupBy::AggregateType::AVG: {
+    case ParsedQuery::AggregateType::AVG: {
       float res = 0;
       if (inputTypes[a._inCol] == ResultTable::ResultType::VERBATIM) {
         if (a._distinct) {
@@ -253,7 +253,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart,
       std::memcpy(&(*result)(resultRow, a._outCol), &res, sizeof(float));
       break;
     }
-    case GroupBy::AggregateType::COUNT:
+    case ParsedQuery::AggregateType::COUNT:
       if (a._distinct) {
         size_t count = 0;
         for (size_t i = blockStart; i <= blockEnd; i++) {
@@ -269,7 +269,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart,
         (*result)(resultRow, a._outCol) = blockEnd - blockStart + 1;
       }
       break;
-    case GroupBy::AggregateType::GROUP_CONCAT: {
+    case ParsedQuery::AggregateType::GROUP_CONCAT: {
       std::ostringstream out;
       std::string* delim = reinterpret_cast<string*>(a._userData);
       if (inputTypes[a._inCol] == ResultTable::ResultType::VERBATIM) {
@@ -421,7 +421,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart,
       outTable->_localVocab->push_back(out.str());
       break;
     }
-    case GroupBy::AggregateType::MAX: {
+    case ParsedQuery::AggregateType::MAX: {
       if (inputTypes[a._inCol] == ResultTable::ResultType::VERBATIM) {
         Id res = std::numeric_limits<Id>::lowest();
         for (size_t i = blockStart; i <= blockEnd; i++) {
@@ -451,7 +451,7 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart,
       }
       break;
     }
-    case GroupBy::AggregateType::MIN: {
+    case ParsedQuery::AggregateType::MIN: {
       if (inputTypes[a._inCol] == ResultTable::ResultType::VERBATIM) {
         Id res = std::numeric_limits<Id>::max();
         for (size_t i = blockStart; i <= blockEnd; i++) {
@@ -481,10 +481,10 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart,
       }
       break;
     }
-    case GroupBy::AggregateType::SAMPLE:
+    case ParsedQuery::AggregateType::SAMPLE:
       (*result)(resultRow, a._outCol) = input(blockEnd, a._inCol);
       break;
-    case GroupBy::AggregateType::SUM: {
+    case ParsedQuery::AggregateType::SUM: {
       float res = 0;
       if (inputTypes[a._inCol] == ResultTable::ResultType::VERBATIM) {
         if (a._distinct) {
@@ -561,12 +561,12 @@ void processGroup(const GroupBy::Aggregate& a, size_t blockStart,
       std::memcpy(&(*result)(resultRow, a._outCol), &res, sizeof(float));
       break;
     }
-    case GroupBy::AggregateType::FIRST:
+    case ParsedQuery::AggregateType::FIRST:
       // This does the same as sample, as the non grouping rows have no
       // inherent order.
       (*result)(resultRow, a._outCol) = input(blockStart, a._inCol);
       break;
-    case GroupBy::AggregateType::LAST:
+    case ParsedQuery::AggregateType::LAST:
       // This does the same as sample, as the non grouping rows have no
       // inherent order.
       (*result)(resultRow, a._outCol) = input(blockEnd, a._inCol);
@@ -674,7 +674,7 @@ void GroupBy::computeResult(ResultTable* result) {
     // Add an "identity" aggregate in the form of a sample aggregate to
     // facilitate the passthrough of the groupBy columns into the result
     aggregates.emplace_back();
-    aggregates.back()._type = AggregateType::SAMPLE;
+    aggregates.back()._type = ParsedQuery::AggregateType::SAMPLE;
     aggregates.back()._inCol = it->second;
     aggregates.back()._outCol = _varColMap.find(var)->second;
     aggregates.back()._userData = nullptr;
@@ -685,95 +685,14 @@ void GroupBy::computeResult(ResultTable* result) {
   for (const ParsedQuery::Alias& alias : _aliases) {
     if (alias._isAggregate) {
       aggregates.emplace_back();
-      if (ad_utility::startsWith(alias._function, "COUNT")) {
-        aggregates.back()._type = AggregateType::COUNT;
-      } else if (ad_utility::startsWith(alias._function, "GROUP_CONCAT")) {
-        aggregates.back()._type = AggregateType::GROUP_CONCAT;
-      } else if (ad_utility::startsWith(alias._function, "SAMPLE")) {
-        aggregates.back()._type = AggregateType::SAMPLE;
-      } else if (ad_utility::startsWith(alias._function, "MIN")) {
-        aggregates.back()._type = AggregateType::MIN;
-      } else if (ad_utility::startsWith(alias._function, "MAX")) {
-        aggregates.back()._type = AggregateType::MAX;
-      } else if (ad_utility::startsWith(alias._function, "SUM")) {
-        aggregates.back()._type = AggregateType::SUM;
-      } else if (ad_utility::startsWith(alias._function, "AVG")) {
-        aggregates.back()._type = AggregateType::AVG;
+      aggregates.back()._type = alias._type;
+      aggregates.back()._distinct = alias._isDistinct;
+      if (alias._type == ParsedQuery::AggregateType::GROUP_CONCAT) {
+        aggregates.back()._userData = new std::string(alias._delimiter);
       } else {
-        LOG(WARN) << "Unknown aggregate " << alias._function << std::endl;
-        aggregates.pop_back();
-        continue;
+        aggregates.back()._userData = nullptr;
       }
-
-      std::string inVarName;
-      if (aggregates.back()._type == AggregateType::GROUP_CONCAT) {
-        size_t varStart = alias._function.find('(');
-        size_t varStop = alias._function.rfind(')');
-        size_t delimitorPos = alias._function.find(';');
-        if (varStop > varStart && varStop != std::string::npos &&
-            varStart != std::string::npos) {
-          // found a matching pair of brackets
-          // look for a distinct keyword
-
-          if (alias._function.find("DISTINCT") != std::string::npos ||
-              alias._function.find("distinct") != std::string::npos) {
-            aggregates.back()._distinct = true;
-          } else {
-            aggregates.back()._distinct = false;
-          }
-
-          if (delimitorPos != std::string::npos) {
-            // found a delimiter, need to look for a separator assignment
-            inVarName = alias._function.substr(varStart + 1,
-                                               delimitorPos - varStart - 1);
-            if (aggregates.back()._distinct) {
-              inVarName = ad_utility::strip(inVarName, " \t").substr(8);
-            }
-            std::string concatString = alias._function.substr(
-                delimitorPos + 1, varStop - delimitorPos - 1);
-            concatString = ad_utility::strip(concatString, " ");
-            size_t startConcat = concatString.find('"');
-            size_t stopConcat = concatString.rfind('"');
-            if (stopConcat > startConcat && stopConcat != std::string::npos &&
-                startConcat != std::string::npos) {
-              aggregates.back()._userData = new std::string(concatString.substr(
-                  startConcat + 1, stopConcat - startConcat - 1));
-            } else {
-              LOG(WARN) << "Unable to parse the delimiter in GROUP_CONCAT"
-                           "aggregrate "
-                        << alias._function;
-              aggregates.back()._userData = new std::string(" ");
-            }
-          } else {
-            // found no delimiter, using the default separator ' '
-            inVarName =
-                alias._function.substr(varStart + 1, varStop - varStart - 1);
-            if (aggregates.back()._distinct) {
-              inVarName = ad_utility::strip(inVarName, " \t").substr(8);
-            }
-            aggregates.back()._userData = new std::string(" ");
-          }
-        }
-      } else {
-        size_t varStart = alias._function.find('(');
-        size_t varStop = alias._function.rfind(')');
-        if (varStop > varStart && varStop != std::string::npos &&
-            varStart != std::string::npos) {
-          if (alias._function.find("DISTINCT") != std::string::npos ||
-              alias._function.find("distinct") != std::string::npos) {
-            aggregates.back()._distinct = true;
-          } else {
-            aggregates.back()._distinct = false;
-          }
-          inVarName =
-              alias._function.substr(varStart + 1, varStop - varStart - 1);
-          if (aggregates.back()._distinct) {
-            inVarName = ad_utility::strip(inVarName, " \t").substr(8);
-          }
-        }
-      }
-      inVarName = ad_utility::strip(inVarName, " \t");
-      auto inIt = subtreeVarCols.find(inVarName);
+      auto inIt = subtreeVarCols.find(alias._inVarName);
       if (inIt == subtreeVarCols.end()) {
         LOG(WARN) << "The aggregate alias " << alias._function << " refers to "
                   << "a column not present in the query." << std::endl;
@@ -794,28 +713,28 @@ void GroupBy::computeResult(ResultTable* result) {
   result->_resultTypes.resize(result->_data.cols());
   for (size_t i = 0; i < result->_data.cols(); i++) {
     switch (aggregates[i]._type) {
-      case AggregateType::AVG:
+      case ParsedQuery::AggregateType::AVG:
         result->_resultTypes[i] = ResultTable::ResultType::FLOAT;
         break;
-      case AggregateType::COUNT:
+      case ParsedQuery::AggregateType::COUNT:
         result->_resultTypes[i] = ResultTable::ResultType::VERBATIM;
         break;
-      case AggregateType::GROUP_CONCAT:
+      case ParsedQuery::AggregateType::GROUP_CONCAT:
         result->_resultTypes[i] = ResultTable::ResultType::LOCAL_VOCAB;
         break;
-      case AggregateType::MAX:
+      case ParsedQuery::AggregateType::MAX:
         result->_resultTypes[i] =
             subresult->getResultType(aggregates[i]._inCol);
         break;
-      case AggregateType::MIN:
+      case ParsedQuery::AggregateType::MIN:
         result->_resultTypes[i] =
             subresult->getResultType(aggregates[i]._inCol);
         break;
-      case AggregateType::SAMPLE:
+      case ParsedQuery::AggregateType::SAMPLE:
         result->_resultTypes[i] =
             subresult->getResultType(aggregates[i]._inCol);
         break;
-      case AggregateType::SUM:
+      case ParsedQuery::AggregateType::SUM:
         result->_resultTypes[i] = ResultTable::ResultType::FLOAT;
         break;
       default:
@@ -843,7 +762,7 @@ void GroupBy::computeResult(ResultTable* result) {
 
   // Free the user data used by GROUP_CONCAT aggregates.
   for (Aggregate& a : aggregates) {
-    if (a._type == AggregateType::GROUP_CONCAT) {
+    if (a._type == ParsedQuery::AggregateType::GROUP_CONCAT) {
       delete static_cast<std::string*>(a._userData);
     }
   }

diff --git a/src/engine/GroupBy.h b/src/engine/GroupBy.h
@@ -18,26 +18,11 @@ using std::vector;
 
 class GroupBy : public Operation {
  public:
-  /**
-   * @brief All supported types of aggregate aliases
-   */
-  enum class AggregateType {
-    COUNT,
-    GROUP_CONCAT,
-    FIRST,
-    LAST,
-    SAMPLE,
-    MIN,
-    MAX,
-    SUM,
-    AVG
-  };
-
   /**
    * @brief Represents an aggregate alias in the select part of the query.
    */
   struct Aggregate {
-    AggregateType _type;
+    ParsedQuery::AggregateType _type;
     size_t _inCol, _outCol;
     // Used to store the string necessary for the group concat aggregate.
     // A void pointer is used to allow for storing arbitrary data should any