ad-freiburg · hannahbast · Jan 24, 2022 · Jan 23, 2022 · Jan 23, 2022 · Jan 23, 2022
diff --git a/src/ServerMain.cpp b/src/ServerMain.cpp
@@ -97,7 +97,7 @@ void printUsage(char* execName) {
 
 // Main function.
 int main(int argc, char** argv) {
-  char* locale = setlocale(LC_CTYPE, "");
+  setlocale(LC_CTYPE, "");
 
   std::locale loc;
   ad_utility::ReadableNumberFacet facet(1);
@@ -186,11 +186,8 @@ int main(int argc, char** argv) {
     exit(1);
   }
 
-  cout << endl
-       << EMPH_ON << "ServerMain, version " << __DATE__ << " " << __TIME__
-       << EMPH_OFF << endl
-       << endl;
-  cout << "Set locale LC_CTYPE to: " << locale << endl;
+  LOG(INFO) << EMPH_ON << "QLever Server, compiled on " << __DATE__ << " "
+            << __TIME__ << EMPH_OFF << std::endl;
 
   try {
     Server server(port, numThreads, memLimit);

diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp
@@ -27,7 +27,7 @@ using Awaitable = Server::Awaitable<T>;
 // __________________________________________________________________________
 void Server::initialize(const string& ontologyBaseName, bool useText,
                         bool usePatterns, bool usePatternTrick) {
-  LOG(INFO) << "Initializing and running server..." << std::endl;
+  LOG(INFO) << "Initializing server ..." << std::endl;
 
   _enablePatternTrick = usePatternTrick;
   _index.setUsePatterns(usePatterns);
@@ -44,7 +44,7 @@ void Server::initialize(const string& ontologyBaseName, bool useText,
 
   // Set flag.
   _initialized = true;
-  LOG(INFO) << "Done initializing server." << std::endl;
+  LOG(INFO) << "The server is ready" << std::endl;
 }
 
 // _____________________________________________________________________________

diff --git a/src/engine/SortPerformanceEstimator.cpp b/src/engine/SortPerformanceEstimator.cpp
@@ -120,7 +120,7 @@ void SortPerformanceEstimator::computeEstimatesExpensively(
   static_assert(isSorted(sampleValuesRows));
 
   LOG(INFO) << "Sorting random result tables to estimate the sorting "
-               "performance of this machine"
+               "performance of this machine ..."
             << std::endl;
 
   _samples.fill({});
@@ -183,6 +183,6 @@ void SortPerformanceEstimator::computeEstimatesExpensively(
       }
     }
   }
-  LOG(INFO) << "Done creating sort estimates." << std::endl;
+  LOG(DEBUG) << "Done computing sort estimates" << std::endl;
   _estimatesWereCalculated = true;
 }
diff --git a/src/global/Constants.h b/src/global/Constants.h
@@ -79,16 +79,13 @@ static const std::string ERROR_IGNORE_CASE_UNSUPPORTED =
     "your settings.json and rebuild your index. You can optionally specify the "
     "\"locale\" key, otherwise \"en.US\" will be used as default";
 static const std::string WARNING_ASCII_ONLY_PREFIXES =
-    "You requested the CTRE parser for tokenization (either via "
-    "ascii-prefixes-only = true in the settings.json or by requesting it "
-    "explicitly in TurtleParserMain). This means that the input Turtle data "
-    "may only use characters from the ASCII range and that no escape sequences "
-    "may be used in prefixed names (e.g., rdfs:label\\,el is not allowed). "
-    " Additionally, multiline literals are not allowed and triples have to end "
-    "at line boundaries (The regex \". *\\n\" must safely identify the end of "
-    "a triple)."
-    "This is stricter than the SPARQL standard but makes parsing faster. It "
-    "works for many Turtle dumps, e.g. that from Wikidata.";
+    "You specified \"ascii-prefixes-only = true\", which enables faster "
+    "parsing for well-behaved TTL files (see qlever/docs on GitHub)";
+// " but only works correctly if there are no escape sequences in "
+// "prefixed names (e.g., rdfs:label\\,el is not allowed), no multiline "
+// "literals, and the regex \". *\\n\" only matches at the end of a triple. "
+// "Most Turtle files fulfill these properties (e.g. that from Wikidata), "
+// "but not all";
 static const std::string LOCALE_DEFAULT_LANG = "en";
 static const std::string LOCALE_DEFAULT_COUNTRY = "US";
 static constexpr bool LOCALE_DEFAULT_IGNORE_PUNCTUATION = false;

diff --git a/src/index/CompressedRelation.h b/src/index/CompressedRelation.h
@@ -1,6 +1,6 @@
-//
-// Created by johannes on 01.07.21.
-//
+// Copyright 2021, University of Freiburg,
+// Chair of Algorithms and Data Structures
+// Author: Johannes Kalmbach <johannes.kalmbach@gmail.com>
 
 #ifndef QLEVER_COMPRESSEDRELATION_H
 #define QLEVER_COMPRESSEDRELATION_H
@@ -15,23 +15,24 @@
 #include "../util/Serializer/Serializer.h"
 #include "../util/Timer.h"
 
-// The meta data of a compressed block of Id triples in a certain permutation.
+// The meta data of a compressed block of ID triples in an index permutation.
 struct CompressedBlockMetaData {
   off_t _offsetInFile;
-  size_t _compressedSize;  // in Bytes
+  size_t _compressedSize;  // In bytes.
   size_t _numRows;
-  // e.g. in the POS permutation, "P" (not stored in the block) is col0),
-  // "O" is the col1.
-  // first and last are inclusive, the lastXXX is still contained in the block.
+  // For example, in the PSO permutation, col0 is the P and col1 is the S. The
+  // col0 ID is not stored in the block. First and last are meant inclusively,
+  // that is, they are both part of the block.
   Id _col0FirstId;
   Id _col0LastId;
   Id _col1FirstId;
   Id _col1LastId;
 
+  // Two of these are equal if all members are equal.
   bool operator==(const CompressedBlockMetaData&) const = default;
 };
 
-// How to serialize block meta data
+// Serialization of the block meta data.
 template <typename Serializer>
 void serialize(Serializer& s, CompressedBlockMetaData& b) {
   s | b._offsetInFile;
@@ -43,48 +44,55 @@ void serialize(Serializer& s, CompressedBlockMetaData& b) {
   s | b._col1LastId;
 }
 
-// The meta data of a compressed relation. (e.g. in the PSO permutation,
-// all triples with the same P form a relation.
+// The meta data of a whole compressed "relation", where relation refers to a
+// maximal sequence of triples with equal first component (e.g., P for the PSO
+// permutation).
 struct CompressedRelationMetaData {
   Id _col0Id;
   size_t _numRows;
-  float _multiplicityCol1;  // e.g. in PSO the multiplicity of "S"
-  float _multiplicityCol2;  // e.g. in PSO the multiplicity of "O"
-  // If there are multiple Ids in the same block (e.g. for SPO, OSP index), then
-  // all of these IDs/relations span exactly one block and it suffices to store
-  // their offset in the (uncompressed!) block. The special value Id(-1)
-  // signals, that this relation owns all its blocks exclusively;
+  float _multiplicityCol1;  // E.g., in PSO this is the multiplicity of "S".
+  float _multiplicityCol2;  // E.g., in PSO this is the multiplicity of "O".
+  // If this "relation" is contained in a block together with other "relations",
+  // then all of these relations are contained only in this block and
+  // `_offsetInBlock` stores the offset in this block (referring to the index in
+  // the uncompressed sequence of triples).  Otherwise, this "relation" is
+  // stored in one or several blocks of its own, and we set `_offsetInBlock` to
+  // `Id(-1)`.
   Id _offsetInBlock = Id(-1);
 
   size_t getNofElements() const { return _numRows; }
 
-  // Setters and getters for the multiplicities
+  // Setters and getters for the multiplicities.
   float getCol1Multiplicity() const { return _multiplicityCol1; }
   float getCol2Multiplicity() const { return _multiplicityCol2; }
   void setCol1Multiplicity(float mult) { _multiplicityCol1 = mult; }
   void setCol2Multiplicity(float mult) { _multiplicityCol2 = mult; }
 
   bool isFunctional() const { return _multiplicityCol1 == 1.0; }
 
-  // A special value for an "empty" or "nonexisting" meta data, needed for
-  // the MmapBased metaData.
+  // A special value for an "empty" or "nonexisting" meta data. This is needed
+  // for the mmap-based meta data.
   static CompressedRelationMetaData emptyMetaData() {
     size_t m = size_t(-1);
     return CompressedRelationMetaData{m, m, 0, 0, m};
   }
 
-  // Equal if all members are equal.
+  // Two of these are equal if all members are equal.
   bool operator==(const CompressedRelationMetaData&) const = default;
 
   /**
-   * @brief Perform a scan for one col0Id i.e. retrieve all YZ from the XYZ
-   * permutation for a specific col0Id value of X
+   * @brief For a permutation XYZ, retrieve all YZ for a given X.
+   *
    * @tparam Permutation The permutations Index::POS()... have different types
-   * @param col0Id The col0Id (in Id space) for which to search, e.g. fixed
-   * value for O in OSP permutation.
-   * @param result The Id table to which we will write. Must have 2 columns.
-   * @param permutation The Permutation to use (in particularly POS(), SOP(),...
-   * members of Index class).
+   *
+   * @param col0Id The ID of the "relation". That is, for permutation XYZ, the
+   * ID of an X.
+   *
+   * @param result The ID table to which we write the result, which must have
+   * exactly two columns.
+   *
+   * @param permutation The permutation from which to scan, which is one of:
+   * PSO, POS, SPO, SOP, OSO, OPS.
    */
   // The IdTable is a rather expensive type, so we don't include it here.
   // but we can also not forward declare it because it is actually an alias.
@@ -94,26 +102,26 @@ struct CompressedRelationMetaData {
                    ad_utility::SharedConcurrentTimeoutTimer timer = nullptr);
 
   /**
-   * @brief Perform a scan for two keys i.e. retrieve all Z from the XYZ
-   * permutation for specific key values of X and Y.
+   * @brief For a permutation XYZ, retrieve all Z for given X and Y.
+   *
    * @tparam Permutation The permutations Index::POS()... have different types
-   * @param keyFirst The first key (as a raw string that is yet to be
-   * transformed to index space) for which to search, e.g. fixed value for O in
-   * OSP permutation.
-   * @param keySecond The second key (as a raw string that is yet to be
-   * transformed to index space) for which to search, e.g. fixed value for S in
-   * OSP permutation.
-   * @param result The Id table to which we will write. Must have 2 columns.
-   * @param permutation The Permutation to use (in particularly POS(), SOP,...
-   * members of Index class).
+   *
+   * @param col0Id The ID for X.
+   *
+   * @param col1Id The ID for Y.
+   *
+   * @param result The ID table to which we write the result.
+   *
+   * @param permutation The permutation from which to scan, which is one of:
+   * PSO, POS, SPO, SOP, OSO, OPS.
    */
   template <class PermutationInfo, typename IdTableImpl>
   static void scan(const Id count, const Id& col1Id, IdTableImpl* result,
                    const PermutationInfo& permutation,
                    ad_utility::SharedConcurrentTimeoutTimer timer = nullptr);
 
  private:
-  // some helper functions for reading and decompressing of blocks.
+  // Some helper functions for reading and decompressing blocks.
 
   template <class Permutation>
   static std::vector<char> readCompressedBlockFromFile(
@@ -131,7 +139,7 @@ struct CompressedRelationMetaData {
       const CompressedBlockMetaData& block, const Permutation& permutation);
 };
 
-// How to serialize a CompressedRelationMetaData
+// Serialization of the compressed "relation" meta data.
 template <class Serializer>
 void serialize(Serializer& s, CompressedRelationMetaData& c) {
   s | c._col0Id;
@@ -155,12 +163,22 @@ class CompressedRelationWriter {
   /// Create using a filename, to which the relation data will be written.
   CompressedRelationWriter(ad_utility::File f) : _outfile{std::move(f)} {}
 
-  /// Add a complete (single) relation
-  /// \param col0Id the Id of the relation (e.g. the P in PSO permutation)
-  /// \param data the sorted data of the relation (e.g. vector of (S, O) in PSO)
-  /// \param numDistinctCol1 the number of distinct elements in the 1st colun (S
-  /// in PSO), needed for calculating the multiplicity \param functional is this
-  /// relation functional
+  /**
+   * Add a complete (single) relation.
+   *
+   * \param col0Id The ID of the relation, that is, the value of X for a
+   * permutation XYZ.
+   *
+   * \param data The sorted data of the relation, that is, the sequence of all
+   * pairs of Y and Z for the given X.
+   *
+   * \param numDistinctCol1 The number of distinct values for X (from which we
+   * can also calculate the average multiplicity, so we don't need to store that
+   * explicitly).
+   *
+   * \param functional Whether each value of Y occurs at most once (so that we
+   * have a function from the occurring Ys to the Zs).
+   */
   void addRelation(Id col0Id,
                    const ad_utility::BufferedVector<array<Id, 2>>& data,
                    size_t numDistinctCol1, bool functional);
@@ -169,33 +187,33 @@ class CompressedRelationWriter {
   /// still be in some internal buffer.
   void finish() { writeBufferedRelationsToSingleBlock(); }
 
-  /// Get all the CompressedRelationMetaData that was created by the calls to
+  /// Get the complete CompressedRelationMetaData created by the calls to
   /// addRelation. This meta data is then deleted from the
-  /// CompressedRelationWriter. Typical workflow: add all relations, then call
-  /// finish() and then call this method
+  /// CompressedRelationWriter. The typical workflow is: add all relations,
+  /// then call `finish()` and then call this method.
   auto getFinishedMetaData() {
     return std::move(_metaDataBuffer);
     _metaDataBuffer.clear();
   }
 
-  /// Get all the CompressedBlockMetaData that was created by the calls to
+  /// Get all the CompressedBlockMetaData that were created by the calls to
   /// addRelation. This meta data is then deleted from the
-  /// CompressedRelationWriter. Typical workflow: add all relations, then call
-  /// finish() and then call this method
+  /// CompressedRelationWriter. The typical workflow is: add all relations,
+  /// then call `finish()` and then call this method.
   auto getFinishedBlocks() {
     return std::move(_blockBuffer);
     _blockBuffer.clear();
   }
 
  private:
-  // Compress the _buffer into a single block and write it to _outfile.
-  // Update _currentBlockData with the metaData of the written Block.
-  // Clear the _buffer.
+  // Compress the contents of `_buffer` into a single block and write it to
+  // _outfile. Update `_currentBlockData` with the meta data of the written
+  // block. Then clear `_buffer`.
   void writeBufferedRelationsToSingleBlock();
 
-  // Compress the relation from arg data into one or more blocks, depending on
-  // its size. Write the _blocks to _outfile and append all the created
-  // block meta data to the _blockBuffer.
+  // Compress the relation from `data` into one or more blocks, depending on
+  // its size. Write the blocks to `_outfile` and append all the created
+  // block meta data to `_blockBuffer`.
   void writeRelationToExclusiveBlocks(
       Id col0Id, const ad_utility::BufferedVector<array<Id, 2>>& data);
 };

diff --git a/src/index/ConstantsIndexBuilding.h b/src/index/ConstantsIndexBuilding.h
@@ -48,11 +48,12 @@ static const size_t BZIP2_MAX_TOTAL_BUFFER_SIZE = 1 << 30;
 static const size_t THRESHOLD_RELATION_CREATION = 2 << 20;
 
 // ________________________________________________________________
-static const std::string PARTIAL_VOCAB_FILE_NAME = ".partial-vocabulary";
-static const std::string PARTIAL_MMAP_IDS = ".partial-ids-mmap";
+static const std::string PARTIAL_VOCAB_FILE_NAME = ".tmp.partial-vocabulary.";
+static const std::string PARTIAL_MMAP_IDS = ".tmp.partial-ids-mmap.";
 
 // ________________________________________________________________
-static const std::string TMP_BASENAME_COMPRESSION = ".tmp.compression_index";
+static const std::string TMP_BASENAME_COMPRESSION =
+    ".tmp.for-prefix-compression.";
 
 // _________________________________________________________________
 // The degree of parallelism that is used for the index building step, where the

diff --git a/src/index/ExternalVocabulary.cpp b/src/index/ExternalVocabulary.cpp
@@ -103,8 +103,6 @@ void ExternalVocabulary<Comp>::initFromFile(const string& file) {
     off_t posLastOfft = _file.getLastOffset(&_startOfOffsets);
     _size = (posLastOfft - _startOfOffsets) / sizeof(off_t);
   }
-  LOG(INFO) << "Initialized external vocabulary. It contains " << _size
-            << " elements." << std::endl;
 }
 
 template class ExternalVocabulary<TripleComponentComparator>;