Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve log #549

Merged
merged 16 commits into from
Jan 24, 2022
9 changes: 3 additions & 6 deletions src/ServerMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ void printUsage(char* execName) {

// Main function.
int main(int argc, char** argv) {
char* locale = setlocale(LC_CTYPE, "");
setlocale(LC_CTYPE, "");

std::locale loc;
ad_utility::ReadableNumberFacet facet(1);
Expand Down Expand Up @@ -186,11 +186,8 @@ int main(int argc, char** argv) {
exit(1);
}

cout << endl
<< EMPH_ON << "ServerMain, version " << __DATE__ << " " << __TIME__
<< EMPH_OFF << endl
<< endl;
cout << "Set locale LC_CTYPE to: " << locale << endl;
LOG(INFO) << EMPH_ON << "QLever Server, compiled on " << __DATE__ << " "
<< __TIME__ << EMPH_OFF << std::endl;

try {
Server server(port, numThreads, memLimit);
Expand Down
4 changes: 2 additions & 2 deletions src/engine/Server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ using Awaitable = Server::Awaitable<T>;
// __________________________________________________________________________
void Server::initialize(const string& ontologyBaseName, bool useText,
bool usePatterns, bool usePatternTrick) {
LOG(INFO) << "Initializing and running server..." << std::endl;
LOG(INFO) << "Initializing server ..." << std::endl;

_enablePatternTrick = usePatternTrick;
_index.setUsePatterns(usePatterns);
Expand All @@ -44,7 +44,7 @@ void Server::initialize(const string& ontologyBaseName, bool useText,

// Set flag.
_initialized = true;
LOG(INFO) << "Done initializing server." << std::endl;
LOG(INFO) << "The server is ready" << std::endl;
}

// _____________________________________________________________________________
Expand Down
4 changes: 2 additions & 2 deletions src/engine/SortPerformanceEstimator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ void SortPerformanceEstimator::computeEstimatesExpensively(
static_assert(isSorted(sampleValuesRows));

LOG(INFO) << "Sorting random result tables to estimate the sorting "
"performance of this machine"
"performance of this machine ..."
<< std::endl;

_samples.fill({});
Expand Down Expand Up @@ -183,6 +183,6 @@ void SortPerformanceEstimator::computeEstimatesExpensively(
}
}
}
LOG(INFO) << "Done creating sort estimates." << std::endl;
LOG(DEBUG) << "Done computing sort estimates" << std::endl;
_estimatesWereCalculated = true;
}
17 changes: 7 additions & 10 deletions src/global/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,16 +79,13 @@ static const std::string ERROR_IGNORE_CASE_UNSUPPORTED =
"your settings.json and rebuild your index. You can optionally specify the "
"\"locale\" key, otherwise \"en.US\" will be used as default";
static const std::string WARNING_ASCII_ONLY_PREFIXES =
"You requested the CTRE parser for tokenization (either via "
"ascii-prefixes-only = true in the settings.json or by requesting it "
"explicitly in TurtleParserMain). This means that the input Turtle data "
"may only use characters from the ASCII range and that no escape sequences "
"may be used in prefixed names (e.g., rdfs:label\\,el is not allowed). "
" Additionally, multiline literals are not allowed and triples have to end "
"at line boundaries (The regex \". *\\n\" must safely identify the end of "
"a triple)."
"This is stricter than the SPARQL standard but makes parsing faster. It "
"works for many Turtle dumps, e.g. that from Wikidata.";
"You specified \"ascii-prefixes-only = true\", which enables faster "
"parsing for well-behaved TTL files (see qlever/docs on GitHub)";
// " but only works correctly if there are no escape sequences in "
// "prefixed names (e.g., rdfs:label\\,el is not allowed), no multiline "
// "literals, and the regex \". *\\n\" only matches at the end of a triple. "
// "Most Turtle files fulfill these properties (e.g. that from Wikidata), "
// "but not all";
static const std::string LOCALE_DEFAULT_LANG = "en";
static const std::string LOCALE_DEFAULT_COUNTRY = "US";
static constexpr bool LOCALE_DEFAULT_IGNORE_PUNCTUATION = false;
Expand Down
136 changes: 77 additions & 59 deletions src/index/CompressedRelation.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//
// Created by johannes on 01.07.21.
//
// Copyright 2021, University of Freiburg,
// Chair of Algorithms and Data Structures
// Author: Johannes Kalmbach <johannes.kalmbach@gmail.com>

#ifndef QLEVER_COMPRESSEDRELATION_H
#define QLEVER_COMPRESSEDRELATION_H
Expand All @@ -15,23 +15,24 @@
#include "../util/Serializer/Serializer.h"
#include "../util/Timer.h"

// The meta data of a compressed block of Id triples in a certain permutation.
// The meta data of a compressed block of ID triples in an index permutation.
struct CompressedBlockMetaData {
off_t _offsetInFile;
size_t _compressedSize; // in Bytes
size_t _compressedSize; // In bytes.
size_t _numRows;
// e.g. in the POS permutation, "P" (not stored in the block) is col0),
// "O" is the col1.
// first and last are inclusive, the lastXXX is still contained in the block.
// For example, in the PSO permutation, col0 is the P and col1 is the S. The
// col0 ID is not stored in the block. First and last are meant inclusively,
// that is, they are both part of the block.
Id _col0FirstId;
Id _col0LastId;
Id _col1FirstId;
Id _col1LastId;

// Two of these are equal if all members are equal.
bool operator==(const CompressedBlockMetaData&) const = default;
};

// How to serialize block meta data
// Serialization of the block meta data.
template <typename Serializer>
void serialize(Serializer& s, CompressedBlockMetaData& b) {
s | b._offsetInFile;
Expand All @@ -43,48 +44,55 @@ void serialize(Serializer& s, CompressedBlockMetaData& b) {
s | b._col1LastId;
}

// The meta data of a compressed relation. (e.g. in the PSO permutation,
// all triples with the same P form a relation.
// The meta data of a whole compressed "relation", where relation refers to a
// maximal sequence of triples with equal first component (e.g., P for the PSO
// permutation).
struct CompressedRelationMetaData {
Id _col0Id;
size_t _numRows;
float _multiplicityCol1; // e.g. in PSO the multiplicity of "S"
float _multiplicityCol2; // e.g. in PSO the multiplicity of "O"
// If there are multiple Ids in the same block (e.g. for SPO, OSP index), then
// all of these IDs/relations span exactly one block and it suffices to store
// their offset in the (uncompressed!) block. The special value Id(-1)
// signals, that this relation owns all its blocks exclusively;
float _multiplicityCol1; // E.g., in PSO this is the multiplicity of "S".
float _multiplicityCol2; // E.g., in PSO this is the multiplicity of "O".
// If this "relation" is contained in a block together with other "relations",
// then all of these relations are contained only in this block and
// `_offsetInBlock` stores the offset in this block (referring to the index in
// the uncompressed sequence of triples). Otherwise, this "relation" is
// stored in one or several blocks of its own, and we set `_offsetInBlock` to
// `Id(-1)`.
Id _offsetInBlock = Id(-1);

size_t getNofElements() const { return _numRows; }

// Setters and getters for the multiplicities
// Setters and getters for the multiplicities.
float getCol1Multiplicity() const { return _multiplicityCol1; }
float getCol2Multiplicity() const { return _multiplicityCol2; }
void setCol1Multiplicity(float mult) { _multiplicityCol1 = mult; }
void setCol2Multiplicity(float mult) { _multiplicityCol2 = mult; }

bool isFunctional() const { return _multiplicityCol1 == 1.0; }

// A special value for an "empty" or "nonexisting" meta data, needed for
// the MmapBased metaData.
// A special value for an "empty" or "nonexisting" meta data. This is needed
// for the mmap-based meta data.
static CompressedRelationMetaData emptyMetaData() {
size_t m = size_t(-1);
return CompressedRelationMetaData{m, m, 0, 0, m};
}

// Equal if all members are equal.
// Two of these are equal if all members are equal.
bool operator==(const CompressedRelationMetaData&) const = default;

/**
* @brief Perform a scan for one col0Id i.e. retrieve all YZ from the XYZ
* permutation for a specific col0Id value of X
* @brief For a permutation XYZ, retrieve all YZ for a given X.
*
* @tparam Permutation The permutations Index::POS()... have different types
* @param col0Id The col0Id (in Id space) for which to search, e.g. fixed
* value for O in OSP permutation.
* @param result The Id table to which we will write. Must have 2 columns.
* @param permutation The Permutation to use (in particularly POS(), SOP(),...
* members of Index class).
*
* @param col0Id The ID of the "relation". That is, for permutation XYZ, the
* ID of an X.
*
* @param result The ID table to which we write the result, which must have
* exactly two columns.
*
* @param permutation The permutation from which to scan, which is one of:
* PSO, POS, SPO, SOP, OSO, OPS.
*/
// The IdTable is a rather expensive type, so we don't include it here.
// but we can also not forward declare it because it is actually an alias.
Expand All @@ -94,26 +102,26 @@ struct CompressedRelationMetaData {
ad_utility::SharedConcurrentTimeoutTimer timer = nullptr);

/**
* @brief Perform a scan for two keys i.e. retrieve all Z from the XYZ
* permutation for specific key values of X and Y.
* @brief For a permutation XYZ, retrieve all Z for given X and Y.
*
* @tparam Permutation The permutations Index::POS()... have different types
* @param keyFirst The first key (as a raw string that is yet to be
* transformed to index space) for which to search, e.g. fixed value for O in
* OSP permutation.
* @param keySecond The second key (as a raw string that is yet to be
* transformed to index space) for which to search, e.g. fixed value for S in
* OSP permutation.
* @param result The Id table to which we will write. Must have 2 columns.
* @param permutation The Permutation to use (in particularly POS(), SOP,...
* members of Index class).
*
* @param col0Id The ID for X.
*
* @param col1Id The ID for Y.
*
* @param result The ID table to which we write the result.
*
* @param permutation The permutation from which to scan, which is one of:
* PSO, POS, SPO, SOP, OSO, OPS.
*/
template <class PermutationInfo, typename IdTableImpl>
static void scan(const Id count, const Id& col1Id, IdTableImpl* result,
const PermutationInfo& permutation,
ad_utility::SharedConcurrentTimeoutTimer timer = nullptr);

private:
// some helper functions for reading and decompressing of blocks.
// Some helper functions for reading and decompressing blocks.

template <class Permutation>
static std::vector<char> readCompressedBlockFromFile(
Expand All @@ -131,7 +139,7 @@ struct CompressedRelationMetaData {
const CompressedBlockMetaData& block, const Permutation& permutation);
};

// How to serialize a CompressedRelationMetaData
// Serialization of the compressed "relation" meta data.
template <class Serializer>
void serialize(Serializer& s, CompressedRelationMetaData& c) {
s | c._col0Id;
Expand All @@ -155,12 +163,22 @@ class CompressedRelationWriter {
/// Create using a filename, to which the relation data will be written.
CompressedRelationWriter(ad_utility::File f) : _outfile{std::move(f)} {}

/// Add a complete (single) relation
/// \param col0Id the Id of the relation (e.g. the P in PSO permutation)
/// \param data the sorted data of the relation (e.g. vector of (S, O) in PSO)
/// \param numDistinctCol1 the number of distinct elements in the 1st colun (S
/// in PSO), needed for calculating the multiplicity \param functional is this
/// relation functional
/**
* Add a complete (single) relation.
*
* \param col0Id The ID of the relation, that is, the value of X for a
* permutation XYZ.
*
* \param data The sorted data of the relation, that is, the sequence of all
* pairs of Y and Z for the given X.
*
* \param numDistinctCol1 The number of distinct values for X (from which we
* can also calculate the average multiplicity, so we don't need to store that
* explicitly).
*
* \param functional Whether each value of Y occurs at most once (so that we
* have a function from the occurring Ys to the Zs).
*/
void addRelation(Id col0Id,
const ad_utility::BufferedVector<array<Id, 2>>& data,
size_t numDistinctCol1, bool functional);
Expand All @@ -169,33 +187,33 @@ class CompressedRelationWriter {
/// still be in some internal buffer.
void finish() { writeBufferedRelationsToSingleBlock(); }

/// Get all the CompressedRelationMetaData that was created by the calls to
/// Get the complete CompressedRelationMetaData created by the calls to
/// addRelation. This meta data is then deleted from the
/// CompressedRelationWriter. Typical workflow: add all relations, then call
/// finish() and then call this method
/// CompressedRelationWriter. The typical workflow is: add all relations,
/// then call `finish()` and then call this method.
auto getFinishedMetaData() {
return std::move(_metaDataBuffer);
_metaDataBuffer.clear();
}

/// Get all the CompressedBlockMetaData that was created by the calls to
/// Get all the CompressedBlockMetaData that were created by the calls to
/// addRelation. This meta data is then deleted from the
/// CompressedRelationWriter. Typical workflow: add all relations, then call
/// finish() and then call this method
/// CompressedRelationWriter. The typical workflow is: add all relations,
/// then call `finish()` and then call this method.
auto getFinishedBlocks() {
return std::move(_blockBuffer);
_blockBuffer.clear();
}

private:
// Compress the _buffer into a single block and write it to _outfile.
// Update _currentBlockData with the metaData of the written Block.
// Clear the _buffer.
// Compress the contents of `_buffer` into a single block and write it to
// _outfile. Update `_currentBlockData` with the meta data of the written
// block. Then clear `_buffer`.
void writeBufferedRelationsToSingleBlock();

// Compress the relation from arg data into one or more blocks, depending on
// its size. Write the _blocks to _outfile and append all the created
// block meta data to the _blockBuffer.
// Compress the relation from `data` into one or more blocks, depending on
// its size. Write the blocks to `_outfile` and append all the created
// block meta data to `_blockBuffer`.
void writeRelationToExclusiveBlocks(
Id col0Id, const ad_utility::BufferedVector<array<Id, 2>>& data);
};
Expand Down
7 changes: 4 additions & 3 deletions src/index/ConstantsIndexBuilding.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,12 @@ static const size_t BZIP2_MAX_TOTAL_BUFFER_SIZE = 1 << 30;
static const size_t THRESHOLD_RELATION_CREATION = 2 << 20;

// ________________________________________________________________
static const std::string PARTIAL_VOCAB_FILE_NAME = ".partial-vocabulary";
static const std::string PARTIAL_MMAP_IDS = ".partial-ids-mmap";
static const std::string PARTIAL_VOCAB_FILE_NAME = ".tmp.partial-vocabulary.";
static const std::string PARTIAL_MMAP_IDS = ".tmp.partial-ids-mmap.";

// ________________________________________________________________
static const std::string TMP_BASENAME_COMPRESSION = ".tmp.compression_index";
static const std::string TMP_BASENAME_COMPRESSION =
".tmp.for-prefix-compression.";

// _________________________________________________________________
// The degree of parallelism that is used for the index building step, where the
Expand Down
2 changes: 0 additions & 2 deletions src/index/ExternalVocabulary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,6 @@ void ExternalVocabulary<Comp>::initFromFile(const string& file) {
off_t posLastOfft = _file.getLastOffset(&_startOfOffsets);
_size = (posLastOfft - _startOfOffsets) / sizeof(off_t);
}
LOG(INFO) << "Initialized external vocabulary. It contains " << _size
<< " elements." << std::endl;
}

template class ExternalVocabulary<TripleComponentComparator>;
Expand Down