Skip to content

Commit

Permalink
Deterministic Exporting of Permutations
Browse files Browse the repository at this point in the history
* Implemented `PermutationExporterMain`, which exports the permutations of a built index into a deterministic text format.

* This can be used to check if the index building still works as expected after changes to the index format or index building.
  • Loading branch information
joka921 committed Sep 9, 2021
1 parent 766843e commit c953a00
Show file tree
Hide file tree
Showing 5 changed files with 180 additions and 4 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,9 @@ target_link_libraries(TurtleParserMain parser ${CMAKE_THREAD_LIBS_INIT} absl::fl
add_executable(VocabularyMergerMain src/VocabularyMergerMain.cpp)
target_link_libraries(VocabularyMergerMain index ${CMAKE_THREAD_LIBS_INIT})

# Standalone tool that dumps a single permutation of a built index to stdout
# in a deterministic text format (see src/index/PermutationExporterMain.cpp).
add_executable(PermutationExporterMain src/index/PermutationExporterMain.cpp)
target_link_libraries(PermutationExporterMain index ${CMAKE_THREAD_LIBS_INIT})

#add_executable(TextFilterComparison src/experiments/TextFilterComparison.cpp)
#target_link_libraries (TextFilterComparison experiments)

6 changes: 6 additions & 0 deletions src/index/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,17 @@ class Index {
SortByOSP(), "OSP", ".osp", {2, 0, 1});

// Accessors for the six permutation members (_POS, _PSO, _SPO, _SOP, _OPS,
// _OSP). Each permutation has a const overload for read-only access and a
// non-const overload that returns a mutable reference, e.g. for calling
// `loadFromDisk` on a single permutation.
const auto& POS() const { return _POS; }
auto& POS() { return _POS; }
const auto& PSO() const { return _PSO; }
auto& PSO() { return _PSO; }
const auto& SPO() const { return _SPO; }
auto& SPO() { return _SPO; }
const auto& SOP() const { return _SOP; }
auto& SOP() { return _SOP; }
const auto& OPS() const { return _OPS; }
auto& OPS() { return _OPS; }
const auto& OSP() const { return _OSP; }
auto& OSP() { return _OSP; }

// Creates an index from a file. Parameter Parser must be able to split the
// file's format into triples.
Expand Down
80 changes: 80 additions & 0 deletions src/index/MetaDataHandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ template <class M>
class MetaDataWrapperDense {
public:
using Iterator = VecWrapperImpl::Iterator<M>;
// The VecWrapperImpl::Iterator is actually const
using ConstIterator = VecWrapperImpl::Iterator<M>;
// The VecWrapperImpl::Iterator already iterates in order, so it can also
// serve as the ordered iterator.
using ConstOrderedIterator = VecWrapperImpl::Iterator<M>;

using value_type = typename M::value_type;

// _________________________________________________________
Expand Down Expand Up @@ -147,12 +152,18 @@ class MetaDataWrapperDense {
return it;
}

// __________________________________________________________________________
// Begin iterator for an ordered traversal. Simply forwards to `begin()`.
ConstOrderedIterator ordered_begin() const { return begin(); }

// __________________________________________________________________________
// End iterator, built from the size of `_vec`, `_vec.cend()` and a pointer to
// `_vec`.
Iterator cend() const { return Iterator(_vec.size(), _vec.cend(), &_vec); }

// __________________________________________________________________________
// End iterator, built from the size of `_vec`, `_vec.end()` and a pointer to
// `_vec`.
Iterator end() const { return Iterator(_vec.size(), _vec.end(), &_vec); }

// __________________________________________________________________________
// End iterator for an ordered traversal. Simply forwards to `end()`.
ConstOrderedIterator ordered_end() const { return end(); }

// ____________________________________________________________
void set(Id id, const value_type& value) {
if (id >= _vec.size()) {
Expand Down Expand Up @@ -206,6 +217,61 @@ class MetaDataWrapperHashMap {
using Iterator = typename hashMap::iterator;
using value_type = typename hashMap::mapped_type;

// An iterator on the underlying hashMap that visits the elements in ascending
// order of their keys. This is used for deterministically exporting the
// underlying permutation.
//
// The iterator keeps a reference to the wrapper it was created from, so the
// wrapper must outlive the iterator. A non-end iterator stores a sorted copy
// of all keys, so creating or copying one costs time and memory proportional
// to the size of the map.
class ConstOrderedIterator {
  using key_type = typename hashMap::key_type;

  const MetaDataWrapperHashMap& wrapper_;
  // All keys of the hashMap in ascending order. Left empty for a past-the-end
  // iterator, which must never be dereferenced.
  std::vector<key_type> sortedKeys_;
  // Index into `sortedKeys_` of the element this iterator currently refers to.
  size_t position_;

 public:
  // ________________________________________________________________________
  // Create an iterator that refers to the element with the `position`-th
  // smallest key of `wrapper`. `position == wrapper.size()` yields the
  // past-the-end iterator.
  ConstOrderedIterator(const MetaDataWrapperHashMap& wrapper, size_t position)
      : wrapper_{wrapper}, position_{position} {
    // A past-the-end iterator is only ever compared, never dereferenced, so
    // there is no need to collect and sort the keys for it.
    if (position_ >= wrapper_.size()) {
      return;
    }
    // Sort all the keys from the underlying hashMap and store them.
    sortedKeys_.reserve(wrapper_.size());
    for (const auto& [key, value] : wrapper_) {
      (void)value;
      sortedKeys_.push_back(key);
    }
    std::sort(sortedKeys_.begin(), sortedKeys_.end());
  }

  // ________________________________________________________________________
  // Return a const reference to the (key, value) element of the underlying
  // hashMap that belongs to the current position.
  const auto& operator*() const {
    const auto& m = wrapper_.getUnderlyingHashMap();
    return *m.find(sortedKeys_[position_]);
  }

  // ________________________________________________________________________
  // Return a pointer to the element that `operator*` refers to. The return
  // type is deduced so that it always matches the element type of the
  // underlying hashMap. This is safe, because the element lives inside the
  // hashMap and not inside this iterator.
  const auto* operator->() const { return &(**this); }

  // ________________________________________________________________________
  ConstOrderedIterator& operator++() {
    ++position_;
    return *this;
  }

  // ________________________________________________________________________
  ConstOrderedIterator operator++(int) {
    auto cpy = *this;
    ++position_;
    return cpy;
  }

  // ________________________________________________________________________
  // Only the positions are compared, so comparing iterators that belong to
  // different wrappers is not meaningful.
  bool operator==(const ConstOrderedIterator& rhs) const {
    return position_ == rhs.position_;
  }
};

// nothing to do here, since the default constructor of the hashMap does
// everything we want
explicit MetaDataWrapperHashMap() = default;
Expand All @@ -217,19 +283,33 @@ class MetaDataWrapperHashMap {

// __________________________________________________________________
// Const begin iterators of the underlying `_map` (unordered traversal).
// `begin() const` makes a const wrapper usable in range-based for loops.
ConstIterator cbegin() const { return _map.begin(); }
ConstIterator begin() const { return _map.begin(); }

// __________________________________________________________________
// Non-const begin iterator of the underlying `_map` (unordered traversal).
Iterator begin() { return _map.begin(); }

// _________________________________________________________________________
// Begin iterator (position 0) for a traversal in ascending key order. The
// iterator keeps a reference to this wrapper, and constructing it collects and
// sorts the keys of the map, so this call is not cheap.
ConstOrderedIterator ordered_begin() const {
return ConstOrderedIterator{*this, 0};
}

// ____________________________________________________________
// Const end iterators of the underlying `_map` (unordered traversal).
// `end() const` makes a const wrapper usable in range-based for loops.
ConstIterator cend() const { return _map.end(); }
ConstIterator end() const { return _map.end(); }

// ____________________________________________________________
// Non-const end iterator of the underlying `_map` (unordered traversal).
Iterator end() { return _map.end(); }

// _________________________________________________________________________
// Past-the-end iterator for the ordered traversal, i.e. the ordered iterator
// at position `size()`. It must only be compared, never dereferenced.
ConstOrderedIterator ordered_end() const {
return ConstOrderedIterator{*this, size()};
}

// ____________________________________________________________
// Store `value` under the key `id` via `_map[id]`. `value` is taken by value
// and moved into the map.
void set(Id id, value_type value) { _map[id] = std::move(value); }

// Read-only access to the wrapped hash map `_map`.
const auto& getUnderlyingHashMap() const { return _map; }

// __________________________________________________________
const value_type& getAsserted(Id id) const {
auto it = _map.find(id);
Expand Down
19 changes: 15 additions & 4 deletions src/index/MetaDataIterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ class MetaDataIterator {
public:
MetaDataIterator(const MetaDataType& meta, ad_utility::File file)
: meta_(meta),
_iterator(meta.data().begin()),
_iterator(meta.data().ordered_begin()),
_endIterator(meta.data().ordered_end()),
_buffer_offset(0),
_file(file) {
scanCurrentPos();
Expand All @@ -26,6 +27,10 @@ class MetaDataIterator {
++_buffer_offset;
if (_buffer_offset >= _buffer.size()) {
++_iterator;
if (empty()) {
// don't do anything if we have already reached the end
return *this;
}
scanCurrentPos();
_buffer_offset = 0;
}
Expand All @@ -37,18 +42,24 @@ class MetaDataIterator {
_buffer[_buffer_offset][1]};
}

bool empty() { return _iterator == meta_.data().end(); }
bool empty() const { return _iterator == _endIterator; }

private:
void scanCurrentPos() {
const FullRelationMetaData& rmd = _iterator->second.get();
FullRelationMetaData rmd;
if constexpr (requires { _iterator->second.get(); }) {
rmd = _iterator->second.get();
} else {
rmd = _iterator->second;
}
_buffer.resize(rmd.getNofElements());
_file.read(_buffer.data(), rmd.getNofElements() * 2 * sizeof(Id),
rmd._startFullIndex);
}

const MetaDataType& meta_;
typename MetaDataType::MapType::Iterator _iterator;
typename MetaDataType::MapType::ConstOrderedIterator _iterator;
const typename MetaDataType::MapType::ConstOrderedIterator _endIterator;

// This buffers the results of the scans we need to use to read the relations
std::vector<std::array<Id, 2>> _buffer;
Expand Down
76 changes: 76 additions & 0 deletions src/index/PermutationExporterMain.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Copyright 2021, University of Freiburg, Chair of Algorithms and Data
// Structures. Author: Johannes Kalmbach <kalmbacj@cs.uni-freiburg.de>

#include "./Index.h"
#include "./MetaDataIterator.h"

/// Dump a certain permutation to stdout in a human-readable way as IDs, and
/// in deterministic order. Each triple is written on its own line as three
/// space-separated IDs. The permutation must provide the members `_meta` and
/// `_file`, which are handed to a `MetaDataIterator`.
template <typename Permutation>
void dumpToStdout(const Permutation& permutation) {
  MetaDataIterator it{permutation._meta, permutation._file};
  while (!it.empty()) {
    const auto triple = *it;
    // Deliberately '\n' and not std::endl: flushing stdout after every single
    // triple would make dumping a large permutation needlessly slow.
    std::cout << triple[0] << ' ' << triple[1] << ' ' << triple[2] << '\n';
    ++it;
  }
  // Flush once at the end, so that the complete dump has been written when
  // this function returns.
  std::cout << std::flush;
}

/// Load a certain permutation from a certain index, and dump it to stdout
/// in a human-readable and deterministic way. This can be used for large
/// regression tests, when the index format or the index building procedure
/// changes.
/// Args: ./PermutationExporterMain <indexBasename> <permutation>
/// (<permutation> must be one of pso, pos, spo, sop, osp, ops
int main(int argc, char** argv) {
// Actual output goes to std::cout,output of LOG(...) to std::cerr
ad_utility::setGlobalLogginStream(&std::cerr);

if (argc != 3) {
LOG(ERROR) << "Usage: PermutationExporterMain <indexBasename> <permutation "
"to dump>"
<< std::endl;
return EXIT_FAILURE;
}

Index i;
std::string indexName{argv[1]};
std::string p{argv[2]};
if (p == "sop") {
i.SOP().loadFromDisk(indexName);
dumpToStdout(i.SOP());
return EXIT_SUCCESS;
}
if (p == "spo") {
i.SPO().loadFromDisk(indexName);
dumpToStdout(i.SPO());
return EXIT_SUCCESS;
}
if (p == "osp") {
i.OSP().loadFromDisk(indexName);
dumpToStdout(i.OSP());
return EXIT_SUCCESS;
}
if (p == "ops") {
i.OPS().loadFromDisk(indexName);
dumpToStdout(i.OPS());
return EXIT_SUCCESS;
}

if (p == "pos") {
i.POS().loadFromDisk(indexName);
dumpToStdout(i.POS());
return EXIT_SUCCESS;
}

if (p == "pso") {
i.PSO().loadFromDisk(indexName);
dumpToStdout(i.PSO());
return EXIT_SUCCESS;
}

LOG(ERROR)
<< "<permutation> must be one of pso, pos, spo, sop, osp, ops, but was \""
<< p << "\"" << std::endl;
return EXIT_FAILURE;
}

0 comments on commit c953a00

Please sign in to comment.