Skip to content

Commit

Permalink
Updated MetaDataConverter to handle basically any old format
Browse files Browse the repository at this point in the history
- TODO before merging:
- Unit tests for the compressed vocabulary (they are currently a little
bit sparse)
- Better output of the converter (maybe tell what was created and what
has to be renamed)

- Verify that the converter works as expected (maybe write a verification
script)
  • Loading branch information
joka921 committed Aug 22, 2018
1 parent 1d604d0 commit 8291aff
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 57 deletions.
12 changes: 3 additions & 9 deletions src/MetaDataConverterMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ int main(int argc, char** argv) {
"this index. Skipping\n";
continue;
}
addMagicNumberToSparseMetaDataPermutation(permutName,
permutName + ".converted");
convertPermutationToHmap(permutName, permutName + ".converted");
}

std::array<std::string, 4> denseNames{".spo", ".sop", ".osp", ".ops"};
Expand All @@ -37,12 +36,7 @@ int main(int argc, char** argv) {
"this index. Skipping\n";
continue;
}
// TODO<joka921> determine magic Number and check if this is necessary at
// all
/*convertHmapBasedPermutatationToMmap(permutName, permutName + ".converted",
permutName + MMAP_FILE_SUFFIX);
*/
addBlockListToMmapMetaDataPermutation(permutName,
permutName + ".converted");
convertPermutationToMmap(permutName, permutName + ".converted",
permutName + MMAP_FILE_SUFFIX);
}
}
45 changes: 32 additions & 13 deletions src/index/IndexMetaData.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <stdio.h>
#include <algorithm>
#include <cmath>
#include <exception>
#include <google/sparse_hash_map>
#include "../global/Id.h"
#include "../util/File.h"
Expand All @@ -23,6 +24,16 @@ using std::array;
using std::pair;
using std::vector;

// Exception signalling that persistent meta data does not have the format
// expected by the IndexMetaData instantiation that tries to read it (e.g. when
// hash-map based meta data is constructed from mmap-based data or vice versa).
class WrongFormatException : public std::exception {
 public:
  // The message is taken by value and moved into place, so string literals and
  // temporaries are stored without an additional copy.
  explicit WrongFormatException(std::string msg) : _msg(std::move(msg)) {}

  // Returns the stored message. The pointer is owned by this exception object
  // and stays valid for as long as the object is alive.
  const char* what() const noexcept override { return _msg.c_str(); }

 private:
  std::string _msg;
};

// simple ReturnValue struct
struct VersionInfo {
size_t _version;
Expand All @@ -38,6 +49,19 @@ T readFromBuf(unsigned char** buf) {
return res;
}

// Magic numbers that tag the different kinds of persistent meta data.
// Converting -1 ... -4 to size_t yields the four largest size_t values.
// NOTE(review): the *_VERSION variants presumably mark files that additionally
// store a version number -- confirm in parseMagicNumberAndVersioning.
// `inline` makes each constant a single entity across all translation units
// that include this header.
inline constexpr size_t MAGIC_NUMBER_MMAP_META_DATA = static_cast<size_t>(-1);
inline constexpr size_t MAGIC_NUMBER_SPARSE_META_DATA =
    static_cast<size_t>(-2);
inline constexpr size_t MAGIC_NUMBER_MMAP_META_DATA_VERSION =
    static_cast<size_t>(-3);
inline constexpr size_t MAGIC_NUMBER_SPARSE_META_DATA_VERSION =
    static_cast<size_t>(-4);

// Version numbers of the meta data format, so that further format changes can
// be told apart.
inline constexpr size_t V_NO_VERSION = 0;  // dummy value
inline constexpr size_t V_BLOCK_LIST_AND_STATISTICS = 1;

// The version assigned to newly constructed meta data.
inline constexpr size_t V_CURRENT = V_BLOCK_LIST_AND_STATISTICS;

// Check index_layout.md for explanations (expected comments).
// Removed comments here so that not two places had to be kept up-to-date.

Expand Down Expand Up @@ -115,17 +139,18 @@ class IndexMetaData {
void calculateExpensiveStatistics();
string statistics() const;

size_t getNofTriples() const { return _nofTriples; }
size_t getNofTriples() const { return _totalElements; }

void setName(const string& name) { _name = name; }

const string& getName() const { return _name; }

size_t getNofDistinctC1() const;

size_t getVersion() const { return _version; }

private:
off_t _offsetAfter = 0;
size_t _nofTriples;

string _name;
string _filename;
Expand All @@ -135,6 +160,7 @@ class IndexMetaData {
size_t _totalElements = 0;
size_t _totalBytes = 0;
size_t _totalBlocks = 0;
size_t _version = V_CURRENT;

// friend declaration for external converter function with ugly types
// using IndexMetaDataHmap = IndexMetaData<MetaDataWrapperHashMap>;
Expand All @@ -144,6 +170,10 @@ class IndexMetaData {
MetaDataWrapperDense<ad_utility::MmapVector<FullRelationMetaData>>>;
friend IndexMetaDataMmap convertHmapMetaDataToMmap(
const IndexMetaDataHmapSparse&, const std::string&, bool);
friend IndexMetaDataHmapSparse convertMmapMetaDataToHmap(
const IndexMetaDataMmap&, const std::string&, bool);
friend IndexMetaDataHmapSparse convertMmapMetaDataToHmap(
const IndexMetaDataMmap& mmap, bool verify);

// this way all instantations will be friends with each other,
// but this should not be an issue.
Expand Down Expand Up @@ -177,16 +207,5 @@ using IndexMetaDataMmap = IndexMetaData<
using IndexMetaDataMmapView = IndexMetaData<
MetaDataWrapperDense<ad_utility::MmapVectorView<FullRelationMetaData>>>;

// Magic numbers used to tell the different kinds of persistent meta data
// apart. Converting -1 ... -4 to size_t yields the four largest size_t values.
// NOTE(review): the *_VERSION variants presumably mark files that additionally
// store a version number -- confirm in parseMagicNumberAndVersioning.
const size_t MAGIC_NUMBER_MMAP_META_DATA = static_cast<size_t>(-1);
const size_t MAGIC_NUMBER_SPARSE_META_DATA = static_cast<size_t>(-2);
const size_t MAGIC_NUMBER_MMAP_META_DATA_VERSION = static_cast<size_t>(-3);
const size_t MAGIC_NUMBER_SPARSE_META_DATA_VERSION = static_cast<size_t>(-4);

// Version numbers of the meta data format, so that further format changes can
// be told apart.
constexpr size_t V_NO_VERSION = 0; // dummy value
constexpr size_t V_BLOCK_LIST_AND_STATISTICS = 1;

// The version assigned to newly constructed meta data.
constexpr size_t V_CURRENT = V_BLOCK_LIST_AND_STATISTICS;

#include "./IndexMetaDataImpl.h"
38 changes: 20 additions & 18 deletions src/index/IndexMetaDataImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ void IndexMetaData<MapType>::createFromByteBuffer(unsigned char* buf) {
// read magic number
auto v = parseMagicNumberAndVersioning(buf);
size_t version = v._version;
_version = version;
buf += v._nOfBytes;

size_t nameLength = readFromBuf<size_t>(&buf);
Expand All @@ -56,14 +57,13 @@ void IndexMetaData<MapType>::createFromByteBuffer(unsigned char* buf) {
_data.setSize(nofRelations);
}
_offsetAfter = readFromBuf<off_t>(&buf);
_nofTriples = 0;

// look for blockData in the already existing mmaped vector
if constexpr (!_isMmapBased) {
// HashMap-based means that FullRMD and Blocks are all stored withing the
// permutation file
for (size_t i = 0; i < nofRelations; ++i) {
FullRelationMetaData rmd;
rmd.createFromByteBuffer(buf);
_nofTriples += rmd.getNofElements();
buf += rmd.bytesRequired();
if (rmd.hasBlocks()) {
BlockBasedRelationMetaData bRmd;
Expand All @@ -75,10 +75,10 @@ void IndexMetaData<MapType>::createFromByteBuffer(unsigned char* buf) {
}
}
} else {
// MmapBased
if (version < V_BLOCK_LIST_AND_STATISTICS) {
for (auto it = _data.cbegin(); it != _data.cend(); ++it) {
const FullRelationMetaData& rmd = (*it).second;
_nofTriples += rmd.getNofElements();
if (rmd.hasBlocks()) {
BlockBasedRelationMetaData bRmd;
bRmd.createFromByteBuffer(buf);
Expand All @@ -92,6 +92,8 @@ void IndexMetaData<MapType>::createFromByteBuffer(unsigned char* buf) {
}
calculateExpensiveStatistics();
} else {
// version >= V_BLOCK_LIST_AND_STATISTICS, no need to touch Relations that
// don't have blocks
size_t numBlockData = readFromBuf<size_t>(&buf);
for (size_t i = 0; i < numBlockData; ++i) {
Id id = readFromBuf<Id>(&buf);
Expand All @@ -108,6 +110,8 @@ void IndexMetaData<MapType>::createFromByteBuffer(unsigned char* buf) {
_totalElements = readFromBuf<size_t>(&buf);
_totalBytes = readFromBuf<size_t>(&buf);
_totalBlocks = readFromBuf<size_t>(&buf);
} else {
calculateExpensiveStatistics();
}
}
// _____________________________________________________________________________
Expand Down Expand Up @@ -294,13 +298,12 @@ VersionInfo IndexMetaData<MapType>::parseMagicNumberAndVersioning(
if constexpr (!_isMmapBased) {
if (magicNumber == MAGIC_NUMBER_MMAP_META_DATA ||
magicNumber == MAGIC_NUMBER_MMAP_META_DATA_VERSION) {
LOG(INFO)
<< "ERROR: magic number of MetaData indicates that we are trying "
"to construct a hashMap based IndexMetaData from mmap-based meta "
"data. This is not validx."
"Please use ./MetaDataConverterMain"
"to convert old indices without rebuilding them (See README.md). "
"Terminating...\n";
throw WrongFormatException(
"ERROR: magic number of MetaData indicates that we are trying "
"to construct a hashMap based IndexMetaData from mmap-based meta "
"data. This is not valid."
"Please use ./MetaDataConverterMain"
"to convert old indices without rebuilding them (See README.md).\n");
AD_CHECK(false);
} else if (magicNumber == MAGIC_NUMBER_SPARSE_META_DATA) {
hasVersion = false;
Expand All @@ -321,12 +324,12 @@ VersionInfo IndexMetaData<MapType>::parseMagicNumberAndVersioning(
hasVersion = true;
nOfBytes = sizeof(size_t);
} else {
LOG(INFO) << "ERROR: No or wrong magic number found in persistent "
"mmap-based meta data. "
"Please use ./MetaDataConverterMain "
"to convert old indices without rebuilding them (See "
"README.md).Terminating...\n";
AD_CHECK(false);
throw WrongFormatException(
"ERROR: No or wrong magic number found in persistent "
"mmap-based meta data. "
"Please use ./MetaDataConverterMain "
"to convert old indices without rebuilding them (See "
"README.md).Terminating...\n");
}
}

Expand All @@ -352,4 +355,3 @@ VersionInfo IndexMetaData<MapType>::parseMagicNumberAndVersioning(
}
return res;
}

79 changes: 66 additions & 13 deletions src/index/MetaDataConverter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,22 @@ MmapHandler convertHmapHandlerToMmap(const MetaDataWrapperHashMapSparse& hmap,
return res;
}

// ___________________________________________________________________________
// Build a sparse hash-map based meta data wrapper that holds a copy of every
// key/value entry of the given mmap-based wrapper.
MetaDataWrapperHashMapSparse convertMmapHandlerToHmap(const MmapHandler& mmap) {
  MetaDataWrapperHashMapSparse converted;
  auto entry = mmap.cbegin();
  while (entry != mmap.cend()) {
    converted.set(entry->first, entry->second);
    ++entry;
  }
  return converted;
}

// _______________________________________________________________________
IndexMetaDataMmap convertHmapMetaDataToMmap(const IndexMetaDataHmapSparse& hmap,
const std::string& filename,
bool verify) {
IndexMetaDataMmap res;
res._offsetAfter = hmap._offsetAfter;
res._nofTriples = hmap._nofTriples;
res._totalElements = hmap._totalElements;
res._name = hmap._name;
res._filename = hmap._filename;
res._data = convertHmapHandlerToMmap(hmap._data, filename);
Expand All @@ -57,22 +66,66 @@ IndexMetaDataMmap convertHmapMetaDataToMmap(const IndexMetaDataHmapSparse& hmap,
return res;
}

// _______________________________________________________________________
// Convert mmap-based index meta data into the sparse hash-map based
// representation.
// Arguments:
//   mmap:   the mmap-based meta data to convert (not modified)
//   verify: if true, check in both directions that every relation found in
//           one representation has an equal entry in the other one. On the
//           first mismatch an error is printed to stderr and the process
//           exits with status 1.
// Returns the converted meta data. Its version is not copied but stays at the
// default of a newly constructed IndexMetaData.
IndexMetaDataHmapSparse convertMmapMetaDataToHmap(const IndexMetaDataMmap& mmap,
                                                  bool verify) {
  IndexMetaDataHmapSparse res;
  res._offsetAfter = mmap._offsetAfter;
  res._name = mmap._name;
  res._filename = mmap._filename;
  res._data = convertMmapHandlerToHmap(mmap._data);
  res._blockData = mmap._blockData;

  // Carry over all aggregated statistics, not only the element count.
  // Otherwise the byte and block totals of the result would silently stay at
  // their default value of 0.
  res._totalElements = mmap._totalElements;
  res._totalBytes = mmap._totalBytes;
  res._totalBlocks = mmap._totalBlocks;

  if (verify) {
    // Any difference means that the conversion is broken, so give up at once.
    const auto abortOnMismatch = [](bool mismatch) {
      if (mismatch) {
        std::cerr << "mismatch in converted Meta data, exiting\n";
        exit(1);
      }
    };
    // Every converted entry must equal its source entry ...
    for (auto it = res._data.cbegin(); it != res._data.cend(); ++it) {
      abortOnMismatch(mmap._data.getAsserted(it->first) != it->second);
    }
    // ... and every source entry must have made it into the result.
    for (auto it = mmap._data.cbegin(); it != mmap._data.cend(); ++it) {
      abortOnMismatch(res._data.getAsserted(it->first) != it->second);
    }
  }
  return res;
}

// ______________________________________________________________________
void convertHmapBasedPermutatationToMmap(const string& permutIn,
const string& permutOut,
const string& mmap, bool verify) {
IndexMetaDataHmapSparse h;
h.readFromFile(permutIn);
IndexMetaDataMmap m = convertHmapMetaDataToMmap(h, mmap, verify);
writeNewPermutation(permutIn, permutOut, m);
void convertPermutationToMmap(const string& permutIn, const string& permutOut,
const string& mmap, bool verify) {
try {
IndexMetaDataHmapSparse h;
h.readFromFile(permutIn);
IndexMetaDataMmap m = convertHmapMetaDataToMmap(h, mmap, verify);
writeNewPermutation(permutIn, permutOut, m);
} catch (const WrongFormatException& e) {
std::cerr << "this is not a sparse permutation, Trying to read as Mmap";
IndexMetaDataMmap m;
m.readFromFile(permutIn);
if (m.getVersion() < V_CURRENT) {
writeNewPermutation(permutIn, permutOut, m);
}
}
}

// _________________________________________________________________________
void addMagicNumberToSparseMetaDataPermutation(const string& permutIn,
const string& permutOut) {
IndexMetaDataHmapSparse h;
h.readFromFile(permutIn);
writeNewPermutation(permutIn, permutOut, h);
void convertPermutationToHmap(const string& permutIn, const string& permutOut,
bool verify) {
try {
IndexMetaDataHmapSparse h;
h.readFromFile(permutIn);
writeNewPermutation(permutIn, permutOut, h);
} catch (const WrongFormatException& e) {
std::cerr << "this is not a sparse permutation, Trying to read as Mmap";
IndexMetaDataMmap m;
m.readFromFile(permutIn);
IndexMetaDataHmapSparse h = convertMmapMetaDataToHmap(m, verify);
writeNewPermutation(permutIn, permutOut, h);
}
}

// _________________________________________________________________________
Expand Down
16 changes: 12 additions & 4 deletions src/index/MetaDataConverter.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,30 @@ using MmapHandler = MetaDataWrapperDense<MmapVector<FullRelationMetaData>>;
// Convert a sparse hash-map based meta data wrapper into an mmap-based one.
// NOTE(review): `filename` is presumably the file backing the mmap vector of
// the result -- confirm against the definition.
MmapHandler convertHmapHandlerToMmap(const MetaDataWrapperHashMapSparse& hmap,
const std::string& filename);

// _______________________________________________________________________
MmapHandler convertHmapHandlerToMmap(const MmapHandler& mmap);

// _______________________________________________________________________
// Build mmap-based index meta data from hash-map based index meta data.
// `filename` is passed on to convertHmapHandlerToMmap.
// NOTE(review): the definition takes `const IndexMetaDataHmapSparse&` while
// this declaration names `IndexMetaDataHmap` -- confirm that both refer to the
// same type, otherwise this declares an overload without a definition.
// NOTE(review): `verify` presumably enables a consistency check of the
// converted data -- confirm against the definition.
IndexMetaDataMmap convertHmapMetaDataToMmap(const IndexMetaDataHmap& hmap,
const std::string& filename,
bool verify);

// _______________________________________________________________________
// Build sparse hash-map based index meta data from mmap-based index meta data.
// If `verify` is true, the entries of input and result are compared in both
// directions; on a mismatch an error is printed and the process exits.
IndexMetaDataHmapSparse convertMmapMetaDataToHmap(const IndexMetaDataMmap& mmap,
bool verify);

// Convert hashmap based permutation to mmap-based permutation
// Arguments:
// permutIn: path to permutation with hash map based meta data
// permutOut: path to the file where the mmap based permutation is written.
// This file will be overwritten
// mmap: path to the file where the persistent mmap vector will be stored
// (will be overwritten)
// verify: passed on to the meta data conversion (defaults to true)
void convertHmapBasedPermutatationToMmap(const string& permutIn,
const string& permutOut,
const string& mmap,
bool verify = true);
// Write the permutation `permutIn` with mmap-based meta data to `permutOut`.
// The input is first read as sparse hash-map based meta data and converted,
// with `mmap` and `verify` passed on to the meta data conversion. If that read
// throws a WrongFormatException, the input is read as mmap-based meta data
// instead and only rewritten when its version is older than V_CURRENT.
void convertPermutationToMmap(const string& permutIn, const string& permutOut,
const string& mmap, bool verify = true);

// Write the permutation `permutIn` with sparse hash-map based meta data to
// `permutOut`. The input is first read as sparse hash-map based meta data and
// written back. If that read throws a WrongFormatException, the input is read
// as mmap-based meta data, converted (`verify` is passed on to the conversion)
// and then written.
void convertPermutationToHmap(const string& permutIn, const string& permutOut,
bool verify = true);

// Copy hashMap based permutation and update meta data format (add magic number)
// permutIn is read, permutOut is (over)written.
Expand Down

0 comments on commit 8291aff

Please sign in to comment.