-
Notifications
You must be signed in to change notification settings - Fork 37
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Prefix Compression and faster startup time #105
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,13 +9,22 @@ | |
#include "./util/File.h" | ||
|
||
// _________________________________________________________ | ||
// Opens an index from disk. Determines whether this index was built by an older | ||
// QLever version and has to be updated in order to use it (efficiently or at | ||
// all) with the current QLever version. Will NOT overwrite existing files but | ||
// create new files with a .converted suffix which has to be manually removed | ||
// to make the index work. It is highly recommended to backup the original index | ||
// before overwriting it like this. | ||
// | ||
// This converter prints detailed information about which files were created and | ||
// which files have to be renamed in order to complete the index update. | ||
int main(int argc, char** argv) { | ||
if (argc != 2) { | ||
std::cerr << "Usage: ./MetaDataConverterMain <indexPrefix>\n"; | ||
exit(1); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add a bit of additional information on what this does |
||
} | ||
std::string in = argv[1]; | ||
std::array<std::string, 4> sparseNames{".pso", ".pos", ".spo", ".sop"}; | ||
std::array<std::string, 2> sparseNames{".pso", ".pos"}; | ||
for (const auto& n : sparseNames) { | ||
std::string permutName = in + ".index" + n; | ||
if (!ad_utility::File::exists(permutName)) { | ||
|
@@ -24,11 +33,10 @@ int main(int argc, char** argv) { | |
"this index. Skipping\n"; | ||
continue; | ||
} | ||
addMagicNumberToSparseMetaDataPermutation(permutName, | ||
permutName + ".converted"); | ||
convertPermutationToHmap(permutName, permutName + ".converted"); | ||
} | ||
|
||
std::array<std::string, 2> denseNames{".osp", ".ops"}; | ||
std::array<std::string, 4> denseNames{".spo", ".sop", ".osp", ".ops"}; | ||
for (const auto& n : denseNames) { | ||
std::string permutName = in + ".index" + n; | ||
if (!ad_utility::File::exists(permutName)) { | ||
|
@@ -37,7 +45,8 @@ int main(int argc, char** argv) { | |
"this index. Skipping\n"; | ||
continue; | ||
} | ||
convertHmapBasedPermutatationToMmap(permutName, permutName + ".converted", | ||
permutName + MMAP_FILE_SUFFIX); | ||
convertPermutationToMmap(permutName, permutName + ".converted", | ||
permutName + MMAP_FILE_SUFFIX); | ||
} | ||
CompressVocabAndCreateConfigurationFile(in); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
// Copyright 2018, University of Freiburg, | ||
// Chair of Algorithms and Data Structures. | ||
// Author: Johannes Kalmbach<joka921> (johannes.kalmbach@gmail.com) | ||
|
||
#include <iostream> | ||
#include "./global/Constants.h" | ||
#include "./index/PrefixHeuristic.h" | ||
|
||
// Reads a vocabulary of words from file, calculates the prefixes with which the | ||
// greedy heuristic would compress this vocabulary and prints them on the | ||
// screen (mostly for testing and evaluation purposes of the greedy algorithm) | ||
// | ||
// It is assumed, that there are 127 prefixes which are encoded by 1 byte each. | ||
// Also prints some statistics about the compression (e.g. compression ratio) | ||
// | ||
// The vocabulary in the input file at argv[1] must be one word per line and | ||
// alphabetically sorted | ||
// _______________________________________________________________ | ||
int main(int argc, char** argv) { | ||
if (argc != 2) { | ||
std::cerr << "Usage: ./PrefixHeuristicEvaluatorMain <filename>\n"; | ||
std::cerr << "Reads a vocabulary of words from file, calculates the " | ||
"prefixes with which the greedy heuristic would compress this " | ||
"vocabulary and prints them on the" | ||
" screen (mostly for testing and evaluation purposes of the " | ||
"greedy algorithm).\n" | ||
|
||
" It is assumed, that there are 127 prefixes which are " | ||
"encoded by 1 byte each." | ||
" Also prints some statistics about the compression (e.g. " | ||
"compression ratio)\n" | ||
|
||
" The vocabulary in the input file at argv[1] must be one " | ||
"word per line and alphabetically sorted"; | ||
exit(1); | ||
} | ||
|
||
for (const auto& p : | ||
calculatePrefixes(argv[1], 127, NUM_COMPRESSION_PREFIXES, true)) { | ||
std::cout << p << '\n'; | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
// Copyright 2018, University of Freiburg, | ||
// Chair of Algorithms and Data Structures. | ||
// Author: Johannes Kalmbach<joka921> (johannes.kalmbach@gmail.com) | ||
|
||
#pragma once | ||
|
||
#include <functional> | ||
#include <string> | ||
|
||
using std::string; | ||
|
||
// Class to store strings that have been compressed. | ||
// Forbids automatic conversion from the compressed strings in the vocabulary to | ||
// "ordinary" strings to avoid bugs. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good idea |
||
// only implements/inherits functionality from std::string that is actually used | ||
// TODO<niklas> is there a better way to do this? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, it seems one can do |
||
// Wrapper type for strings that have been compressed.
//
// Inherits privately from std::string so that a CompressedString can neither
// be created from nor be converted to an ordinary string implicitly. All
// conversions have to go through the named functions fromString, toString and
// toStringView. Only the small part of the std::string interface that is
// actually needed is made available.
class CompressedString : private std::string {
 public:
  // Creates an empty CompressedString.
  CompressedString() = default;

  // Explicit conversion from a string (copies `other`).
  static CompressedString fromString(const std::string& other) {
    return CompressedString(other);
  }

  // Explicit conversion from a string (takes over the contents of `other`).
  // ______________________________________________________________
  static CompressedString fromString(std::string&& other) {
    return CompressedString(std::move(other));
  }

  // Explicit conversion to a string. Returns a copy of the stored bytes.
  std::string toString() const {
    return static_cast<const std::string&>(*this);
  }

  // Explicit conversion to a string_view. The view refers to the storage of
  // this object and must not be used after this object has been modified or
  // destroyed.
  // ______________________________________________________
  std::string_view toStringView() const {
    return std::string_view(data(), size());
  }

  // True iff no bytes are stored. std::string::empty only has a const
  // overload, so it can be re-exported directly.
  // _______________________________________________________
  using std::string::empty;

  // Read-only access to the byte at position `pos`; no bounds checking is
  // performed. Written as a wrapper (instead of a using-declaration) because
  // a using-declaration would also expose the mutable overload.
  // __________________________________________________________
  const char& operator[](size_t pos) const {
    return std::string::operator[](pos);
  }

 private:
  // Private constructors and assignments used internally by the to- and
  // from-string conversions. The constructors are explicit so that even inside
  // this class no accidental string -> CompressedString conversion happens.
  explicit CompressedString(std::string&& other) noexcept
      : std::string(std::move(other)) {}

  // _____________________________________________________________
  explicit CompressedString(const std::string& other) : std::string(other) {}

  // Assigns directly to the base class instead of creating a temporary
  // CompressedString first.
  // _____________________________________________________________
  CompressedString& operator=(std::string&& other) {
    std::string::operator=(std::move(other));
    return *this;
  }

  // _______________________________________________________________
  CompressedString& operator=(const std::string& other) {
    std::string::operator=(other);
    return *this;
  }
};
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think version 3.2 was released 2 days ago so we might want to make sure we are at that version
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes indeed, how do I choose/fixate a certain commit of a certain branch of the submodule for my project?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://stackoverflow.com/a/5828396/692303