-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #105 from joka921/f.prefixCompressionNew
Prefix Compression and faster startup time
- Loading branch information
Showing
37 changed files
with
1,855 additions
and
455 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
// Copyright 2018, University of Freiburg, | ||
// Chair of Algorithms and Data Structures. | ||
// Author: Johannes Kalmbach<joka921> (johannes.kalmbach@gmail.com) | ||
|
||
#include <iostream> | ||
#include "./global/Constants.h" | ||
#include "./index/PrefixHeuristic.h" | ||
|
||
// Reads a vocabulary of words from file, calculates the prefixes with which the | ||
// greedy heuristic would compress this vocabulary and prints them on the | ||
// screen (mostly for testing and evaluation purposes of the greedy algorithm) | ||
// | ||
// It is assumed, that there are 127 prefixes which are encoded by 1 byte each. | ||
// Also prints some statistics about the compression (e.g. compression ratio) | ||
// | ||
// The vocabulary in the input file at argv[1] must be one word per line and | ||
// alphabetically sorted | ||
// _______________________________________________________________ | ||
int main(int argc, char** argv) { | ||
if (argc != 2) { | ||
std::cerr << "Usage: ./PrefixHeuristicEvaluatorMain <filename>\n"; | ||
std::cerr << "Reads a vocabulary of words from file, calculates the " | ||
"prefixes with which the greedy heuristic would compress this " | ||
"vocabulary and prints them on the" | ||
" screen (mostly for testing and evaluation purposes of the " | ||
"greedy algorithm).\n" | ||
|
||
" It is assumed, that there are 127 prefixes which are " | ||
"encoded by 1 byte each." | ||
" Also prints some statistics about the compression (e.g. " | ||
"compression ratio)\n" | ||
|
||
" The vocabulary in the input file at argv[1] must be one " | ||
"word per line and alphabetically sorted"; | ||
exit(1); | ||
} | ||
|
||
for (const auto& p : | ||
calculatePrefixes(argv[1], 127, NUM_COMPRESSION_PREFIXES, true)) { | ||
std::cout << p << '\n'; | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
// Copyright 2018, University of Freiburg, | ||
// Chair of Algorithms and Data Structures. | ||
// Author: Johannes Kalmbach<joka921> (johannes.kalmbach@gmail.com) | ||
|
||
#pragma once | ||
|
||
#include <functional> | ||
#include <string> | ||
|
||
using std::string; | ||
|
||
// Class to store strings that have been compressed.
//
// Forbids automatic conversion between the compressed strings in the
// vocabulary and "ordinary" strings to avoid bugs: std::string is a *private*
// base class, so outside code can only convert via the explicitly named
// fromString / toString / toStringView functions.
//
// Only the functionality of std::string that is actually used is exposed
// (empty() and read-only operator[]).
// TODO<niklas> is there a better way to do this?
class CompressedString : private std::string {
 public:
  // Creates an empty compressed string.
  CompressedString() = default;

  // Explicit conversion from a string (copies `other`).
  static CompressedString fromString(const std::string& other) {
    return CompressedString(other);
  }

  // Explicit conversion from a string (moves from `other`).
  // ______________________________________________________________
  static CompressedString fromString(std::string&& other) {
    return CompressedString(std::move(other));
  }

  // Explicit conversion to a string; returns a copy of the stored bytes.
  std::string toString() const { return asString(); }

  // Explicit conversion to a string_view. The view refers to the storage of
  // this object and must not be used after this object is modified or
  // destroyed.
  // ______________________________________________________
  std::string_view toStringView() const { return asString(); }

  // True iff the stored string has length zero.
  // _______________________________________________________
  bool empty() const { return std::string::empty(); }

  // Read-only access to the character at index `pos`; forwards to
  // std::string::operator[], so no bounds checking is performed.
  // __________________________________________________________
  const char& operator[](size_t pos) const {
    return std::string::operator[](pos);
  }

 private:
  // View of this object as its (private) std::string base.
  const std::string& asString() const { return *this; }

  // Private constructors and assignments internally used by the to and from
  // string conversions.
  explicit CompressedString(std::string&& other) noexcept
      : std::string(std::move(other)) {}

  // _____________________________________________________________
  explicit CompressedString(const std::string& other) : std::string(other) {}

  // Assigns directly through the base class instead of building a temporary
  // CompressedString and relying on overload resolution to pick the implicit
  // move assignment.
  // _____________________________________________________________
  CompressedString& operator=(std::string&& other) noexcept {
    std::string::operator=(std::move(other));
    return *this;
  }

  // _______________________________________________________________
  CompressedString& operator=(const std::string& other) {
    std::string::operator=(other);
    return *this;
  }
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.