Skip to content

Commit

Permalink
Fixed all bugs from the rebasing and ran clang-format.
Browse files Browse the repository at this point in the history
  • Loading branch information
joka921 committed Jan 11, 2020
1 parent 76aacfd commit 9c801b7
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 41 deletions.
8 changes: 5 additions & 3 deletions src/index/Index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1517,9 +1517,11 @@ void Index::initializeVocabularySettingsBuild() {
}

// ____
// TODO<joka921: are those unused now and can be removed?>_______________________________________________________________________
Id Index::assignNextId(Index::ItemMap* mapPtr, const string& key) {
ItemMap& map = *mapPtr;
// TODO<joka921>: Are those unused now? If so, they can be removed.
template <class Map>
Id Index::assignNextId(Map* mapPtr, const string& key) {
Map& map = *mapPtr;
if (!map.count(key)) {
Id res = map.size();
map[key] = map.size();
Expand Down
10 changes: 7 additions & 3 deletions src/index/VocabularyGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,12 @@ class VocabularyMerger {
};

// ______________
// TODO<joka921> is this even used anymore?___________________________________________________________________________
// TODO<joka921>: Is this even used anymore?
// Declaration of writePartialIdMapToBinaryFileForMerging.
// NOTE(review): this span is a rendered diff, so the parameter list appears
// twice (pre- and post-clang-format); both forms declare the same signature.
//
// `map`            shared pointer to the array of maps whose entries are
//                  collected into one vector of (string, Id) pairs.
// `fileName`       presumably the file the result is written to -- the part
//                  of the definition that uses it is not visible here; confirm.
// `comp`           binary predicate applied to the string part of two pairs
//                  to order them.
// `doParallelSort` if true and USE_PARALLEL_SORT is enabled, the definition
//                  sorts with __gnu_parallel::sort.
template <class Comp>
void writePartialIdMapToBinaryFileForMerging(
std::shared_ptr<const ItemMapArray> map, const string& fileName,
Comp comp, bool doParallelSort);
std::shared_ptr<const ItemMapArray> map, const string& fileName, Comp comp,
bool doParallelSort);

// _________________________________________________________________________________________
ad_utility::HashMap<Id, Id> IdMapFromPartialIdMapFile(
Expand Down Expand Up @@ -170,9 +171,12 @@ std::vector<std::pair<string, Id>> vocabMapsToVector(
/**
* @brief Sort the input in-place according to the strings (the first element
* of each pair) as compared by the given comparator.
* @tparam StringSortComparator A binary function object type to compare
* strings (e.g. std::less<std::string>)
* @param vecPtr pointer to the vector of (string, Id) pairs that is sorted
* in place.
* @param comp the comparator instance; it is called with the string parts of
* two pairs.
* @param doParallelSort if true and USE_PARALLEL_SORT is true, use the gnu
* parallel extension for sorting.
*/
template <class StringSortComparator>
void sortVocabVector(std::vector<std::pair<string, Id>>* vecPtr,
StringSortComparator comp, bool doParallelSort);

Expand Down
54 changes: 21 additions & 33 deletions src/index/VocabularyGeneratorImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,7 @@ void VocabularyMerger::doActualWrite(
}

// ____________________________________________________________________________________________________________
absl::flat_hash_map<Id, Id> createInternalMapping(
std::vector<std::pair<string, Id>>* elsPtr) {
absl::flat_hash_map<Id, Id> createInternalMapping(std::vector<std::pair<string, Id>>* elsPtr) {
auto& els = *elsPtr;
absl::flat_hash_map<Id, Id> res;
bool first = true;
Expand All @@ -237,34 +236,30 @@ absl::flat_hash_map<Id, Id> createInternalMapping(
}

// ________________________________________________________________________________________________________
// For every triple in `input`, look up each of its three ids in `map`
// (partial id -> global id) and append the triple of mapped ids to the
// writer that `writePtr` points to.
// If an id is missing from `map`, the id is logged and AD_CHECK(false) is
// executed (presumably terminating the run -- AD_CHECK is defined elsewhere).
// NOTE(review): this span is a rendered diff without +/- markers; several
// statements appear twice, once in their pre-change and once in their
// post-change form (e.g. `Index::TripleVec` vs. `TripleVec`).
void writeMappedIdsToExtVec(const Index::TripleVec& input,
const absl::flat_hash_map<Id, Id>& map,
Index::TripleVec::bufwriter_type* writePtr) {
void writeMappedIdsToExtVec(const TripleVec& input, const absl::flat_hash_map<Id, Id>& map,
TripleVec::bufwriter_type* writePtr) {
auto& writer = *writePtr;
for (const auto& curTriple : input) {
// for all triple elements find their mapping from partial to global ids
Index::HashMap<Id, Id>::const_iterator iterators[3];
absl::flat_hash_map<Id, Id>::const_iterator iterators[3];
for (size_t k = 0; k < 3; ++k) {
iterators[k] = map.find(curTriple[k]);
// every id of the triple must have an entry in the map
if (iterators[k] == map.end()) {
LOG(INFO) << "not found in partial local Vocab: " << curTriple[k]
<< '\n';
LOG(INFO) << "not found in partial local Vocab: " << curTriple[k] << '\n';
AD_CHECK(false);
}
}

// write the triple with all three ids replaced by their mapped values
writer << array<Id, 3>{
{iterators[0]->second, iterators[1]->second, iterators[2]->second}};
writer << array<Id, 3>{{iterators[0]->second, iterators[1]->second, iterators[2]->second}};
}
}

// _________________________________________________________________________________________________________
void writePartialVocabularyToFile(const std::vector<std::pair<string, Id>>& els,
const string& fileName) {
LOG(INFO) << "Writing vocabulary to binary file " << fileName << "\n";
std::ofstream out(fileName.c_str(),
std::ios_base::out | std::ios_base::binary);
std::ofstream out(fileName.c_str(), std::ios_base::out | std::ios_base::binary);
AD_CHECK(out.is_open());
for (const auto& el : els) {
std::string_view word = el.first;
Expand All @@ -280,23 +275,20 @@ void writePartialVocabularyToFile(const std::vector<std::pair<string, Id>>& els,

// ______________________________________________________________________________________________
template <class Pred>
void writePartialIdMapToBinaryFileForMerging(
std::shared_ptr<const ItemMapArray> map, const string& fileName,
Pred comp, const bool doParallelSort) {
void writePartialIdMapToBinaryFileForMerging(std::shared_ptr<const ItemMapArray> map,
const string& fileName, Pred comp,
const bool doParallelSort) {
LOG(INFO) << "Creating partial vocabulary from set ...\n";
std::vector<std::pair<string, Id>> els;
size_t totalEls = std::accumulate(
map->begin(), map->end(), 0,
[](const auto& x, const auto& y) { return x + y.size(); });
size_t totalEls = std::accumulate(map->begin(), map->end(), 0,
[](const auto& x, const auto& y) { return x + y.size(); });
els.reserve(totalEls);
for (const auto& singleMap : *map) {
els.insert(end(els), begin(singleMap), end(singleMap));
}
LOG(INFO) << "... sorting ...\n";

auto pred = [comp](const auto& p1, const auto& p2) {
return comp(p1.first, p2.first);
};
auto pred = [comp](const auto& p1, const auto& p2) { return comp(p1.first, p2.first); };
if constexpr (USE_PARALLEL_SORT) {
if (doParallelSort) {
__gnu_parallel::sort(begin(els), end(els), pred,
Expand All @@ -314,12 +306,10 @@ void writePartialIdMapToBinaryFileForMerging(
}

// __________________________________________________________________________________________________
std::vector<std::pair<string, Id>> vocabMapsToVector(
std::shared_ptr<const ItemMapArray> map) {
std::vector<std::pair<string, Id>> vocabMapsToVector(std::shared_ptr<const ItemMapArray> map) {
std::vector<std::pair<string, Id>> els;
size_t totalEls = std::accumulate(
map->begin(), map->end(), 0,
[](const auto& x, const auto& y) { return x + y.size(); });
size_t totalEls = std::accumulate(map->begin(), map->end(), 0,
[](const auto& x, const auto& y) { return x + y.size(); });
els.reserve(totalEls);
for (const auto& singleMap : *map) {
els.insert(end(els), begin(singleMap), end(singleMap));
Expand All @@ -328,12 +318,11 @@ std::vector<std::pair<string, Id>> vocabMapsToVector(
}

// _______________________________________________________________________________________________________________________
void sortVocabVector(std::vector<std::pair<string, Id>>* vecPtr,
StringSortComparator comp, const bool doParallelSort) {
template <class StringSortComparator>
void sortVocabVector(std::vector<std::pair<string, Id>>* vecPtr, StringSortComparator comp,
const bool doParallelSort) {
auto& els = *vecPtr;
auto pred = [comp](const auto& p1, const auto& p2) {
return comp(p1.first, p2.first);
};
auto pred = [comp](const auto& p1, const auto& p2) { return comp(p1.first, p2.first); };
if constexpr (USE_PARALLEL_SORT) {
if (doParallelSort) {
__gnu_parallel::sort(begin(els), end(els), pred,
Expand All @@ -348,8 +337,7 @@ void sortVocabVector(std::vector<std::pair<string, Id>>* vecPtr,
}

// _____________________________________________________________________
ad_utility::HashMap<Id, Id> IdMapFromPartialIdMapFile(
const string& mmapFilename) {
ad_utility::HashMap<Id, Id> IdMapFromPartialIdMapFile(const string& mmapFilename) {
ad_utility::HashMap<Id, Id> res;
IdPairMMapVecView vec(mmapFilename);
for (const auto [partialId, globalId] : vec) {
Expand Down
4 changes: 2 additions & 2 deletions test/VocabularyGeneratorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,8 @@ TEST(VocabularyGenerator, ReadAndWritePartial) {
std::string basename = "_tmp_testidx";
auto ptr = std::make_shared<const Index::ItemMapArray>(std::move(arr));
writePartialIdMapToBinaryFileForMerging(
ptr, basename + PARTIAL_VOCAB_FILE_NAME + "0",
std::less<std::string>());
ptr, basename + PARTIAL_VOCAB_FILE_NAME + "0", std::less<std::string>(),
false);

{
VocabularyMerger m;
Expand Down

0 comments on commit 9c801b7

Please sign in to comment.