Skip to content

Commit

Permalink
Added magic number to MetaData
Browse files Browse the repository at this point in the history
Integrated madvise wrapper into MmapVector

Added Tests for OSP scan + review

-small changes from Niklas' review
-Unit tests for readonly mode of MmapVector
-e2e-test for OSP scan

Added MmapVectorView and unit tests for it

Todo: Comment MmapVectorView
- Create MmapVectorView based MetaData type and integrate it into the Index
  class

Integration of MmapVectorView into IndexMetaData. Not yet finished

waiting for C++17

Working version with MmapView and C++17
  • Loading branch information
joka921 committed Aug 10, 2018
1 parent 77bde8c commit 7a7ac0a
Show file tree
Hide file tree
Showing 25 changed files with 2,288 additions and 390 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ target_link_libraries (ServerMain engine ${CMAKE_THREAD_LIBS_INIT})
add_executable(WriteIndexListsMain src/WriteIndexListsMain.cpp)
target_link_libraries (WriteIndexListsMain engine ${CMAKE_THREAD_LIBS_INIT})

add_executable(MetaDataConverterMain src/MetaDataConverterMain.cpp)
target_link_libraries (MetaDataConverterMain metaConverter ${CMAKE_THREAD_LIBS_INIT})

#add_executable(TextFilterComparison src/experiments/TextFilterComparison.cpp)
#target_link_libraries (TextFilterComparison experiments)
Expand All @@ -126,3 +128,4 @@ add_test(QueryPlannerTest test/QueryPlannerTest)
add_test(ConversionsTest test/ConversionsTest)
add_test(SparsehashTest test/SparsehashTest)
add_test(VocabularyGeneratorTest test/VocabularyGeneratorTest)
add_test(MmapVectorTest test/MmapVectorTest)
25 changes: 21 additions & 4 deletions e2e/scientists_queries.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ queries:
checks:
- num_cols: 2
# The query returns too many rows, the current limit is 4096
# - num_rows: 5295
# - num_rows: 5295
- selected: ["?place", "?count2"]
- order_numeric: {"dir": "DESC", "var": "?count2"}
- query: scientists-order-by-aggregate-avg
Expand All @@ -109,8 +109,8 @@ queries:
GROUP BY ?profession
ORDER BY ASC((AVG(?height) as ?avg))
checks:
- num_cols: 2
- num_rows: 209
- num_cols: 2
- num_rows: 209
- selected: ["?profession", "?avg2"]
- order_numeric: {"dir": "ASC", "var": "?avg2"}
- query: group-by-profession-average-height
Expand Down Expand Up @@ -174,7 +174,7 @@ queries:
- selected: ["?r", "?count"]
- contains_row: ["<Religion>", 1185]
- order_numeric: {"dir": "DESC", "var": "?count"}
- query : has-predicate-full
- query : has-predicate-full
solutions:
- type: no-text
sparql: |
Expand All @@ -200,3 +200,20 @@ queries:
- num_cols: 2
- selected: ["?entity", "?r"]
- contains_row: ["<Geographer>", "<Profession>"]
- query : full-osp-scan
solutions:
- type: no-text
sparql: |
SELECT DISTINCT ?p WHERE {
?x <is-a> <Scientist> .
?y <is-a> <Scientist> .
?x ?p ?y .
}
checks:
- num_rows: 17
- num_cols: 1
- selected: ["?p"]
- contains_row: ["<Academic_advisor>"]
- contains_row: ["<Named_after>"]
- contains_row: ["<Influenced_By>"]
- contains_row: ["<Production_staff>"]
43 changes: 43 additions & 0 deletions src/MetaDataConverterMain.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright 2018, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach (johannes.kalmbach@gmail.com)
//
#include "./index/MetaDataConverter.h"
#include <array>
#include <iostream>
#include "./global/Constants.h"
#include "./util/File.h"

// _________________________________________________________
// Entry point of the meta data converter.
//
// Expects exactly one command line argument, the index prefix. For each
// permutation suffix, the file name `<indexPrefix>.index<suffix>` is built.
// If that file exists, the matching conversion routine is invoked with
// `<file>.converted` as its second argument (presumably the output path --
// confirm against MetaDataConverter.h). Missing permutation files are
// reported on stderr and skipped.
//
// Returns 1 on wrong usage, 0 otherwise.
int main(int argc, char** argv) {
  if (argc != 2) {
    std::cerr << "Usage: ./MetaDataConverterMain <indexPrefix>\n";
    // Return instead of calling exit(): this file does not include
    // <cstdlib>, and returning from main has the same effect here.
    return 1;
  }
  const std::string indexPrefix = argv[1];

  // Returns true if the permutation file exists. Otherwise prints the
  // "skipping" notice to stderr and returns false. Shared by both loops so
  // that the message stays identical for all permutations.
  const auto existsOrWarn = [](const std::string& permutName) {
    if (ad_utility::File::exists(permutName)) {
      return true;
    }
    std::cerr << "Permutation file " << permutName
              << " was not found. Maybe not all permutations were built for "
                 "this index. Skipping\n";
    return false;
  };

  // Permutations handled by addMagicNumberToSparseMetaDataPermutation.
  const std::array<std::string, 4> sparseNames{".pso", ".pos", ".spo", ".sop"};
  for (const auto& suffix : sparseNames) {
    const std::string permutName = indexPrefix + ".index" + suffix;
    if (!existsOrWarn(permutName)) {
      continue;
    }
    addMagicNumberToSparseMetaDataPermutation(permutName,
                                              permutName + ".converted");
  }

  // Permutations handled by convertHmapBasedPermutatationToMmap, which
  // additionally receives a file name ending in MMAP_FILE_SUFFIX.
  const std::array<std::string, 2> denseNames{".osp", ".ops"};
  for (const auto& suffix : denseNames) {
    const std::string permutName = indexPrefix + ".index" + suffix;
    if (!existsOrWarn(permutName)) {
      continue;
    }
    convertHmapBasedPermutatationToMmap(permutName, permutName + ".converted",
                                        permutName + MMAP_FILE_SUFFIX);
  }
  return 0;
}
4 changes: 4 additions & 0 deletions src/global/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de)
#pragma once

#include <string>

static const int STXXL_MEMORY_TO_USE = 1024 * 1024 * 1024;
static const int STXXL_DISK_SIZE_INDEX_BUILDER = 500 * 1000;
static const int STXXL_DISK_SIZE_INDEX_TEST = 10;
Expand Down Expand Up @@ -47,3 +49,5 @@ static const int DEFAULT_NOF_VALUE_INTEGER_DIGITS = 50;
static const int DEFAULT_NOF_VALUE_EXPONENT_DIGITS = 20;
static const int DEFAULT_NOF_VALUE_MANTISSA_DIGITS = 30;
static const int DEFAULT_NOF_DATE_YEAR_DIGITS = 19;

// Suffix that is appended to a permutation's file name to form the name of a
// companion file (see its use in MetaDataConverterMain.cpp).
// NOTE(review): presumably the file backing the mmap-based meta data --
// confirm against the converter implementation.
static const std::string MMAP_FILE_SUFFIX = ".meta-mmap";
10 changes: 9 additions & 1 deletion src/index/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,18 @@ add_library(index
VocabularyGenerator.h VocabularyGenerator.cpp
ConstantsIndexCreation.h
ExternalVocabulary.h ExternalVocabulary.cpp
IndexMetaData.h IndexMetaData.cpp
IndexMetaData.h IndexMetaDataImpl.h
MetaDataTypes.h MetaDataTypes.cpp
MetaDataHandler.h
StxxlSortFunctors.h
TextMetaData.cpp TextMetaData.h
DocsDB.cpp DocsDB.h
FTSAlgorithms.cpp FTSAlgorithms.h)

target_link_libraries(index parser ${STXXL_LIBRARIES})

add_library(metaConverter
MetaDataConverter.cpp MetaDataConverter.h)

target_link_libraries(metaConverter index)

2 changes: 1 addition & 1 deletion src/index/ExternalVocabulary.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class ExternalVocabulary {
private:
mutable ad_utility::File _file;
off_t _startOfOffsets;
size_t _size;
size_t _size = 0;

Id binarySearchInVocab(const string& word) const;
};
16 changes: 8 additions & 8 deletions src/index/Index.Text.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
// Chair of Algorithms and Data Structures.
// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de)

#include "./Index.h"
#include <stxxl/algorithm>
#include <tuple>
#include <utility>
#include "../parser/ContextFileParser.h"
#include "../util/Simple8bCode.h"
#include "./FTSAlgorithms.h"
#include "./Index.h"

// _____________________________________________________________________________
void Index::addTextFromContextFile(const string& contextFile) {
Expand Down Expand Up @@ -469,11 +469,10 @@ void Index::createCodebooks(const vector<Index::Posting>& postings,
[](const std::pair<Id, size_t>& a, const std::pair<Id, size_t>& b) {
return a.second > b.second;
});
std::sort(
sfVec.begin(), sfVec.end(),
[](const std::pair<Score, size_t>& a, const std::pair<Score, size_t>& b) {
return a.second > b.second;
});
std::sort(sfVec.begin(), sfVec.end(), [](const std::pair<Score, size_t>& a,
const std::pair<Score, size_t>& b) {
return a.second > b.second;
});
for (size_t j = 0; j < wfVec.size(); ++j) {
wordCodebook.push_back(wfVec[j].first);
wordCodemap[wfVec[j].first] = j;
Expand Down Expand Up @@ -573,8 +572,9 @@ void Index::getWordPostingsForTerm(const string& term, vector<Id>& cids,
entityTerm
? _textMeta.getBlockInfoByEntityId(idRange._first)
: _textMeta.getBlockInfoByWordRange(idRange._first, idRange._last);
if (tbmd._cl.hasMultipleWords() && !(tbmd._firstWordId == idRange._first &&
tbmd._lastWordId == idRange._last)) {
if (tbmd._cl.hasMultipleWords() &&
!(tbmd._firstWordId == idRange._first &&
tbmd._lastWordId == idRange._last)) {
vector<Id> blockCids;
vector<Id> blockWids;
vector<Score> blockScores;
Expand Down

0 comments on commit 7a7ac0a

Please sign in to comment.