Skip to content

Commit

Permalink
Included removing of unused temporary files after Index creation
Browse files Browse the repository at this point in the history
  • Loading branch information
joka921 committed Jul 10, 2018
1 parent 617002e commit 70c845e
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 9 deletions.
23 changes: 20 additions & 3 deletions src/index/Index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ void Index::createFromTsvFile(const string& tsvFile, const string& onDiskBase,
// _____________________________________________________________________________________________
Index::ExtVec Index::createExtVecAndVocabFromNTriples(const string& ntFile,
const string& onDiskBase,
bool onDiskLiterals) {
bool onDiskLiterals, bool keepTempFiles) {
size_t nofLines = passNTriplesFileForVocabulary(ntFile, onDiskLiterals, NUM_TRIPLES_PER_PARTIAL_VOCAB);
if (onDiskLiterals) {
_vocab.externalizeLiteralsFromTextFile(onDiskBase + EXTERNAL_LITS_TEXT_FILE_NAME,
Expand All @@ -98,18 +98,35 @@ Index::ExtVec Index::createExtVecAndVocabFromNTriples(const string& ntFile,
_vocab = Vocabulary();
ExtVec v(nofLines);
passNTriplesFileIntoIdVector(ntFile, v, onDiskLiterals, NUM_TRIPLES_PER_PARTIAL_VOCAB);

if (!keepTempFiles) {
// remove temporary files only used during index creation
LOG(INFO) << "Removing temporary files (partial vocabulary and external text file...\n";
string removeCommand1 = "rm " + onDiskBase + EXTERNAL_LITS_TEXT_FILE_NAME;
bool w1 = system(removeCommand1.c_str());
string removeCommand2 = "rm " + onDiskBase + PARTIAL_VOCAB_FILE_NAME + "*";
bool w2 = system(removeCommand2.c_str());
if (w1 || w2) {
LOG(INFO) << "Warning. Deleting of temporary files probably not successful\n";
} else {
LOG(INFO) << "Done.\n";
}
} else {
LOG(INFO) << "Keeping temporary files (partial vocabulary and external text file...\n";
}
return v;
}

// _____________________________________________________________________________
void Index::createFromNTriplesFile(const string& ntFile,
const string& onDiskBase,
bool allPermutations, bool onDiskLiterals) {
bool allPermutations, bool onDiskLiterals,
bool keepTempFiles) {
_onDiskBase = onDiskBase;
_onDiskLiterals = onDiskLiterals;
string indexFilename = _onDiskBase + ".index";

ExtVec v = createExtVecAndVocabFromNTriples(ntFile, onDiskBase, onDiskLiterals);
ExtVec v = createExtVecAndVocabFromNTriples(ntFile, onDiskBase, onDiskLiterals, keepTempFiles);
LOG(INFO) << "Sorting for PSO permutation..." << std::endl;
stxxl::sort(begin(v), end(v), SortByPSO(), STXXL_MEMORY_TO_USE);
LOG(INFO) << "Sort done." << std::endl;
Expand Down
6 changes: 3 additions & 3 deletions src/index/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ class Index {
// Will write vocabulary and on-disk index data.
// Also ends up with fully functional in-memory metadata.
void createFromNTriplesFile(const string& ntFile, const string& onDiskBase,
bool allPermutations,
bool onDiskLiterals = false);
bool allPermutations, bool onDiskLiterals = false,
bool keepTempFiles = false);

// Creates an index object from an on disk index
// that has previously been constructed.
Expand Down Expand Up @@ -311,7 +311,7 @@ class Index {
// creation once the ExtVec is set up and it would be a waste of RAM
ExtVec createExtVecAndVocabFromNTriples(const string& ntFile,
const string& onDiskBase,
bool onDiskLiterals);
bool onDiskLiterals, bool keepTempFiles);

// ___________________________________________________________________
size_t passNTriplesFileForVocabulary(const string& ntFile,
Expand Down
16 changes: 14 additions & 2 deletions src/index/IndexBuilderMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ struct option options[] = {{"all-permutations", no_argument, NULL, 'a'},
{"text-index-name", required_argument, NULL, 'T'},
{"words-by-contexts", required_argument, NULL, 'w'},
{"add-text-index", no_argument, NULL, 'A'},
{"keep-temporary-files", no_argument, NULL, 'k'},
{NULL, 0, NULL, 0}};

string getStxxlDiskFileName(const string& location, const string& tail) {
Expand Down Expand Up @@ -97,6 +98,13 @@ void printUsage(char* execName) {
cout << " " << std::setw(20) << "w, words-by-contexts" << std::setw(1)
<< " "
<< "words-file to build text index from." << endl;
cout << " " << std::setw(20) << "A, add-text-index" << std::setw(1)
<< " "
<< "Add text index to already existing kb-index" << endl;
cout << " " << std::setw(20) << "k, keep-temporary-files" << std::setw(1)
<< " "
<< "Keep Temporary Files from IndexCreation (normally only for debugging)"
<< endl;
cout.copyfmt(coutState);
}

Expand All @@ -120,10 +128,11 @@ int main(int argc, char** argv) {
bool onDiskLiterals = false;
bool usePatterns = false;
bool onlyAddTextIndex = false;
bool keepTemporaryFiles = false;
optind = 1;
// Process command line arguments.
while (true) {
int c = getopt_long(argc, argv, "t:n:i:w:d:alT:K:PhA", options, NULL);
int c = getopt_long(argc, argv, "t:n:i:w:d:alT:K:PhAk", options, NULL);
if (c == -1) {
break;
}
Expand Down Expand Up @@ -165,6 +174,9 @@ int main(int argc, char** argv) {
case 'A':
onlyAddTextIndex = true;
break;
case 'k':
keepTemporaryFiles = true;
break;
default:
cout << endl
<< "! ERROR in processing options (getopt returned '" << c
Expand Down Expand Up @@ -233,7 +245,7 @@ int main(int argc, char** argv) {

if (ntFile.size() > 0) {
index.createFromNTriplesFile(ntFile, baseName, allPermutations,
onDiskLiterals);
onDiskLiterals, keepTemporaryFiles);
} else if (tsvFile.size() > 0) {
index.createFromTsvFile(tsvFile, baseName, allPermutations,
onDiskLiterals);
Expand Down
6 changes: 5 additions & 1 deletion test/VocabularyGeneratorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <string>
#include <fstream>
#include <cstdlib>
#include <iostream>
#include <ctime>

#include "../src/index/ConstantsIndexCreation.h"
Expand Down Expand Up @@ -43,7 +44,10 @@ class MergeVocabularyTest : public ::testing::Test {
// create random subdirectory in /tmp
std::string tempPath = "/tmp/";
_basePath = tempPath + _basePath + "/";
system(("mkdir -p " + _basePath).c_str());
if (system(("mkdir -p " + _basePath).c_str())) {
// system should return 0 on success
std::cerr << "Could not create subfolder of tmp for test. this might lead to test failures\n";
}

// make paths absolute under created tmp directory
_path0 = _basePath + _path0;
Expand Down

0 comments on commit 70c845e

Please sign in to comment.