Add a new loglevel called "Timing" (#458)

It includes information about start/end times of operations and their duration, which are only there for developing purposes and not understandable or useful for a typical user.
ad-freiburg · Sep 3, 2021 · d6957ff · d6957ff
1 parent 57da918
commit d6957ff
Show file tree

Hide file tree

Showing 4 changed files with 30 additions and 19 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -180,15 +180,16 @@ set(LOG_LEVEL_ERROR 1)
 set(LOG_LEVEL_WARN 2)
 set(LOG_LEVEL_INFO 3)
 set(LOG_LEVEL_DEBUG 4)
-set(LOG_LEVEL_TRACE 5)
+set(LOG_LEVEL_TIMING 5)
+set(LOG_LEVEL_TRACE 6)
 
 
 if(CMAKE_BUILD_TYPE MATCHES DEBUG)
   set(LOGLEVEL DEBUG CACHE STRING "The loglevel")
 else()
   set(LOGLEVEL INFO CACHE STRING "The loglevel")
 endif()
-set_property(CACHE LOGLEVEL PROPERTY STRINGS FATAL ERROR WARN INFO DEBUG TRACE)
+set_property(CACHE LOGLEVEL PROPERTY STRINGS FATAL ERROR WARN INFO DEBUG TIMING TRACE)
 add_definitions(-DLOGLEVEL=${LOG_LEVEL_${LOGLEVEL}})
 
 add_subdirectory(src/parser)

diff --git a/src/index/Index.cpp b/src/index/Index.cpp
@@ -191,13 +191,17 @@ VocabularyData Index::passFileForVocabulary(const string& filename,
             localWriter << innerOpt.value();
           }
         }
-        if (i % 10000000 == 0) {
-          LOG(INFO) << "Lines (from KB-file) processed: " << i << '\n';
+        if (i % 10'000'000 == 0) {
+          LOG(INFO) << "Lines (from KB-file) processed: " << i << std::endl;
         }
       }
-      LOG(INFO) << "WaitTimes for Pipeline in msecs\n";
+      LOG(TIMING) << "WaitTimes for Pipeline in msecs\n";
       for (const auto& t : p.getWaitingTime()) {
-        LOG(INFO) << t << " msecs\n";
+        LOG(TIMING) << t << " msecs" << std::endl;
+      }
+
+      if constexpr (requires(Parser p) { p.printAndResetQueueStatistics(); }) {
+        parser->printAndResetQueueStatistics();
       }
     }
 
@@ -314,7 +318,7 @@ void Index::convertPartialToGlobalIds(
           {iterators[0]->second, iterators[1]->second, iterators[2]->second}};
 
       ++i;
-      if (i % 10000000 == 0) {
+      if (i % 100'000'000 == 0) {
         LOG(INFO) << "Lines processed: " << i << '\n';
       }
     }
@@ -1544,12 +1548,12 @@ std::future<void> Index::writeNextPartialVocabulary(
       return c(a.second.m_splitVal, b.second.m_splitVal,
                decltype(_vocab)::SortLevel::TOTAL);
     };
-    LOG(INFO) << "Start sorting of vocabulary with #elements: " << vec.size()
-              << std::endl;
+    LOG(TIMING) << "Start sorting of vocabulary with #elements: " << vec.size()
+                << std::endl;
     sortVocabVector(&vec, identicalPred, true);
-    LOG(INFO) << "Finished sorting of vocabulary" << std::endl;
+    LOG(TIMING) << "Finished sorting of vocabulary" << std::endl;
     auto mapping = createInternalMapping(&vec);
-    LOG(INFO) << "Finished creating of Mapping vocabulary" << std::endl;
+    LOG(TIMING) << "Finished creating of Mapping vocabulary" << std::endl;
     auto sz = vec.size();
     // since now adjacent duplicates also have the same Ids, it suffices to
     // compare those
@@ -1564,16 +1568,17 @@ std::future<void> Index::writeNextPartialVocabulary(
     writePartialVocabularyToFile(vec, partialFilename);
     if (vocabPrefixCompressed) {
       // sort according to the actual byte values
-      LOG(INFO) << "Start sorting of vocabulary for prefix compression"
-                << std::endl;
+      LOG(TIMING) << "Start sorting of vocabulary for prefix compression"
+                  << std::endl;
       sortVocabVector(
           &vec, [](const auto& a, const auto& b) { return a.first < b.first; },
           false);
-      LOG(INFO) << "Finished sorting of vocabulary for prefix compression"
-                << std::endl;
+      LOG(TIMING) << "Finished sorting of vocabulary for prefix compression"
+                  << std::endl;
       writePartialVocabularyToFile(vec, partialCompressionFilename);
     }
-    LOG(INFO) << "Finished writing the partial vocabulary" << std::endl;
+    LOG(TIMING) << "Finished writing the partial vocabulary" << std::endl;
+    vec.clear();
   };
 
   return std::async(std::launch::async, std::move(lambda));

diff --git a/src/index/VocabularyGeneratorImpl.h b/src/index/VocabularyGeneratorImpl.h
@@ -91,8 +91,8 @@ VocabularyMerger::VocMergeRes VocabularyMerger::mergeVocabulary(
       sortedBuffer.reserve(_bufferSize);
       // wait for the last batch
 
+      LOG(TIMING) << "A new batch of words is ready" << std::endl;
       if (writeFuture.valid()) {
-        LOG(TRACE) << "Waiting for the asynchronous write to finish\n";
         writeFuture.get();
       }
       writeFuture = std::async(writeTask);
@@ -138,7 +138,7 @@ VocabularyMerger::VocMergeRes VocabularyMerger::mergeVocabulary(
 // ________________________________________________________________________________
 void VocabularyMerger::writeQueueWordsToIdVec(
     const std::vector<QueueWord>& buffer) {
-  LOG(TRACE) << "Start writing a batch of merged words\n";
+  LOG(TIMING) << "Start writing a batch of merged words\n";
 
   // smaller grained buffer for the actual inner write
   auto bufSize = _bufferSize / 5;

diff --git a/src/util/Log.h b/src/util/Log.h
@@ -25,11 +25,13 @@
     ;               \
   else              \
     ad_utility::Log::getLog<x>()  // NOLINT
+
 #define AD_POS_IN_CODE                                                     \
   '[' << ad_utility::getLastPartOfString(__FILE__, '/') << ':' << __LINE__ \
       << "] "  // NOLINT
 
-static constexpr size_t TRACE = 5;
+static constexpr size_t TRACE = 6;
+static constexpr size_t TIMING = 5;
 static constexpr size_t DEBUG = 4;
 static constexpr size_t INFO = 3;
 static constexpr size_t WARN = 2;
@@ -126,6 +128,9 @@ class Log {
     if (LEVEL == TRACE) {
       return "TRACE: ";
     }
+    if (LEVEL == TIMING) {
+      return "TIMING: ";
+    }
     if (LEVEL == DEBUG) {
       return "DEBUG: ";
     }