ad-freiburg · hannahbast · Jul 23, 2021 · Jul 19, 2021
diff --git a/src/index/Vocabulary.cpp b/src/index/Vocabulary.cpp
@@ -474,6 +474,44 @@ const std::optional<string> Vocabulary<S, C>::idToOptionalString(Id id) const {
     return _externalLiterals[id];
   }
 }
+
+// ___________________________________________________________________________
+// For each datatype, store the ID range `prefix_range` yields for its prefix.
+template <typename S, typename C>
+ad_utility::HashMap<typename Vocabulary<S, C>::Datatypes, std::pair<Id, Id>>
+Vocabulary<S, C>::getRangesForDatatypes() const {
+  ad_utility::HashMap<Datatypes, std::pair<Id, Id>> result;
+  result[Datatypes::Float] = prefix_range(VALUE_FLOAT_PREFIX);
+  result[Datatypes::Date] = prefix_range(VALUE_DATE_PREFIX);
+  result[Datatypes::Literal] = prefix_range("\"");  // words starting with `"`
+  result[Datatypes::Iri] = prefix_range("<");       // words starting with `<`
+  return result;
+}
+
+// Logs each datatype's ID range, then the words at IDs `first`, `second - 1`
+// (if second > first), `second` (if < _words.size()), `first - 1` (if > 0).
+template <typename S, typename C>
+template <typename, typename>
+void Vocabulary<S, C>::printRangesForDatatypes() {
+  auto logWord = [this](Id id) {
+    LOG(INFO) << idToOptionalString(id).value() << '\n';
+  };
+  for (const auto& entry : getRangesForDatatypes()) {
+    const auto& [lower, upper] = entry.second;
+    LOG(INFO) << lower << " " << upper << '\n';
+    if (upper > lower) {
+      logWord(lower);
+      logWord(upper - 1);
+    }
+    if (upper < _words.size()) {
+      logWord(upper);
+    }
+    if (lower > 0) {
+      logWord(lower - 1);
+    }
+  }
+}
+
 template const std::optional<string>
 RdfsVocabulary::idToOptionalString<CompressedString, void>(Id id) const;
 
@@ -492,6 +530,8 @@ template void RdfsVocabulary::prefixCompressFile<CompressedString, void>(
     const string& infile, const string& outfile,
     const vector<string>& prefixes);
 
+template void RdfsVocabulary::printRangesForDatatypes<CompressedString, void>();
+
 template void TextVocabulary::createFromSet<std::string, void>(
     const ad_utility::HashSet<std::string>& set);
 template void TextVocabulary::writeToFile<std::string, void>(

diff --git a/src/index/Vocabulary.h b/src/index/Vocabulary.h
@@ -74,6 +74,9 @@ struct Prefix {
 //! CompressedString -> prefix compression is applied
 template <class StringType, class ComparatorType>
 class Vocabulary {
+  // The different types of data that are stored in the vocabulary.
+  enum class Datatypes { Literal, Iri, Float, Date };
+
   template <typename T, typename R = void>
   using enable_if_compressed =
       std::enable_if_t<std::is_same_v<T, CompressedString>>;
@@ -177,6 +180,12 @@ class Vocabulary {
   // consider using the prefixRange function.
   bool getIdRangeForFullTextPrefix(const string& word, IdRange* range) const;
 
+  ad_utility::HashMap<Datatypes, std::pair<Id, Id>> getRangesForDatatypes()
+      const;  // One ID range (as returned by prefix_range) per datatype.
+
+  template <typename U = StringType, typename = enable_if_compressed<U>>
+  void printRangesForDatatypes();  // Logs each range and its boundary words.
+
   // only used during Index building, not needed for compressed vocabulary
   template <typename U = StringType, typename = enable_if_uncompressed<U>>
   void createFromSet(const ad_utility::HashSet<StringType>& set);