First version of Id class for Date/Float/Integer values in 64 bits using bit-fiddling.

Some unit tests, but no integration yet. Next step: use it for the vocabularyFloats (to increase performance).
ad-freiburg · Oct 13, 2020 · 86d7862 · 86d7862
1 parent 8cafcdd
commit 86d7862
Show file tree

Hide file tree

Showing 4 changed files with 388 additions and 4 deletions.
diff --git a/src/global/Id.h b/src/global/Id.h
@@ -4,11 +4,211 @@
 #pragma once
 
 #include <cstdint>
+#include <cstring>
 #include <limits>
+#include <cassert>
+#include <array>
+#include <ostream>
+#include <stdexcept>
+#include <type_traits>
+
+#include "../util/Exception.h"
+/// Reinterpret the object representation of `from` as a value of type `To`.
+/// This is a C++17 stand-in for C++20's `std::bit_cast` (without being
+/// `constexpr`). The bytes are copied via `std::memcpy`, so both types must
+/// have the same size and be trivially copyable; `To` additionally has to be
+/// default-constructible.
+template <class To, class From>
+To bit_cast(const From& from) noexcept {
+  static_assert(sizeof(To) == sizeof(From));
+  // Copying raw bytes is only well-defined for trivially copyable types.
+  static_assert(std::is_trivially_copyable_v<From>);
+  static_assert(std::is_trivially_copyable_v<To>);
+  To to;
+  std::memcpy(&to, &from, sizeof(To));
+  return to;
+}
+
+/// The plain integer id type, for code that really just needs an Id.
+using Id = uint64_t;
+/// A value to use when the result should be empty (e.g. due to an optional
+/// join). It is `inline constexpr` so that this header provides exactly one
+/// immutable definition; a plain non-const global would be defined again in
+/// every translation unit that includes the header and break linking.
+inline constexpr Id ID_NO_VALUE = std::numeric_limits<Id>::max() - 2;
 
-typedef uint64_t Id;
-typedef uint16_t Score;
 
+/// An id that packs a 2-bit type tag and a payload into two 32-bit words.
+/// FLOAT ids keep a `float` in the low word and have all bits of the high
+/// word set. All other types keep an unsigned payload of up to 62 bits: its
+/// low 32 bits in the low word, its remaining bits in the lower 30 bits of
+/// the high word.
+class FancyId {
+ public:
+  /// The type tag that is stored in the two highest bits of the high word.
+  enum Type : uint8_t {
+    VOCAB = 0,
+    LOCAL_VOCAB = 1,
+    DATE = 2,
+    FLOAT = 3
+  };
+  /// All 62 payload bits set. Reserved for the `NoValue()` sentinel.
+  static constexpr uint64_t INTERNAL_MAX_VAL = ~(3ull << 62);
+  /// The largest payload that the checked constructor accepts.
+  static constexpr uint64_t MAX_VAL = INTERNAL_MAX_VAL - 1;
+  /// Selects the two tag bits inside the high word.
+  static constexpr uint32_t TAG_MASK = 3ull << 30;
 // A value to use when the result should be empty (e.g. due to an optional join)
-// The highest two values are used as sentinels.
-static const Id ID_NO_VALUE = std::numeric_limits<Id>::max() - 2;
+  /// The sentinel is a VOCAB id with the payload INTERNAL_MAX_VAL. It is
+  /// assembled word by word, because the checked constructor rejects every
+  /// payload `>= INTERNAL_MAX_VAL` and would therefore always throw here.
+  static FancyId NoValue() {
+    FancyId sentinel;
+    sentinel.value_.un.rest = static_cast<uint32_t>(INTERNAL_MAX_VAL);
+    sentinel.value_.tagAndHigh =
+        static_cast<uint32_t>(INTERNAL_MAX_VAL >> 32u) |
+        (static_cast<uint32_t>(VOCAB) << 30u);
+    return sentinel;
+  }
+
+  /// The bit pattern of a quiet float NaN.
+  // TODO<joka921, C++20> with std::bit_cast this can be constexpr
+  static uint32_t nan() {
+    return bit_cast<uint32_t>(std::numeric_limits<float>::quiet_NaN());
+  }
+
+  /// The type that is encoded in the two tag bits.
+  constexpr Type type() const {
+    return static_cast<Type>(value_.tagAndHigh >> 30u);
+  }
+
+  /// Return the stored float. Only guarded by an `assert`; the behavior is
+  /// undefined if this id does not hold a float.
+  constexpr float getFloat() const noexcept {
+    assert(type() == FLOAT);
+    return value_.un.f;
+  }
+
+  /// Return the unsigned payload. Only guarded by an `assert`; the behavior
+  /// is undefined if this id holds a float.
+  constexpr uint64_t getUnsigned() const noexcept {
+    assert(type() != FLOAT);
+    const uint64_t lowBits = value_.un.rest;
+    // Strip the tag, the remaining 30 bits are the high part of the payload.
+    const uint64_t highBits = value_.tagAndHigh & (~TAG_MASK);
+    return lowBits | (highBits << 32u);
+  }
+
+  /// This constructor leaves the FancyId uninitialized for performance reasons and is thus unsafe.
+  /// TODO<joka921> measure, if initializing to some kind of zero makes a difference
+  FancyId() noexcept = default;
+
+  /// Construct a FLOAT id. Setting all bits of the high word also sets both
+  /// tag bits, which is exactly the FLOAT tag.
+  explicit FancyId(float f) {
+    value_.un.f = f;
+    value_.tagAndHigh = std::numeric_limits<uint32_t>::max();
+  }
+
+  /// Construct an id of one of the unsigned types.
+  /// Throws `std::runtime_error` if `t` is FLOAT (use the float constructor
+  /// instead) or if `val` is larger than MAX_VAL.
+  constexpr FancyId(Type t, uint64_t val) : value_() {
+    if (t == FLOAT) {
+      throw std::runtime_error("Wrong fancyId constructor used, should never happen, please report");
+    }
+    if (val >= INTERNAL_MAX_VAL) {
+      throw std::runtime_error("Value is too big to be represented by a fancy Id");
+    }
+    value_.un.rest = static_cast<uint32_t>(val);
+    value_.tagAndHigh = static_cast<uint32_t>(val >> 32u) |
+                        (static_cast<uint32_t>(t) << 30u);
+  }
+
+  /// Apply the binary float operation `f` to `a` and `b`. The result is
+  /// always a FLOAT id; it holds NaN if at least one operand is not a FLOAT.
+  /// NaN is selected by bit masking instead of an explicit branch. Note that
+  /// `f` is evaluated on the low words even if they do not hold floats, that
+  /// result is then discarded by the mask.
+  template <typename F>
+  static FancyId binFloatOp(FancyId a, FancyId b, F f) {
+    const bool notBothFloats = a.type() != FLOAT || b.type() != FLOAT;
+    // All bits one if the result has to be NaN, all bits zero otherwise.
+    const uint32_t bitmask =
+        uint32_t{0} - static_cast<uint32_t>(notBothFloats);
+
+    const float computed = f(a.value_.un.f, b.value_.un.f);
+    const uint32_t resultBits =
+        (bitmask & FancyId::nan()) | ((~bitmask) & bit_cast<uint32_t>(computed));
+
+    FancyId res;
+    res.value_.tagAndHigh = std::numeric_limits<uint32_t>::max();
+    res.value_.un.f = bit_cast<float>(resultBits);
+    return res;
+  }
+
+  friend FancyId operator+(FancyId a, FancyId b) {
+    return FancyId::binFloatOp(a, b, [](float x, float y) { return x + y; });
+  }
+
+  friend FancyId operator-(FancyId a, FancyId b) {
+    return FancyId::binFloatOp(a, b, [](float x, float y) { return x - y; });
+  }
+
+  friend FancyId operator*(FancyId a, FancyId b) {
+    return FancyId::binFloatOp(a, b, [](float x, float y) { return x * y; });
+  }
+
+  friend FancyId operator/(FancyId a, FancyId b) {
+    return FancyId::binFloatOp(a, b, [](float x, float y) { return x / y; });
+  }
+
+  /// Compare two ids with the binary predicate `f`. Ids of different types
+  /// are compared by their type tags, ids of the same type by their float or
+  /// unsigned payload.
+  template <typename F>
+  static bool compare(FancyId a, FancyId b, F f) {
+    if (a.type() != b.type()) {
+      return f(a.type(), b.type());
+    }
+    if (a.type() == FLOAT) {
+      return f(a.getFloat(), b.getFloat());
+    }
+    return f(a.getUnsigned(), b.getUnsigned());
+  }
+
+  /// Ids are equal if they have the same type and equal payloads. Floats are
+  /// compared with the builtin `==`, so a NaN id is not equal to itself.
+  friend bool operator==(FancyId a, FancyId b) {
+    if (a.type() != b.type()) {
+      return false;
+    }
+    if (a.type() == FLOAT) {
+      return a.getFloat() == b.getFloat();
+    }
+    return a.getUnsigned() == b.getUnsigned();
+  }
+
+  /// The exact negation of `operator==` (this also holds for NaN floats).
+  friend bool operator!=(FancyId a, FancyId b) { return !(a == b); }
+
+  friend bool operator<(FancyId a, FancyId b) {
+    return FancyId::compare(a, b, [](const auto& x, const auto& y) { return x < y; });
+  }
+  friend bool operator<=(FancyId a, FancyId b) {
+    return FancyId::compare(a, b, [](const auto& x, const auto& y) { return x <= y; });
+  }
+  friend bool operator>(FancyId a, FancyId b) {
+    return FancyId::compare(a, b, [](const auto& x, const auto& y) { return x > y; });
+  }
+  friend bool operator>=(FancyId a, FancyId b) {
+    return FancyId::compare(a, b, [](const auto& x, const auto& y) { return x >= y; });
+  }
+
+  /// Print floats as `<value>f` and all other ids as the unsigned payload
+  /// directly followed by a short name of the type.
+  friend std::ostream& operator<<(std::ostream& str, const FancyId id) {
+    auto typeSuffix = [](const FancyId::Type& t) -> const char* {
+      switch (t) {
+        case FancyId::VOCAB:
+          return "voc";
+        case FancyId::LOCAL_VOCAB:
+          return "local";
+        case FancyId::DATE:
+          return "date";
+        case FancyId::FLOAT:
+          return "float";
+      }
+      // All four values of the 2-bit tag are handled above. The explicit
+      // return makes sure that control never flows off the end of the lambda.
+      AD_CHECK(false);
+      return "";
+    };
+    if (id.type() == FLOAT) {
+      return str << id.getFloat() << 'f';
+    }
+    return str << id.getUnsigned() << typeSuffix(id.type());
+  }
+
+ private:
+  struct {
+    // The low word: a float for FLOAT ids, else the low 32 payload bits.
+    union {
+      uint32_t rest;
+      float f;
+    } un;
+    // The high word: the type tag in the two highest bits, the high part of
+    // the payload in the lower 30 bits.
+    uint32_t tagAndHigh;
+  } value_;
+};
+
+// Helper function: convert an array of plain integers into an array of
+// FancyIds of type VOCAB that hold the same values in the same order.
+template <size_t I>
+std::array<FancyId, I> makeFancyArray(std::array<size_t, I> arr) {
+  std::array<FancyId, I> fancyIds;
+  auto target = fancyIds.begin();
+  for (const size_t plainValue : arr) {
+    *target = FancyId(FancyId::VOCAB, plainValue);
+    ++target;
+  }
+  return fancyIds;
+}
+
+// Another temporary helper function: wrap a plain integer into a FancyId of
+// type VOCAB.
+inline FancyId fancy(size_t i) {
+  const FancyId vocabId(FancyId::VOCAB, i);
+  return vocabId;
+}
+
+// Scores are stored as 16-bit unsigned integers.
+using Score = uint16_t;
+
diff --git a/src/util/Random.h b/src/util/Random.h
@@ -0,0 +1,42 @@
+//
+// Created by johannes on 04.05.20.
+//
+#include<random>
+
+#ifndef QLEVER_RANDOM_H
+#define QLEVER_RANDOM_H
+
+namespace ad_utility {
+
+/// Draws integers of type `INT` from a `std::uniform_int_distribution` over
+/// [min, max]. The engine is seeded once, on construction, from a
+/// `std::random_device`.
+/// NOTE(review): uses `std::numeric_limits`, but this header does not include
+/// <limits> itself. Presumably it arrives transitively, please confirm.
+template <typename INT>
+class RandomIntGenerator {
+ public:
+  /// By default the complete value range of `INT` is used.
+  RandomIntGenerator(INT min = std::numeric_limits<INT>::min(),
+                     INT max = std::numeric_limits<INT>::max())
+      : seedSource_(), generator_(seedSource_()), distribution_(min, max) {}
+
+  /// Draw the next random integer.
+  INT operator()() {
+    const INT next = distribution_(generator_);
+    return next;
+  }
+
+ private:
+  // The declaration order matters: the seed source has to be constructed
+  // before the engine that is seeded from it.
+  std::random_device seedSource_;
+  std::default_random_engine generator_;
+  std::uniform_int_distribution<INT> distribution_;
+};
+/// Draws floating point numbers of type `FLOAT` from a
+/// `std::uniform_real_distribution` that is set up with (min, max). The
+/// engine is seeded once, on construction, from a `std::random_device`.
+/// NOTE(review): uses `std::numeric_limits`, but this header does not include
+/// <limits> itself. Presumably it arrives transitively, please confirm.
+template <typename FLOAT>
+class RandomFloatGenerator {
+ public:
+  /// Note that `std::numeric_limits<FLOAT>::min()` is the smallest positive
+  /// normalized value, so the default range only contains positive numbers.
+  RandomFloatGenerator(FLOAT min = std::numeric_limits<FLOAT>::min(),
+                       FLOAT max = std::numeric_limits<FLOAT>::max())
+      : seedSource_(), generator_(seedSource_()), distribution_(min, max) {}
+
+  /// Draw the next random number.
+  FLOAT operator()() {
+    const FLOAT next = distribution_(generator_);
+    return next;
+  }
+
+ private:
+  // The declaration order matters: the seed source has to be constructed
+  // before the engine that is seeded from it.
+  std::random_device seedSource_;
+  std::default_random_engine generator_;
+  std::uniform_real_distribution<FLOAT> distribution_;
+};
+
+}
+
+#endif  // QLEVER_RANDOM_H
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
@@ -141,3 +141,7 @@ target_link_libraries(PriorityQueueTest gtest_main absl::flat_hash_map ${CMAKE_T
 add_executable(SynchronizedTest SynchronizedTest.cpp)
 add_test(SynchronizedTest SynchronizedTest)
 target_link_libraries(SynchronizedTest gtest_main ${CMAKE_THREAD_LIBS_INIT})
+
+# Build the IdTest executable and register that same executable as the
+# command of the test named IdTest.
+add_executable(IdTest IdTest.cpp)
+add_test(IdTest IdTest)
+target_link_libraries(IdTest gtest_main ${CMAKE_THREAD_LIBS_INIT})