Skip to content

Commit

Permalink
First version of Id class for Date/Float/Integer Values in 64 bits us…
Browse files Browse the repository at this point in the history
…in bit-fiddling.

Some unit tests, but no integration yet.
Next step: Use for the vocabularyFloats (increase performance)
  • Loading branch information
joka921 committed Oct 13, 2020
1 parent 8cafcdd commit 86d7862
Show file tree
Hide file tree
Showing 4 changed files with 388 additions and 4 deletions.
208 changes: 204 additions & 4 deletions src/global/Id.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,211 @@
#pragma once

#include <cstdint>
#include <cstring>
#include <limits>
#include <cassert>
#include <array>

#include "../util/Exception.h"
template< class To, class From >
To bit_cast(const From& from) noexcept {
static_assert(sizeof(To) == sizeof(From));
To t;
std::memcpy(&t, &from, sizeof(From));
return t;
}

/// when we really just need an Id
using Id = uint64_t;
Id ID_NO_VALUE = std::numeric_limits<Id>::max() - 2;

typedef uint64_t Id;
typedef uint16_t Score;

class FancyId {
public:
enum Type : uint8_t {
VOCAB = 0,
LOCAL_VOCAB = 1,
DATE = 2,
FLOAT = 3
};
static constexpr uint64_t INTERNAL_MAX_VAL =~ (3ull << 62);
static constexpr uint64_t MAX_VAL = INTERNAL_MAX_VAL - 1;
static constexpr uint32_t TAG_MASK = 3ull << 30;
// A value to use when the result should be empty (e.g. due to an optional join)
// The highest two values are used as sentinels.
static const Id ID_NO_VALUE = std::numeric_limits<Id>::max() - 2;
static FancyId NoValue () { return FancyId(VOCAB, INTERNAL_MAX_VAL);}
// TODO<joka921, C++20> with std::bit_cast this can be constexpr
static uint32_t nan() {
return bit_cast<uint32_t>(std::numeric_limits<float>::quiet_NaN());
}

// unchecked, undefined behavior if we don't hold a float
constexpr float getFloat() const noexcept {
assert(type() == FLOAT);
return value_.un.f;
}
// unchecked, undefined behavior if we don't hold any of the uint64_t based types
constexpr uint64_t getUnsigned() const noexcept {
assert(type() != FLOAT);
// get the low order bits
uint64_t res = value_.un.rest;
res |= static_cast<uint64_t>(value_.tagAndHigh & (~TAG_MASK)) << 32u;
return res;
}

/// This constructor leaves the FancyId unitialized for performance reasons and is thus unsafe.
/// TODO<joka921> measure, if initializing to some kind of zero makes a difference
FancyId() noexcept = default;

explicit FancyId(float f) {
value_.un.f = f;
value_.tagAndHigh = std::numeric_limits<uint32_t>::max();
}



constexpr FancyId(Type t, uint64_t val) : value_() {
// low bits
value_.un.rest = static_cast<uint32_t>(val);
value_.tagAndHigh = val >> 32u;
if (t == FLOAT) {

}
if (t == FLOAT) {
throw std::runtime_error("Wrong fancyId constructor used, should never happen, please report");
}
if (val >= INTERNAL_MAX_VAL) {
throw std::runtime_error("Value is too big to be represented by a fancy Id");
}
value_.tagAndHigh |= static_cast<uint32_t>(t) << 30;
}

template <typename F>
static FancyId binFloatOp(FancyId a, FancyId b, F f) {
int isNan = a.type() != FLOAT || b.type() != FLOAT;
uint32_t bitmask = 0;
// set all bits either to 0 or to one;
for (size_t i = 0; i < 32u; ++i) {
bitmask |= isNan << i;
}

FancyId res;
res.value_.tagAndHigh = std::numeric_limits<uint32_t>::max();
res.value_.un.f = f(a.value_.un.f, b.value_.un.f);
auto nan = FancyId::nan();
res.value_.un.f = bit_cast<float>((bitmask & nan) | ((~bitmask) & bit_cast<uint32_t>(res.value_.un.f)));
return res;
}

constexpr Type type() const {
return static_cast<Type>(value_.tagAndHigh >> 30u);
}

friend FancyId operator+(FancyId a, FancyId b) {
return FancyId::binFloatOp(a, b, [](float x, float y){return x + y;});
}

friend FancyId operator-(FancyId a, FancyId b) {
return FancyId::binFloatOp(a, b, [](float x, float y){return x - y;});
}

friend FancyId operator*(FancyId a, FancyId b) {
return FancyId::binFloatOp(a, b, [](float x, float y){return x * y;});
}

friend FancyId operator/(FancyId a, FancyId b) {
return FancyId::binFloatOp(a, b, [](float x, float y){return x / y;});
}

template <typename F>
static bool compare(FancyId a, FancyId b, F f) {
if (a.type() != b.type()) {
return f(a.type(), b.type());
}
if (a.type() == FLOAT) {
return f(a.getFloat(), b.getFloat());
}
return f(a.getUnsigned(), b.getUnsigned());
}

friend bool operator==(FancyId a, FancyId b) {
if (a.type() != b.type()) {
return false;
}

if (a.type() == FLOAT) {
return a.getFloat() == b.getFloat();
}
return a.getUnsigned() == b.getUnsigned();
}

friend bool operator!=(FancyId a, FancyId b) {
if (a.type() != b.type()) {
return true;
}

if (a.type() == FLOAT) {
return a.getFloat() != b.getFloat();
}
return a.getUnsigned() != b.getUnsigned();
}

friend bool operator<(FancyId a, FancyId b) {
return FancyId::compare(a, b, [](const auto& a, const auto& b){return a < b;});
}
friend bool operator<=(FancyId a, FancyId b) {
return FancyId::compare(a, b, [](const auto& a, const auto& b){return a <= b;});
}
friend bool operator>(FancyId a, FancyId b) {
return FancyId::compare(a, b, [](const auto& a, const auto& b){return a > b;});
}
friend bool operator>=(FancyId a, FancyId b) {
return FancyId::compare(a, b, [](const auto& a, const auto& b){return a >= b;});
}

friend decltype(auto) operator<<(std::ostream& str, const FancyId id) {
auto tp = [](const FancyId::Type& t) {
switch (t) {
case FancyId::VOCAB : return "voc";
case FancyId::LOCAL_VOCAB : return "local";
case FancyId::DATE : return "date";
case FancyId::FLOAT : return "float";
default: AD_CHECK(false);
}
};
if (id.type() == FLOAT) {
return str << id.getFloat() << 'f';
} else {
return str << id.getUnsigned() << tp(id.type());
}
}


private:
struct {
union {
uint32_t rest;
float f;
} un;
uint32_t tagAndHigh;
} value_;


};

// helper function
template<size_t I>
std::array<FancyId, I> makeFancyArray(std::array<size_t, I> arr) {
std::array<FancyId, I> res;
for (size_t i = 0; i < I; ++i) {
res[i] = FancyId(FancyId::VOCAB, arr[i]);
}
return res;
}

// another temporary helper function
inline FancyId fancy(size_t i) {
return FancyId(FancyId::VOCAB, i);
}

typedef uint16_t Score;

42 changes: 42 additions & 0 deletions src/util/Random.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
//
// Created by johannes on 04.05.20.
//
#include<random>

#ifndef QLEVER_RANDOM_H
#define QLEVER_RANDOM_H

namespace ad_utility {

template <typename INT>
class RandomIntGenerator {
std::random_device r_;
std::default_random_engine engine_;
std::uniform_int_distribution<INT> dist_;
public:
RandomIntGenerator(INT min = std::numeric_limits<INT>::min(), INT max = std::numeric_limits<INT>::max()) :
r_(), engine_(r_()), dist_(min, max) {}

INT operator()() {
return dist_(engine_);
}

};
template <typename FLOAT>
class RandomFloatGenerator {
std::random_device r_;
std::default_random_engine engine_;
std::uniform_real_distribution<FLOAT> dist_;

public:
RandomFloatGenerator(FLOAT min = std::numeric_limits<FLOAT>::min(), FLOAT max = std::numeric_limits<FLOAT>::max()) :
r_(), engine_(r_()), dist_(min, max) {}

FLOAT operator()() {
return dist_(engine_);
}
};

}

#endif // QLEVER_RANDOM_H
4 changes: 4 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,7 @@ target_link_libraries(PriorityQueueTest gtest_main absl::flat_hash_map ${CMAKE_T
add_executable(SynchronizedTest SynchronizedTest.cpp)
add_test(SynchronizedTest SynchronizedTest)
target_link_libraries(SynchronizedTest gtest_main ${CMAKE_THREAD_LIBS_INIT})

add_executable(IdTest IdTest.cpp)
add_test(IdTest SynchronizedTest)
target_link_libraries(IdTest gtest_main ${CMAKE_THREAD_LIBS_INIT})

0 comments on commit 86d7862

Please sign in to comment.