Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions builtin-functions/kphp-light/stdlib/vkext-functions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,21 @@ function vk_sp_full_simplify ($str ::: string) ::: string;

function vk_json_encode_safe ($v ::: mixed) ::: string;

// ===== UNSUPPORTED =====

/** @kphp-extern-func-info stub generation-required */
function vk_stats_hll_merge($str ::: mixed) ::: string | false;
/** @kphp-extern-func-info stub generation-required */

function vk_stats_hll_count($hll ::: string) ::: float | false;

function vk_stats_hll_create($a ::: array = array(), $size ::: int = 256) ::: string | false;

function vk_stats_hll_add($hll ::: string, $a ::: array) ::: string | false;

function vk_stats_hll_pack($hll ::: string) ::: string | false;

function vk_stats_hll_unpack($hll ::: string) ::: string | false;

function vk_stats_hll_is_packed($hll ::: string) ::: bool;

// ===== UNSUPPORTED =====

/** @kphp-extern-func-info stub generation-required */
function vk_flex ($name ::: string, $case_name ::: string, $sex ::: int, $type ::: string, $lang_id ::: int = 0) ::: string;
2 changes: 1 addition & 1 deletion runtime-common/stdlib/stdlib.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ prepend(STDLIB_SYSTEM stdlib/system/ system-functions.cpp)
prepend(STDLIB_SERVER stdlib/server/ url-functions.cpp
net-functions.cpp)
prepend(STDLIB_VKEXT stdlib/vkext/ string-processing.cpp
vkext-functions.cpp)
vkext-functions.cpp vkext-stats.cpp)

if(COMPILER_CLANG)
set_source_files_properties(${RUNTIME_COMMON_DIR}/stdlib/vkext/string-processing.cpp PROPERTIES COMPILE_FLAGS -Wno-invalid-source-encoding)
Expand Down
240 changes: 126 additions & 114 deletions runtime/vkext_stats.cpp → runtime-common/stdlib/vkext/vkext-stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,93 +2,49 @@
// Copyright (c) 2020 LLC «V Kontakte»
// Distributed under the GPL v3 License, see LICENSE.notice.txt

#include "runtime/vkext_stats.h"
#include "runtime-common/stdlib/vkext/vkext-stats.h"

#include <assert.h>
#include <limits.h>
#include <string.h>
#include <climits>
#include <cstring>

#define HLL_FIRST_RANK_CHAR 0x30
#define HLL_PACK_CHAR '!'
#define HLL_PACK_CHAR_V2 '$'
#define TO_HALF_BYTE(c) ((int)(((c > '9') ? (c - 7) : c) - '0'))
#define MAX_HLL_SIZE (1 << 14)
#define HLL_BUF_SIZE (MAX_HLL_SIZE + 1000)
namespace {

static char hll_buf[HLL_BUF_SIZE];
Comment thread
Shamzik marked this conversation as resolved.
constexpr auto HLL_FIRST_RANK_CHAR = 0x30;
constexpr auto HLL_PACK_CHAR = '!';
constexpr auto HLL_PACK_CHAR_V2 = '$';
constexpr auto MAX_HLL_SIZE = (1 << 14);
constexpr auto HLL_BUF_SIZE = (MAX_HLL_SIZE + 1000);

// Converts one character of a packed-HLL index into its numeric value:
// '0'..'9' map to 0..9, and any character above '9' is shifted down by 7
// first (so 'A'..'F' map to 10..15). The input is not validated; other
// characters simply yield whatever the arithmetic produces.
int to_half_byte(char c) {
  int code = c;
  if (c > '9') {
    code -= 7;
  }
  return code - '0';
}

//////
// hll functions
//////

// Returns true when `hll` is empty or when its first character is neither
// HLL_PACK_CHAR nor HLL_PACK_CHAR_V2, i.e. the string carries no pack marker.
static bool is_hll_unpacked(const string& hll) {
bool is_hll_unpacked(const string& hll) noexcept {
return hll.empty() || (hll[0] != HLL_PACK_CHAR && hll[0] != HLL_PACK_CHAR_V2);
}

// Returns the number of registers described by `hll`:
//  - for an unpacked string, its length;
//  - for a HLL_PACK_CHAR string, 1 << 8;
//  - for a HLL_PACK_CHAR_V2 string, 1 << (hll[1] - '0').
// NOTE(review): hll[1] is not range-checked here, so a malformed V2 header can
// produce an arbitrary shift amount — confirm that callers validate the result.
static int get_hll_size(const string& hll) {
int get_hll_size(const string& hll) noexcept {
if (is_hll_unpacked(hll)) {
return hll.size();
}
return hll[0] == HLL_PACK_CHAR ? (1 << 8) : (1 << (hll[1] - '0'));
}

/**
 * Merges every HLL stored in `a` into a single unpacked HLL by keeping, for each
 * register, the largest rank seen.
 *
 * The register count of the result is taken from the first element (get_hll_size);
 * the result starts with every register set to HLL_FIRST_RANK_CHAR. Elements may be
 * unpacked strings or strings in either packed format.
 *
 * Returns false when:
 *  - an element is not a string;
 *  - an unpacked element has a length different from the result;
 *  - a packed element addresses a register outside [0, result_len).
 * An empty array yields an empty string.
 */
Optional<string> f$vk_stats_hll_merge(const array<mixed>& a) {
  string result;
  char* result_buff = nullptr;
  int result_len = -1;
  for (array<mixed>::const_iterator it = a.begin(); it != a.end(); ++it) {
    if (!it.get_value().is_string()) {
      return false;
    }
    string cur = it.get_value().to_string();
    if (result_len == -1) {
      // The first element fixes the register count of the merged HLL.
      result_len = get_hll_size(cur);
      result.assign((string::size_type)result_len, (char)HLL_FIRST_RANK_CHAR);
      result_buff = result.buffer();
    }
    if (is_hll_unpacked(cur)) {
      if (result_len != cur.size()) {
        return false;
      }
      for (int i = 0; i < result_len; i++) {
        if (result_buff[i] < cur[i]) {
          result_buff[i] = cur[i];
        }
      }
    } else {
      // Packed payload: a sequence of (index byte, index byte, rank) triples that
      // follows a 1-byte (HLL_PACK_CHAR) or 2-byte (HLL_PACK_CHAR_V2) header.
      int i = 1 + (cur[0] == HLL_PACK_CHAR_V2);
      while (i + 2 < cur.size()) {
        int p;
        if (cur[0] == HLL_PACK_CHAR) {
          // Multiplication instead of '<<' keeps the arithmetic defined when a
          // malformed digit makes the half-byte negative.
          p = TO_HALF_BYTE(cur[i]) * 16 + TO_HALF_BYTE(cur[i + 1]);
        } else {
          // Index bytes are decoded as unsigned: the high byte can be 0x80, which
          // would turn into a negative index when `char` is signed.
          const int low = ((int)(unsigned char)cur[i] - 1) & 0x7f;
          const int high = (int)(unsigned char)cur[i + 1] - 1;
          p = low + high * 128;
        }
        // Reject indexes on both sides of the buffer; a negative index used to
        // pass the upper-bound check and write before result_buff.
        if (p < 0 || p >= result_len) {
          return false;
        }
        if (result_buff[p] < cur[i + 2]) {
          result_buff[p] = cur[i + 2];
        }
        i += 3;
      }
    }
  }
  return result;
}

static int unpack_hll(const string& hll, char* res) {
assert(!is_hll_unpacked(hll));
int unpack_hll(const string& hll, char* res) noexcept {
php_assert(!is_hll_unpacked(hll));
int m = get_hll_size(hll);
int pos = 1 + (hll[0] == HLL_PACK_CHAR_V2);
memset(res, HLL_FIRST_RANK_CHAR, (size_t)m);
memset(res, HLL_FIRST_RANK_CHAR, m);
while (pos + 2 < hll.size()) {
int p;
if (hll[0] == HLL_PACK_CHAR) {
p = (TO_HALF_BYTE(hll[pos]) << 4) + TO_HALF_BYTE(hll[pos + 1]);
p = (to_half_byte(hll[pos]) << 4) + to_half_byte(hll[pos + 1]);
} else {
p = (((int)hll[pos] - 1) & 0x7f) + (((int)hll[pos + 1] - 1) << 7);
p = ((hll[pos] - 1) & 0x7f) + ((hll[pos + 1] - 1) << 7);
}
if (p >= m) {
return -1;
Expand All @@ -104,7 +60,9 @@ static int unpack_hll(const string& hll, char* res) {
return m;
}

static Optional<double> hll_count(const string& hll, int m) {
Optional<double> hll_count(const string& hll, int m) noexcept {
char hll_buf[HLL_BUF_SIZE];

double pow_2_32 = (1LL << 32);
double alpha_m = 0.7213 / (1.0 + 1.079 / m);
char const* s;
Expand Down Expand Up @@ -137,7 +95,7 @@ static Optional<double> hll_count(const string& hll, int m) {
e -= e * (bias / 100.0);
}
} else {
assert(0);
php_assert(0);
}
}
return e;
Expand All @@ -147,17 +105,17 @@ static Optional<double> hll_count(const string& hll, int m) {
* Do not change implementation of this hash function, because hashes may be saved in a permanent storage.
* A full copy of the same function exists in vkext-stats.c in vkext.
*/
static long long dl_murmur64a_hash(const void* data, size_t len) {
assert((len & 7) == 0);
long long dl_murmur64a_hash(const void* data, size_t len) noexcept {
php_assert((len & 7) == 0);
unsigned long long m = 0xc6a4a7935bd1e995;
int r = 47;
unsigned long long h = 0xcafebabeull ^ (m * len);

const unsigned char* start = (const unsigned char*)data;
const unsigned char* start = static_cast<const unsigned char*>(data);
const unsigned char* end = start + len;

while (start != end) {
unsigned long long k = *(unsigned long long*)start;
unsigned long long k = *reinterpret_cast<const unsigned long long*>(start);
k *= m;
k ^= k >> r;
k *= m;
Expand All @@ -166,23 +124,24 @@ static long long dl_murmur64a_hash(const void* data, size_t len) {
start += 8;
}

start = (const unsigned char*)data;
start = static_cast<const unsigned char*>(data);

// It looks like `len & 7 == 0` here
switch (len & 7) {
case 7:
h ^= (unsigned long long)start[6] << 48; /* fallthrough */
h ^= static_cast<unsigned long long>(start[6]) << 48; /* fallthrough */
case 6:
h ^= (unsigned long long)start[5] << 40; /* fallthrough */
h ^= static_cast<unsigned long long>(start[5]) << 40; /* fallthrough */
case 5:
h ^= (unsigned long long)start[4] << 32; /* fallthrough */
h ^= static_cast<unsigned long long>(start[4]) << 32; /* fallthrough */
case 4:
h ^= (unsigned long long)start[3] << 24; /* fallthrough */
h ^= static_cast<unsigned long long>(start[3]) << 24; /* fallthrough */
case 3:
h ^= (unsigned long long)start[2] << 16; /* fallthrough */
h ^= static_cast<unsigned long long>(start[2]) << 16; /* fallthrough */
case 2:
h ^= (unsigned long long)start[1] << 8; /* fallthrough */
h ^= static_cast<unsigned long long>(start[1]) << 8; /* fallthrough */
case 1:
h ^= (unsigned long long)start[0];
h ^= static_cast<unsigned long long>(start[0]);
h *= m;
};

Expand All @@ -192,17 +151,93 @@ static long long dl_murmur64a_hash(const void* data, size_t len) {
return h;
}

// Adds one value to the unpacked register array `hll`.
// The value is hashed with dl_murmur64a_hash; the register index is the hash
// shifted right by (64 - hll_size), and the candidate rank is
// HLL_FIRST_RANK_CHAR plus min(trailing zero bits of the hash + 1, 64 - hll_size)
// (plus 0 when the hash is 0). The register is overwritten only when the new
// rank is larger than the stored one.
// NOTE(review): `hll_size` looks like log2 of the register count (the caller
// passes __builtin_ctz of the size) — confirm.
static void hll_add_shifted(unsigned char* hll, int hll_size, long long value) {
void hll_add_shifted(unsigned char* hll, int hll_size, long long value) noexcept {
unsigned long long hash = dl_murmur64a_hash(&(value), sizeof(long long));
unsigned int idx = hash >> (64LL - hll_size);
unsigned char rank = (hash == 0) ? 0 : (unsigned char)fmin(__builtin_ctzll(hash) + 1, 64 - hll_size);
unsigned char rank = (hash == 0) ? 0 : static_cast<unsigned char>(fmin(__builtin_ctzll(hash) + 1, 64 - hll_size));
rank += HLL_FIRST_RANK_CHAR;
if (hll[idx] < rank) {
hll[idx] = rank;
}
}

Optional<string> f$vk_stats_hll_add(const string& hll, const array<mixed>& a) {
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copypaste from common/statistics.c
// Encodes the unpacked HLL `s` (with `len` registers) in the HLL_PACK_CHAR_V2
// format: a 2-byte header (marker, '0' + log2(len)) followed by one
// (low index byte, high index byte, rank) triple for every register whose
// rank is above HLL_FIRST_RANK_CHAR.
// The input is returned unchanged when `len` is 0 or exceeds MAX_HLL_SIZE,
// when `s` already starts with a pack marker, or when the packed form would
// not be shorter than `len`. `len` is asserted to be a power of two.
string hll_pack(const string& s, int len) noexcept {
  if (len > MAX_HLL_SIZE || len == 0) {
    return s;
  }
  const char marker = s[0];
  if (marker == HLL_PACK_CHAR || marker == HLL_PACK_CHAR_V2) {
    return s;
  }

  unsigned char packed[HLL_BUF_SIZE];
  int used = 0;
  packed[used++] = HLL_PACK_CHAR_V2;
  packed[used++] = '0' + __builtin_ctz(len);
  php_assert(__builtin_popcount(len) == 1);

  for (int reg = 0; reg < len; ++reg) {
    if (s[reg] > HLL_FIRST_RANK_CHAR) {
      // Give up as soon as the packed form stops being smaller than the input.
      if (used + 2 >= len) {
        return s;
      }
      // Both index bytes are stored with a +1 offset.
      packed[used] = static_cast<unsigned char>((reg & 0x7f) + 1);
      packed[used + 1] = (reg >> 7) + 1;
      packed[used + 2] = s[reg];
      used += 3;
    }
    php_assert(used < HLL_BUF_SIZE);
  }
  return {reinterpret_cast<char*>(packed), static_cast<string::size_type>(used)};
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

} // namespace

/**
 * Merges every HLL stored in `a` into a single unpacked HLL by keeping, for each
 * register, the largest rank seen.
 *
 * The register count of the result is taken from the first element (get_hll_size);
 * the result starts with every register set to HLL_FIRST_RANK_CHAR. Elements may be
 * unpacked strings or strings in either packed format.
 *
 * Returns false when:
 *  - an element is not a string;
 *  - an unpacked element has a length different from the result;
 *  - a packed element addresses a register outside [0, result_len).
 * An empty array yields an empty string.
 */
Optional<string> f$vk_stats_hll_merge(const array<mixed>& a) noexcept {
  string result;
  char* result_buff = nullptr;
  int result_len = -1;
  for (array<mixed>::const_iterator it = a.begin(); it != a.end(); ++it) {
    if (!it.get_value().is_string()) {
      return false;
    }
    string cur = it.get_value().to_string();
    if (result_len == -1) {
      // The first element fixes the register count of the merged HLL.
      result_len = get_hll_size(cur);
      result.assign(result_len, static_cast<char>(HLL_FIRST_RANK_CHAR));
      result_buff = result.buffer();
    }
    if (is_hll_unpacked(cur)) {
      if (result_len != cur.size()) {
        return false;
      }
      for (int i = 0; i < result_len; i++) {
        if (result_buff[i] < cur[i]) {
          result_buff[i] = cur[i];
        }
      }
    } else {
      // Packed payload: a sequence of (index byte, index byte, rank) triples that
      // follows a 1-byte (HLL_PACK_CHAR) or 2-byte (HLL_PACK_CHAR_V2) header.
      int i = 1 + (cur[0] == HLL_PACK_CHAR_V2);
      while (i + 2 < cur.size()) {
        int p;
        if (cur[0] == HLL_PACK_CHAR) {
          // Multiplication instead of '<<' keeps the arithmetic defined when a
          // malformed digit makes the half-byte negative.
          p = to_half_byte(cur[i]) * 16 + to_half_byte(cur[i + 1]);
        } else {
          // Index bytes are decoded as unsigned: the high byte can be 0x80, which
          // would turn into a negative index when `char` is signed.
          const int low = (static_cast<unsigned char>(cur[i]) - 1) & 0x7f;
          const int high = static_cast<unsigned char>(cur[i + 1]) - 1;
          p = low + high * 128;
        }
        // Reject indexes on both sides of the buffer; a negative index used to
        // pass the upper-bound check and write before result_buff.
        if (p < 0 || p >= result_len) {
          return false;
        }
        if (result_buff[p] < cur[i + 2]) {
          result_buff[p] = cur[i + 2];
        }
        i += 3;
      }
    }
  }
  return result;
}

Optional<string> f$vk_stats_hll_add(const string& hll, const array<mixed>& a) noexcept {
auto res = string(HLL_BUF_SIZE, false);
auto hll_buf = res.buffer();

if (!is_hll_unpacked(hll)) {
return false;
}
Expand All @@ -212,19 +247,21 @@ Optional<string> f$vk_stats_hll_add(const string& hll, const array<mixed>& a) {
int hll_size = __builtin_ctz(get_hll_size(hll));
memcpy(hll_buf, hll.c_str(), hll.size());
for (array<mixed>::const_iterator it = a.begin(); it != a.end(); ++it) {
hll_add_shifted((unsigned char*)hll_buf, hll_size, it.get_value().to_int());
hll_add_shifted(reinterpret_cast<unsigned char*>(hll_buf), hll_size, it.get_value().to_int());
}
return string(hll_buf, hll.size());

res.shrink(hll.size());
return res;
Comment thread
Shamzik marked this conversation as resolved.
}

// Builds a new HLL with `size` registers, all initialised to HLL_FIRST_RANK_CHAR,
// and adds the values of `a` to it through f$vk_stats_hll_add.
// Only the sizes 1 << 8 and 1 << 14 are accepted; any other size returns false.
Optional<string> f$vk_stats_hll_create(const array<mixed>& a, int64_t size) {
Optional<string> f$vk_stats_hll_create(const array<mixed>& a, int64_t size) noexcept {
if (size != (1 << 8) && size != (1 << 14)) {
return false;
}
return f$vk_stats_hll_add(string((string::size_type)size, (char)HLL_FIRST_RANK_CHAR), a);
return f$vk_stats_hll_add(string(size, static_cast<char>(HLL_FIRST_RANK_CHAR)), a);
}

Optional<double> f$vk_stats_hll_count(const string& hll) {
Optional<double> f$vk_stats_hll_count(const string& hll) noexcept {
int size = get_hll_size(hll);
if (size == (1 << 8) || size == (1 << 14)) {
return hll_count(hll, size);
Expand All @@ -233,39 +270,14 @@ Optional<double> f$vk_stats_hll_count(const string& hll) {
}
}

// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copypaste from common/statistics.c
// Encodes the unpacked HLL `s` (with `len` registers) in the HLL_PACK_CHAR_V2
// format: a 2-byte header (marker, '0' + log2(len)) followed by one
// (low index byte, high index byte, rank) triple for every register whose
// rank is above HLL_FIRST_RANK_CHAR.
// The input is returned unchanged when `len` is 0 or exceeds MAX_HLL_SIZE,
// when `s` already starts with a pack marker, or when the packed form would
// not be shorter than `len`. `len` is asserted to be a power of two.
string hll_pack(const string& s, int len) {
  if (len > MAX_HLL_SIZE || len == 0) {
    return s;
  }
  const char marker = s[0];
  if (marker == HLL_PACK_CHAR || marker == HLL_PACK_CHAR_V2) {
    return s;
  }

  unsigned char packed[HLL_BUF_SIZE];
  int used = 0;
  packed[used++] = HLL_PACK_CHAR_V2;
  packed[used++] = static_cast<unsigned char>('0' + static_cast<unsigned char>(__builtin_ctz(len)));
  assert(__builtin_popcount(len) == 1);

  for (int reg = 0; reg < len; ++reg) {
    if (s[reg] > HLL_FIRST_RANK_CHAR) {
      // Give up as soon as the packed form stops being smaller than the input.
      if (used + 2 >= len) {
        return s;
      }
      // Both index bytes are stored with a +1 offset.
      packed[used] = static_cast<unsigned char>((reg & 0x7f) + 1);
      packed[used + 1] = static_cast<unsigned char>((reg >> 7) + 1);
      packed[used + 2] = static_cast<unsigned char>(s[reg]);
      used += 3;
    }
    assert(used < HLL_BUF_SIZE);
  }
  return {reinterpret_cast<char*>(packed), static_cast<string::size_type>(used)};
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

// Packs an unpacked HLL by delegating to hll_pack with the string's own length
// as the register count.
// Returns false when `hll` already starts with a pack marker.
Optional<string> f$vk_stats_hll_pack(const string& hll) {
Optional<string> f$vk_stats_hll_pack(const string& hll) noexcept {
if (!is_hll_unpacked(hll)) {
return false;
}
return hll_pack(hll, hll.size());
}

Optional<string> f$vk_stats_hll_unpack(const string& hll) {
Optional<string> f$vk_stats_hll_unpack(const string& hll) noexcept {
if (is_hll_unpacked(hll)) {
return false;
}
Expand All @@ -277,6 +289,6 @@ Optional<string> f$vk_stats_hll_unpack(const string& hll) {
return string(res, m);
}

// Returns true when `hll` is non-empty and starts with a pack marker, i.e. the
// exact negation of is_hll_unpacked.
bool f$vk_stats_hll_is_packed(const string& hll) {
bool f$vk_stats_hll_is_packed(const string& hll) noexcept {
return !is_hll_unpacked(hll);
}
Loading
Loading