Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,42 @@ This product includes code from Apache ORC.
Copyright: 2013 and onwards The Apache Software Foundation.
Home page: https://orc.apache.org/
License: https://www.apache.org/licenses/LICENSE-2.0

--------------------------------------------------------------------------------

This product includes code from xxHash.

* MMH_rotl32 utility in src/paimon/common/utils/murmurhash_utils.h

Copyright: 2012-2023 Yann Collet
Home page: https://www.xxhash.com
License: https://opensource.org/license/bsd-2-clause

BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------

This product includes code from cppjieba.
Expand Down
3 changes: 3 additions & 0 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,8 @@ This product includes software from RocksDB project (Apache 2.0 and BSD 3-clause
Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
Copyright (c) 2011 The LevelDB Authors. All rights reserved.

This product includes software from xxHash project (BSD 2-clause)
Copyright (C) 2012-2023 Yann Collet

This product includes software from cppjieba project (MIT)
Copyright 2013
92 changes: 92 additions & 0 deletions src/paimon/common/utils/concurrent_hash_map.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <string>
#include <vector>

#include "paimon/common/utils/murmurhash_utils.h"
#include "tbb/concurrent_hash_map.h"

namespace paimon {
/// Thin wrapper around tbb::concurrent_hash_map that exposes a small,
/// value-oriented API (Find / Insert / Erase / Size).
///
/// Every operation forwards to the underlying TBB container. Lookups hand back a
/// copy of the mapped value, so no TBB accessor ever escapes to the caller.
///
/// @tparam Key          key type stored in the map.
/// @tparam T            mapped type. Insert() inserts by key and then assigns, so T
///                      is expected to be default-constructible and copy-assignable.
/// @tparam HashCompare  TBB hash-compare policy (must provide hash() and equal()).
template <typename Key, typename T, typename HashCompare = tbb::tbb_hash_compare<Key>>
class ConcurrentHashMap {
 private:
    using HashMap = tbb::concurrent_hash_map<Key, T, HashCompare>;

 public:
    ConcurrentHashMap() = default;
    ~ConcurrentHashMap() = default;

    // Neither copyable nor movable.
    ConcurrentHashMap(const ConcurrentHashMap&) = delete;
    ConcurrentHashMap& operator=(const ConcurrentHashMap&) = delete;
    ConcurrentHashMap(ConcurrentHashMap&&) = delete;
    ConcurrentHashMap& operator=(ConcurrentHashMap&&) = delete;

    /// Looks up `key`.
    ///
    /// @return a copy of the mapped value when the key is present,
    ///         std::nullopt otherwise.
    std::optional<T> Find(const Key& key) const {
        typename HashMap::const_accessor accessor;
        if (!hash_map_.find(accessor, key)) {
            return std::nullopt;
        }
        // Copy the value out while the accessor is still alive.
        return accessor->second;
    }

    /// Associates `value` with `key`, replacing any value already stored for it.
    void Insert(const Key& key, const T& value) {
        typename HashMap::accessor accessor;
        // insert() leaves the accessor on the element for `key` whether or not the
        // element was newly created, so the assignment below covers both cases.
        hash_map_.insert(accessor, key);
        accessor->second = value;
    }

    /// Removes `key` from the map; a missing key is silently ignored.
    void Erase(const Key& key) {
        // erase(key) performs the lookup and the removal in a single call.
        hash_map_.erase(key);
    }

    /// @return the number of elements currently stored.
    size_t Size() const {
        return hash_map_.size();
    }

 private:
    HashMap hash_map_;
};

/// Hash-compare policy for maps keyed by std::vector<std::string>.
///
/// Provides the hash() / equal() pair that the HashCompare template parameter of
/// ConcurrentHashMap is instantiated with.
class VectorStringHashCompare {
 public:
    /// Chains a Murmur hash over the elements of `key`: the hash of each element
    /// is used as the seed for the next one, starting from
    /// MurmurHashUtils::DEFAULT_SEED. An empty vector hashes to the default seed.
    size_t hash(const std::vector<std::string>& key) const {
        int32_t running = MurmurHashUtils::DEFAULT_SEED;
        for (size_t idx = 0; idx < key.size(); ++idx) {
            const std::string& element = key[idx];
            running = MurmurHashUtils::HashUnsafeBytes(static_cast<const void*>(element.data()),
                                                       /*offset=*/0, element.size(), running);
        }
        return static_cast<size_t>(running);
    }

    /// Two keys are equal when they hold the same strings in the same order.
    bool equal(const std::vector<std::string>& a, const std::vector<std::string>& b) const {
        return a.size() == b.size() && std::equal(a.begin(), a.end(), b.begin());
    }
};
} // namespace paimon
157 changes: 157 additions & 0 deletions src/paimon/common/utils/concurrent_hash_map_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "paimon/common/utils/concurrent_hash_map.h"

#include <unistd.h>

#include <cstdlib>
#include <functional>
#include <thread>

#include "gtest/gtest.h"
#include "paimon/testing/utils/testharness.h"

namespace paimon::test {

// Single-threaded walk through Find / Insert / Erase / Size with a scalar key.
TEST(ConcurrentHashMapTest, TestSimple) {
    ConcurrentHashMap<int32_t, std::string> hash_map;
    // A lookup on an empty map reports "absent".
    ASSERT_EQ(hash_map.Find(10), std::nullopt);
    hash_map.Insert(1, "a");
    hash_map.Insert(2, "b");
    hash_map.Insert(3, "c");

    ASSERT_EQ(hash_map.Find(1).value(), "a");
    ASSERT_EQ(hash_map.Find(2).value(), "b");
    ASSERT_EQ(hash_map.Find(3).value(), "c");
    ASSERT_EQ(hash_map.Find(10), std::nullopt);
    // Size() returns size_t, so compare against unsigned literals.
    ASSERT_EQ(hash_map.Size(), 3U);

    hash_map.Erase(2);
    ASSERT_EQ(hash_map.Find(2), std::nullopt);
    ASSERT_EQ(hash_map.Size(), 2U);

    // Erasing a non-existent key leaves the remaining entries untouched.
    hash_map.Erase(4);
    ASSERT_EQ(hash_map.Find(1).value(), "a");
    ASSERT_EQ(hash_map.Find(3).value(), "c");
    ASSERT_EQ(hash_map.Size(), 2U);

    // Inserting an existing key replaces its value without adding an entry.
    hash_map.Insert(1, "z");
    ASSERT_EQ(hash_map.Find(1).value(), "z");
    ASSERT_EQ(hash_map.Size(), 2U);
}

// Verifies that vector-of-string keys work with VectorStringHashCompare,
// including the empty vector as a valid key.
TEST(ConcurrentHashMapTest, TestVectorStringHashCompare) {
    ConcurrentHashMap<std::vector<std::string>, int32_t, VectorStringHashCompare> hash_map;
    ASSERT_EQ(hash_map.Find({}), std::nullopt);

    hash_map.Insert({"a", "b"}, 1);
    hash_map.Insert({"a", "c"}, 2);
    hash_map.Insert({"b", "c"}, 3);
    hash_map.Insert({}, 4);

    ASSERT_EQ(hash_map.Find({"a", "b"}).value(), 1);
    ASSERT_EQ(hash_map.Find({"a", "c"}).value(), 2);
    ASSERT_EQ(hash_map.Find({"b", "c"}).value(), 3);
    ASSERT_EQ(hash_map.Find({}).value(), 4);
    ASSERT_EQ(hash_map.Find({"non"}), std::nullopt);
    // Element boundaries matter: {"ab"} is a different key from {"a", "b"}.
    ASSERT_EQ(hash_map.Find({"ab"}), std::nullopt);
    // Size() returns size_t, so compare against an unsigned literal.
    ASSERT_EQ(hash_map.Size(), 4U);
}

// Runs insert / find / erase workers concurrently against one map and checks that
// every value observed is the one that was inserted for its key.
TEST(ConcurrentHashMapTest, TestMultiThreadInsertAndFindAndDelete) {
    using IntStringMap = ConcurrentHashMap<int32_t, std::string>;

    const int32_t map_size = 1000;
    // Size() returns size_t; keep an unsigned copy for the size assertions.
    const auto expected_size = static_cast<size_t>(map_size);

    // Inserts keys [0, map_size) with value to_string(key + 1), with a small random
    // pause before each operation to vary the thread interleaving.
    auto insert_task = [&](IntStringMap& hash_map) {
        for (int32_t i = 0; i < map_size; i++) {
            usleep(paimon::test::RandomNumber(0, 9));
            hash_map.Insert(i, std::to_string(i + 1));
        }
    };
    // Looks up every key once; a key may legitimately be absent, but a value that is
    // present must be the one insert_task wrote.
    auto find_task = [&](IntStringMap& hash_map) {
        int32_t found = 0, not_found = 0;
        for (int32_t i = 0; i < map_size; i++) {
            usleep(paimon::test::RandomNumber(0, 9));
            auto value = hash_map.Find(i);
            if (value) {
                ASSERT_EQ(value.value(), std::to_string(i + 1));
                found++;
            } else {
                not_found++;
            }
        }
        ASSERT_EQ(found + not_found, map_size);
    };

    // Erases keys [0, map_size); keys that are not present are ignored by Erase().
    auto delete_task = [&](IntStringMap& hash_map) {
        for (int32_t i = 0; i < map_size; i++) {
            usleep(paimon::test::RandomNumber(0, 9));
            hash_map.Erase(i);
        }
    };

    {
        IntStringMap hash_map;
        // insert
        insert_task(hash_map);
        // multi-thread find and delete
        std::thread thread1(find_task, std::ref(hash_map));
        std::thread thread2(delete_task, std::ref(hash_map));

        thread1.join();
        thread2.join();

        // check final states: everything was inserted up front, so the eraser
        // must have removed every key.
        ASSERT_EQ(hash_map.Size(), 0U);
    }
    {
        IntStringMap hash_map;
        // multi-thread insert and find
        std::thread thread1(insert_task, std::ref(hash_map));
        std::thread thread2(find_task, std::ref(hash_map));

        thread1.join();
        thread2.join();

        // check final states: nothing erases, so every key must be present.
        ASSERT_EQ(hash_map.Size(), expected_size);
        for (int32_t i = 0; i < map_size; i++) {
            auto value = hash_map.Find(i);
            ASSERT_TRUE(value.has_value());
            ASSERT_EQ(value.value(), std::to_string(i + 1));
        }
    }
    {
        IntStringMap hash_map;
        // multi-thread insert and find and delete
        std::thread thread1(insert_task, std::ref(hash_map));
        std::thread thread2(find_task, std::ref(hash_map));
        std::thread thread3(delete_task, std::ref(hash_map));

        thread1.join();
        thread2.join();
        thread3.join();

        // check final states: the insert/erase race decides which keys survive, so
        // only an upper bound on the size can be asserted (a lower bound of zero
        // would be a tautology for an unsigned size).
        ASSERT_LE(hash_map.Size(), expected_size);
        for (int32_t i = 0; i < map_size; i++) {
            auto value = hash_map.Find(i);
            if (value) {
                ASSERT_EQ(value.value(), std::to_string(i + 1));
            }
        }
    }
}

} // namespace paimon::test
Loading