From aa3e13766e39f40c9ab28cc08b2cff88da5ab425 Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Sat, 9 Mar 2024 19:21:53 +0000 Subject: [PATCH 01/25] Add custom once cursor --- src/flags/experimental.cpp | 1 + src/flags/experimental.hpp | 1 + src/query/CMakeLists.txt | 3 + src/query/custom_cursors/CMakeLists.txt | 7 +++ src/query/custom_cursors/all.hpp | 15 +++++ src/query/custom_cursors/once.cpp | 29 +++++++++ src/query/custom_cursors/once.hpp | 32 ++++++++++ src/query/custom_cursors/scanall.cpp | 27 +++++++++ src/query/custom_cursors/scanall.hpp | 32 ++++++++++ src/query/plan/cursor.hpp | 80 +++++++++++++++++++++++++ src/query/plan/operator.cpp | 6 ++ src/query/plan/operator.hpp | 52 +--------------- 12 files changed, 234 insertions(+), 51 deletions(-) create mode 100644 src/query/custom_cursors/CMakeLists.txt create mode 100644 src/query/custom_cursors/all.hpp create mode 100644 src/query/custom_cursors/once.cpp create mode 100644 src/query/custom_cursors/once.hpp create mode 100644 src/query/custom_cursors/scanall.cpp create mode 100644 src/query/custom_cursors/scanall.hpp create mode 100644 src/query/plan/cursor.hpp diff --git a/src/flags/experimental.cpp b/src/flags/experimental.cpp index 123903c9693..f1c61d3dbaf 100644 --- a/src/flags/experimental.cpp +++ b/src/flags/experimental.cpp @@ -26,6 +26,7 @@ using namespace std::string_view_literals; namespace memgraph::flags { auto const mapping = std::map{std::pair{"system-replication"sv, Experiments::SYSTEM_REPLICATION}, + std::pair{"alternative-storage"sv, Experiments::ALTERNATIVE_STORAGE}, std::pair{"high-availability"sv, Experiments::HIGH_AVAILABILITY}}; auto ExperimentsInstance() -> Experiments & { diff --git a/src/flags/experimental.hpp b/src/flags/experimental.hpp index 5a19889fe79..6785a4a1ece 100644 --- a/src/flags/experimental.hpp +++ b/src/flags/experimental.hpp @@ -24,6 +24,7 @@ namespace memgraph::flags { enum class Experiments : uint8_t { SYSTEM_REPLICATION = 1 << 0, HIGH_AVAILABILITY = 1 << 1, + 
ALTERNATIVE_STORAGE = 1 << 2, }; bool AreExperimentsEnabled(Experiments experiments); diff --git a/src/query/CMakeLists.txt b/src/query/CMakeLists.txt index d70ede48297..20ed028ef64 100644 --- a/src/query/CMakeLists.txt +++ b/src/query/CMakeLists.txt @@ -43,6 +43,8 @@ set(mg_query_sources query_user.cpp ) +add_subdirectory(custom_cursors) + add_library(mg-query STATIC ${mg_query_sources}) target_include_directories(mg-query PUBLIC ${CMAKE_SOURCE_DIR}/include) target_link_libraries(mg-query PUBLIC dl @@ -60,6 +62,7 @@ target_link_libraries(mg-query PUBLIC dl mg::system mg-flags mg-dbms + mg_custom_cursors mg-events) if(NOT "${MG_PYTHON_PATH}" STREQUAL "") diff --git a/src/query/custom_cursors/CMakeLists.txt b/src/query/custom_cursors/CMakeLists.txt new file mode 100644 index 00000000000..ddb479f9e9b --- /dev/null +++ b/src/query/custom_cursors/CMakeLists.txt @@ -0,0 +1,7 @@ +set(mg_custom_cursors + once.cpp + scanall.cpp +) +add_library(mg_custom_cursors STATIC ${mg_custom_cursors}) +# TODO(gitbuda): Having mg-storage-v2 here is broken. +target_link_libraries(mg_custom_cursors mg-utils mg-storage-v2) diff --git a/src/query/custom_cursors/all.hpp b/src/query/custom_cursors/all.hpp new file mode 100644 index 00000000000..daaf6de4825 --- /dev/null +++ b/src/query/custom_cursors/all.hpp @@ -0,0 +1,15 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +#include "query/custom_cursors/once.hpp" +#include "query/custom_cursors/scanall.hpp" diff --git a/src/query/custom_cursors/once.cpp b/src/query/custom_cursors/once.cpp new file mode 100644 index 00000000000..a5bfa6086eb --- /dev/null +++ b/src/query/custom_cursors/once.cpp @@ -0,0 +1,29 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "query/custom_cursors/once.hpp" +#include "query/context.hpp" +#include "query/interpret/frame.hpp" +#include "spdlog/spdlog.h" +#include "utils/logging.hpp" + +namespace memgraph::query::custom_cursors { + +bool OnceCursor::Pull(Frame & /*unused*/, ExecutionContext & /*unused*/) { + SPDLOG_WARN("Once"); + return false; +} + +void OnceCursor::Shutdown() {} + +void OnceCursor::Reset() {} + +} // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/once.hpp b/src/query/custom_cursors/once.hpp new file mode 100644 index 00000000000..cf95617e008 --- /dev/null +++ b/src/query/custom_cursors/once.hpp @@ -0,0 +1,32 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include "query/plan/cursor.hpp" + +namespace memgraph::query { + +struct ExecutionContext; +class Frame; + +namespace custom_cursors { + +class OnceCursor : public memgraph::query::plan::Cursor { + public: + OnceCursor() = default; + bool Pull(Frame & /*unused*/, ExecutionContext & /*unused*/) override; + void Shutdown() override; + void Reset() override; +}; + +} // namespace custom_cursors +} // namespace memgraph::query diff --git a/src/query/custom_cursors/scanall.cpp b/src/query/custom_cursors/scanall.cpp new file mode 100644 index 00000000000..97a6c312fcf --- /dev/null +++ b/src/query/custom_cursors/scanall.cpp @@ -0,0 +1,27 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#include "query/custom_cursors/scanall.hpp" +#include "query/context.hpp" +#include "query/interpret/frame.hpp" + +namespace memgraph::query::custom_cursors { + +bool ScanAllCursor::Pull(Frame & /*unused*/, ExecutionContext & /*unused*/) { + SPDLOG_WARN("ScanAll"); + return false; +} + +void ScanAllCursor::Shutdown() {} + +void ScanAllCursor::Reset() {} + +} // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/scanall.hpp b/src/query/custom_cursors/scanall.hpp new file mode 100644 index 00000000000..6cff6fa8fb0 --- /dev/null +++ b/src/query/custom_cursors/scanall.hpp @@ -0,0 +1,32 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include "query/plan/cursor.hpp" + +namespace memgraph::query { + +struct ExecutionContext; +class Frame; + +namespace custom_cursors { + +class ScanAllCursor : public memgraph::query::plan::Cursor { + public: + ScanAllCursor() = default; + bool Pull(Frame & /*unused*/, ExecutionContext & /*unused*/) override; + void Shutdown() override; + void Reset() override; +}; + +} // namespace custom_cursors +} // namespace memgraph::query diff --git a/src/query/plan/cursor.hpp b/src/query/plan/cursor.hpp new file mode 100644 index 00000000000..f665c030a92 --- /dev/null +++ b/src/query/plan/cursor.hpp @@ -0,0 +1,80 @@ +// Copyright 2024 Memgraph Ltd. 
+// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include +#include + +#include "utils/memory.hpp" + +namespace memgraph::query { + +struct ExecutionContext; +class Frame; + +namespace plan { + +/// Base class for iteration cursors of @c LogicalOperator classes. +/// +/// Each @c LogicalOperator must produce a concrete @c Cursor, which provides +/// the iteration mechanism. +class Cursor { + public: + Cursor() = default; + Cursor(const Cursor &) = delete; + Cursor(Cursor &&) = delete; + Cursor &operator=(const Cursor &) = delete; + Cursor &operator=(Cursor &&) = delete; + virtual ~Cursor() = default; + + /// Run an iteration of a @c LogicalOperator. + /// + /// Since operators may be chained, the iteration may pull results from + /// multiple operators. + /// + /// @param Frame May be read from or written to while performing the + /// iteration. + /// @param ExecutionContext Used to get the position of symbols in frame and + /// other information. + /// + /// @throws QueryRuntimeException if something went wrong with execution + virtual bool Pull(Frame &, ExecutionContext &) = 0; + + /// Resets the Cursor to its initial state. + virtual void Reset() = 0; + + /// Perform cleanup which may throw an exception + virtual void Shutdown() = 0; +}; + +/// unique_ptr to Cursor managed with a custom deleter. +/// This allows us to use utils::MemoryResource for allocation. 
+using UniqueCursorPtr = std::unique_ptr>; + +template +std::unique_ptr> MakeUniqueCursorPtr(utils::Allocator allocator, + TArgs &&...args) { + auto *ptr = allocator.allocate(1); + try { + auto *cursor = new (ptr) TCursor(std::forward(args)...); + return std::unique_ptr>(cursor, [allocator](Cursor *base_ptr) mutable { + auto *p = static_cast(base_ptr); + p->~TCursor(); + allocator.deallocate(p, 1); + }); + } catch (...) { + allocator.deallocate(ptr, 1); + throw; + } +} +} // namespace plan +} // namespace memgraph::query diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 7cd506050ac..b81d8d3ec0e 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -27,6 +27,7 @@ #include #include +#include "flags/experimental.hpp" #include "memory/query_memory_control.hpp" #include "query/common.hpp" #include "spdlog/spdlog.h" @@ -35,6 +36,7 @@ #include "license/license.hpp" #include "query/auth_checker.hpp" #include "query/context.hpp" +#include "query/custom_cursors/all.hpp" #include "query/db_accessor.hpp" #include "query/exceptions.hpp" #include "query/frontend/ast/ast.hpp" @@ -199,6 +201,10 @@ bool Once::OnceCursor::Pull(Frame &, ExecutionContext &context) { UniqueCursorPtr Once::MakeCursor(utils::MemoryResource *mem) const { memgraph::metrics::IncrementCounter(memgraph::metrics::OnceOperator); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::ALTERNATIVE_STORAGE)) { + return MakeUniqueCursorPtr(mem); + } + return MakeUniqueCursorPtr(mem); } diff --git a/src/query/plan/operator.hpp b/src/query/plan/operator.hpp index 6563c2bb0f8..827c5bff53d 100644 --- a/src/query/plan/operator.hpp +++ b/src/query/plan/operator.hpp @@ -22,6 +22,7 @@ #include "query/common.hpp" #include "query/frontend/ast/ast.hpp" #include "query/frontend/semantic/symbol.hpp" +#include "query/plan/cursor.hpp" #include "query/plan/preprocess.hpp" #include "query/typed_value.hpp" #include "storage/v2/id_types.hpp" @@ -34,62 +35,11 @@ 
namespace memgraph::query { -struct ExecutionContext; class ExpressionEvaluator; -class Frame; class SymbolTable; namespace plan { -/// Base class for iteration cursors of @c LogicalOperator classes. -/// -/// Each @c LogicalOperator must produce a concrete @c Cursor, which provides -/// the iteration mechanism. -class Cursor { - public: - /// Run an iteration of a @c LogicalOperator. - /// - /// Since operators may be chained, the iteration may pull results from - /// multiple operators. - /// - /// @param Frame May be read from or written to while performing the - /// iteration. - /// @param ExecutionContext Used to get the position of symbols in frame and - /// other information. - /// - /// @throws QueryRuntimeException if something went wrong with execution - virtual bool Pull(Frame &, ExecutionContext &) = 0; - - /// Resets the Cursor to its initial state. - virtual void Reset() = 0; - - /// Perform cleanup which may throw an exception - virtual void Shutdown() = 0; - - virtual ~Cursor() = default; -}; - -/// unique_ptr to Cursor managed with a custom deleter. -/// This allows us to use utils::MemoryResource for allocation. -using UniqueCursorPtr = std::unique_ptr>; - -template -std::unique_ptr> MakeUniqueCursorPtr(utils::Allocator allocator, - TArgs &&...args) { - auto *ptr = allocator.allocate(1); - try { - auto *cursor = new (ptr) TCursor(std::forward(args)...); - return std::unique_ptr>(cursor, [allocator](Cursor *base_ptr) mutable { - auto *p = static_cast(base_ptr); - p->~TCursor(); - allocator.deallocate(p, 1); - }); - } catch (...) 
{ - allocator.deallocate(ptr, 1); - throw; - } -} - class Once; class CreateNode; class CreateExpand; From 96c6d65d4a98748de2a141251709665e32d4b47c Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Sun, 10 Mar 2024 00:12:28 +0000 Subject: [PATCH 02/25] Add produce and dummy custom_storage --- src/CMakeLists.txt | 2 +- src/query/context.hpp | 2 ++ src/query/custom_cursors/CMakeLists.txt | 7 ++--- src/query/custom_cursors/all.hpp | 1 + src/query/custom_cursors/once.cpp | 13 ++++++--- src/query/custom_cursors/once.hpp | 15 +++++------ src/query/custom_cursors/produce.cpp | 33 +++++++++++++++++++++++ src/query/custom_cursors/produce.hpp | 29 ++++++++++++++++++++ src/query/custom_cursors/scanall.cpp | 16 ++++++++--- src/query/custom_cursors/scanall.hpp | 19 +++++++------ src/query/custom_cursors/utils.hpp | 22 +++++++++++++++ src/query/interpreter.cpp | 3 +++ src/query/plan/operator.cpp | 11 ++++++++ src/storage/CMakeLists.txt | 2 ++ src/storage/custom_storage/CMakeLists.txt | 5 ++++ src/storage/custom_storage/storage.cpp | 22 +++++++++++++++ src/storage/custom_storage/storage.hpp | 21 +++++++++++++++ 17 files changed, 193 insertions(+), 30 deletions(-) create mode 100644 src/query/custom_cursors/produce.cpp create mode 100644 src/query/custom_cursors/produce.hpp create mode 100644 src/query/custom_cursors/utils.hpp create mode 100644 src/storage/CMakeLists.txt create mode 100644 src/storage/custom_storage/CMakeLists.txt create mode 100644 src/storage/custom_storage/storage.cpp create mode 100644 src/storage/custom_storage/storage.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4d5d523c652..bdf8ec065f0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,7 +9,7 @@ add_subdirectory(kvstore) add_subdirectory(telemetry) add_subdirectory(communication) add_subdirectory(memory) -add_subdirectory(storage/v2) +add_subdirectory(storage) add_subdirectory(integrations) add_subdirectory(query) add_subdirectory(glue) diff --git a/src/query/context.hpp 
b/src/query/context.hpp index f1522053c13..d875195867e 100644 --- a/src/query/context.hpp +++ b/src/query/context.hpp @@ -23,6 +23,7 @@ #include "utils/async_timer.hpp" #include "query/frame_change.hpp" +#include "storage/custom_storage/storage.hpp" namespace memgraph::query { @@ -73,6 +74,7 @@ inline std::vector NamesToLabels(const std::vector custom_storage{nullptr}; DbAccessor *db_accessor{nullptr}; SymbolTable symbol_table; EvaluationContext evaluation_context; diff --git a/src/query/custom_cursors/CMakeLists.txt b/src/query/custom_cursors/CMakeLists.txt index ddb479f9e9b..06b13655bc9 100644 --- a/src/query/custom_cursors/CMakeLists.txt +++ b/src/query/custom_cursors/CMakeLists.txt @@ -1,7 +1,8 @@ -set(mg_custom_cursors +set(mg_custom_cursors_sources once.cpp scanall.cpp + produce.cpp ) -add_library(mg_custom_cursors STATIC ${mg_custom_cursors}) +add_library(mg_custom_cursors STATIC ${mg_custom_cursors_sources}) # TODO(gitbuda): Having mg-storage-v2 here is broken. -target_link_libraries(mg_custom_cursors mg-utils mg-storage-v2) +target_link_libraries(mg_custom_cursors mg-utils mg-storage-v2 mg_custom_storage) diff --git a/src/query/custom_cursors/all.hpp b/src/query/custom_cursors/all.hpp index daaf6de4825..b684d314773 100644 --- a/src/query/custom_cursors/all.hpp +++ b/src/query/custom_cursors/all.hpp @@ -12,4 +12,5 @@ #pragma once #include "query/custom_cursors/once.hpp" +#include "query/custom_cursors/produce.hpp" #include "query/custom_cursors/scanall.hpp" diff --git a/src/query/custom_cursors/once.cpp b/src/query/custom_cursors/once.cpp index a5bfa6086eb..9a310146a6d 100644 --- a/src/query/custom_cursors/once.cpp +++ b/src/query/custom_cursors/once.cpp @@ -11,19 +11,26 @@ #include "query/custom_cursors/once.hpp" #include "query/context.hpp" +#include "query/custom_cursors/utils.hpp" #include "query/interpret/frame.hpp" -#include "spdlog/spdlog.h" +#include "query/plan/scoped_profile.hpp" #include "utils/logging.hpp" namespace 
memgraph::query::custom_cursors { -bool OnceCursor::Pull(Frame & /*unused*/, ExecutionContext & /*unused*/) { +bool OnceCursor::Pull(Frame & /*unused*/, ExecutionContext &context) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; + memgraph::query::plan::ScopedProfile profile{ComputeProfilingKey(this), "Once", &context}; SPDLOG_WARN("Once"); + if (!did_pull_) { + did_pull_ = true; + return true; + } return false; } void OnceCursor::Shutdown() {} -void OnceCursor::Reset() {} +void OnceCursor::Reset() { did_pull_ = false; } } // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/once.hpp b/src/query/custom_cursors/once.hpp index cf95617e008..fe37393b7a7 100644 --- a/src/query/custom_cursors/once.hpp +++ b/src/query/custom_cursors/once.hpp @@ -13,20 +13,17 @@ #include "query/plan/cursor.hpp" -namespace memgraph::query { - -struct ExecutionContext; -class Frame; - -namespace custom_cursors { +namespace memgraph::query::custom_cursors { class OnceCursor : public memgraph::query::plan::Cursor { public: OnceCursor() = default; - bool Pull(Frame & /*unused*/, ExecutionContext & /*unused*/) override; + bool Pull(Frame & /*unused*/, ExecutionContext &context) override; void Shutdown() override; void Reset() override; + + private: + bool did_pull_{false}; }; -} // namespace custom_cursors -} // namespace memgraph::query +} // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/produce.cpp b/src/query/custom_cursors/produce.cpp new file mode 100644 index 00000000000..b31fbd57aa7 --- /dev/null +++ b/src/query/custom_cursors/produce.cpp @@ -0,0 +1,33 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "query/custom_cursors/produce.hpp" +#include "query/context.hpp" +#include "query/custom_cursors/utils.hpp" +#include "query/interpret/frame.hpp" +#include "spdlog/spdlog.h" +#include "utils/logging.hpp" + +namespace memgraph::query::custom_cursors { + +ProduceCursor::ProduceCursor(plan::UniqueCursorPtr input_cursor) : input_cursor_(std::move(input_cursor)) {} + +bool ProduceCursor::Pull(Frame &frame, ExecutionContext &context) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; + memgraph::query::plan::ScopedProfile profile{ComputeProfilingKey(this), "Produce", &context}; + SPDLOG_WARN("Produce"); + return input_cursor_->Pull(frame, context); +} + +void ProduceCursor::Shutdown() { input_cursor_->Shutdown(); } + +void ProduceCursor::Reset() { input_cursor_->Reset(); } +} // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/produce.hpp b/src/query/custom_cursors/produce.hpp new file mode 100644 index 00000000000..1ce9dd1d76b --- /dev/null +++ b/src/query/custom_cursors/produce.hpp @@ -0,0 +1,29 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +#include "query/plan/cursor.hpp" + +namespace memgraph::query::custom_cursors { + +class ProduceCursor : public memgraph::query::plan::Cursor { + public: + explicit ProduceCursor(plan::UniqueCursorPtr input_cursor); + bool Pull(Frame &frame, ExecutionContext &context) override; + void Shutdown() override; + void Reset() override; + + private: + const plan::UniqueCursorPtr input_cursor_; +}; + +} // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/scanall.cpp b/src/query/custom_cursors/scanall.cpp index 97a6c312fcf..f13066e1171 100644 --- a/src/query/custom_cursors/scanall.cpp +++ b/src/query/custom_cursors/scanall.cpp @@ -11,17 +11,25 @@ #include "query/custom_cursors/scanall.hpp" #include "query/context.hpp" +#include "query/custom_cursors/utils.hpp" #include "query/interpret/frame.hpp" +#include "query/plan/scoped_profile.hpp" namespace memgraph::query::custom_cursors { -bool ScanAllCursor::Pull(Frame & /*unused*/, ExecutionContext & /*unused*/) { +ScanAllCursor::ScanAllCursor(Symbol output_symbol, plan::UniqueCursorPtr input_cursor) + : output_symbol_(std::move(output_symbol)), input_cursor_(std::move(input_cursor)) {} + +bool ScanAllCursor::Pull(Frame &frame, ExecutionContext &context) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; + memgraph::query::plan::ScopedProfile profile{ComputeProfilingKey(this), "ScanAll", &context}; SPDLOG_WARN("ScanAll"); - return false; + context.custom_storage->Call(); + return input_cursor_->Pull(frame, context); } -void ScanAllCursor::Shutdown() {} +void ScanAllCursor::Shutdown() { input_cursor_->Shutdown(); } -void ScanAllCursor::Reset() {} +void ScanAllCursor::Reset() { input_cursor_->Reset(); } } // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/scanall.hpp b/src/query/custom_cursors/scanall.hpp index 6cff6fa8fb0..46db3be112b 100644 --- a/src/query/custom_cursors/scanall.hpp +++ b/src/query/custom_cursors/scanall.hpp @@ 
-11,22 +11,21 @@ #pragma once +#include "query/frontend/semantic/symbol.hpp" #include "query/plan/cursor.hpp" -namespace memgraph::query { - -struct ExecutionContext; -class Frame; - -namespace custom_cursors { +namespace memgraph::query::custom_cursors { class ScanAllCursor : public memgraph::query::plan::Cursor { public: - ScanAllCursor() = default; - bool Pull(Frame & /*unused*/, ExecutionContext & /*unused*/) override; + explicit ScanAllCursor(Symbol output_symbol, plan::UniqueCursorPtr input_cursor); + bool Pull(Frame &frame, ExecutionContext &context) override; void Shutdown() override; void Reset() override; + + private: + const Symbol output_symbol_; + const plan::UniqueCursorPtr input_cursor_; }; -} // namespace custom_cursors -} // namespace memgraph::query +} // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/utils.hpp b/src/query/custom_cursors/utils.hpp new file mode 100644 index 00000000000..562c48ea590 --- /dev/null +++ b/src/query/custom_cursors/utils.hpp @@ -0,0 +1,22 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +#include + +#include "query/plan/scoped_profile.hpp" + +template +uint64_t ComputeProfilingKey(const T *obj) { + static_assert(sizeof(T *) == sizeof(uint64_t)); + return reinterpret_cast(obj); +} diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index ce74586d3d1..e14100386e9 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -79,6 +79,7 @@ #include "replication/config.hpp" #include "replication/state.hpp" #include "spdlog/spdlog.h" +#include "storage/custom_storage/storage.hpp" #include "storage/v2/disk/storage.hpp" #include "storage/v2/edge.hpp" #include "storage/v2/edge_import_mode.hpp" @@ -1716,6 +1717,8 @@ PullPlan::PullPlan(const std::shared_ptr plan, const Parameters &pa frame_(plan->symbol_table().max_position(), execution_memory), memory_limit_(memory_limit), use_monotonic_memory_(use_monotonic_memory) { + // TODO(gitbuda): Here shoule be ref to an object which can safely access the custom_storage. + ctx_.custom_storage = std::make_unique(); ctx_.db_accessor = dba; ctx_.symbol_table = plan->symbol_table(); ctx_.evaluation_context.timestamp = QueryTimestamp(); diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index b81d8d3ec0e..5808dcf4b03 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -30,6 +30,8 @@ #include "flags/experimental.hpp" #include "memory/query_memory_control.hpp" #include "query/common.hpp" +#include "query/custom_cursors/produce.hpp" +#include "query/custom_cursors/scanall.hpp" #include "spdlog/spdlog.h" #include "csv/parsing.hpp" @@ -586,6 +588,11 @@ ACCEPT_WITH_INPUT(ScanAll) UniqueCursorPtr ScanAll::MakeCursor(utils::MemoryResource *mem) const { memgraph::metrics::IncrementCounter(memgraph::metrics::ScanAllOperator); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::ALTERNATIVE_STORAGE)) { + return MakeUniqueCursorPtr(mem, output_symbol_, + input_->MakeCursor(mem)); + } + auto vertices = [this](Frame &, 
ExecutionContext &context) { auto *db = context.db_accessor; return std::make_optional(db->Vertices(view_)); @@ -2733,6 +2740,10 @@ ACCEPT_WITH_INPUT(Produce) UniqueCursorPtr Produce::MakeCursor(utils::MemoryResource *mem) const { memgraph::metrics::IncrementCounter(memgraph::metrics::ProduceOperator); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::ALTERNATIVE_STORAGE)) { + return MakeUniqueCursorPtr(mem, input_->MakeCursor(mem)); + } + return MakeUniqueCursorPtr(mem, *this, mem); } diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt new file mode 100644 index 00000000000..a707bd4f3a7 --- /dev/null +++ b/src/storage/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory(custom_storage) +add_subdirectory(v2) diff --git a/src/storage/custom_storage/CMakeLists.txt b/src/storage/custom_storage/CMakeLists.txt new file mode 100644 index 00000000000..f357bfa91ff --- /dev/null +++ b/src/storage/custom_storage/CMakeLists.txt @@ -0,0 +1,5 @@ +set(mg_custom_storage_sources + storage.cpp +) +add_library(mg_custom_storage STATIC ${mg_custom_storage_sources}) +target_link_libraries(mg_custom_storage mg-utils) diff --git a/src/storage/custom_storage/storage.cpp b/src/storage/custom_storage/storage.cpp new file mode 100644 index 00000000000..52773aeff7f --- /dev/null +++ b/src/storage/custom_storage/storage.cpp @@ -0,0 +1,22 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +#include "storage/custom_storage/storage.hpp" +#include "spdlog/spdlog.h" +#include "utils/logging.hpp" + +namespace memgraph::storage::custom_storage { + +void Storage::Call() { SPDLOG_WARN("Storage Call"); } + +} // namespace memgraph::storage::custom_storage diff --git a/src/storage/custom_storage/storage.hpp b/src/storage/custom_storage/storage.hpp new file mode 100644 index 00000000000..3f319f06d66 --- /dev/null +++ b/src/storage/custom_storage/storage.hpp @@ -0,0 +1,21 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +namespace memgraph::storage::custom_storage { + +class Storage { + public: + void Call(); +}; + +} // namespace memgraph::storage::custom_storage From 664b8d0d10db61fa1b847b849680762d944d73e6 Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Sun, 10 Mar 2024 00:47:40 +0000 Subject: [PATCH 03/25] Move custom_storage to memgraph.cpp --- src/memgraph.cpp | 4 ++++ src/query/context.hpp | 2 +- src/query/custom_cursors/once.cpp | 2 +- src/query/interpreter.cpp | 3 +-- src/query/interpreter_context.cpp | 1 + src/query/interpreter_context.hpp | 3 +++ 6 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/memgraph.cpp b/src/memgraph.cpp index d896bcc4c18..0d54a6285ed 100644 --- a/src/memgraph.cpp +++ b/src/memgraph.cpp @@ -10,6 +10,7 @@ // licenses/APL.txt. 
#include +#include #include "audit/log.hpp" #include "auth/auth.hpp" #include "communication/websocket/auth.hpp" @@ -415,6 +416,7 @@ int main(int argc, char **argv) { auth_, FLAGS_data_recovery_on_startup #endif ); + auto custom_storage = std::make_unique(); // Note: Now that all system's subsystems are initialised (dbms & auth) // We can now initialise the recovery of replication (which will include those subsystems) @@ -443,6 +445,8 @@ int main(int argc, char **argv) { auth_handler.get(), auth_checker.get(), &replication_handler); MG_ASSERT(db_acc, "Failed to access the main database"); + // TODO(gitbuda): Init moved here because tests are constructing the interpreter context. + interpreter_context_.custom_storage = custom_storage.get(); memgraph::query::procedure::gModuleRegistry.SetModulesDirectory(memgraph::flags::ParseQueryModulesDirectory(), FLAGS_data_directory); diff --git a/src/query/context.hpp b/src/query/context.hpp index d875195867e..272a9f350f4 100644 --- a/src/query/context.hpp +++ b/src/query/context.hpp @@ -74,7 +74,7 @@ inline std::vector NamesToLabels(const std::vector custom_storage{nullptr}; + memgraph::storage::custom_storage::Storage *custom_storage{nullptr}; DbAccessor *db_accessor{nullptr}; SymbolTable symbol_table; EvaluationContext evaluation_context; diff --git a/src/query/custom_cursors/once.cpp b/src/query/custom_cursors/once.cpp index 9a310146a6d..53a6747b7ca 100644 --- a/src/query/custom_cursors/once.cpp +++ b/src/query/custom_cursors/once.cpp @@ -21,7 +21,7 @@ namespace memgraph::query::custom_cursors { bool OnceCursor::Pull(Frame & /*unused*/, ExecutionContext &context) { utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; memgraph::query::plan::ScopedProfile profile{ComputeProfilingKey(this), "Once", &context}; - SPDLOG_WARN("Once"); + SPDLOG_WARN("Once; storage ptr {}", reinterpret_cast(context.custom_storage)); if (!did_pull_) { did_pull_ = true; return true; diff --git a/src/query/interpreter.cpp 
b/src/query/interpreter.cpp index e14100386e9..f5efd406ac9 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -1717,8 +1717,7 @@ PullPlan::PullPlan(const std::shared_ptr plan, const Parameters &pa frame_(plan->symbol_table().max_position(), execution_memory), memory_limit_(memory_limit), use_monotonic_memory_(use_monotonic_memory) { - // TODO(gitbuda): Here shoule be ref to an object which can safely access the custom_storage. - ctx_.custom_storage = std::make_unique(); + ctx_.custom_storage = interpreter_context->custom_storage; ctx_.db_accessor = dba; ctx_.symbol_table = plan->symbol_table(); ctx_.evaluation_context.timestamp = QueryTimestamp(); diff --git a/src/query/interpreter_context.cpp b/src/query/interpreter_context.cpp index eb35dbf03a0..2bea929549a 100644 --- a/src/query/interpreter_context.cpp +++ b/src/query/interpreter_context.cpp @@ -12,6 +12,7 @@ #include "query/interpreter_context.hpp" #include "query/interpreter.hpp" +#include "storage/custom_storage/storage.hpp" #include "system/include/system/system.hpp" namespace memgraph::query { diff --git a/src/query/interpreter_context.hpp b/src/query/interpreter_context.hpp index 559ea3342e5..0c123226397 100644 --- a/src/query/interpreter_context.hpp +++ b/src/query/interpreter_context.hpp @@ -23,6 +23,7 @@ #include "query/replication_query_handler.hpp" #include "query/typed_value.hpp" #include "replication/state.hpp" +#include "storage/custom_storage/storage.hpp" #include "storage/v2/config.hpp" #include "storage/v2/transaction.hpp" #include "system/state.hpp" @@ -63,6 +64,8 @@ struct InterpreterContext { ReplicationQueryHandler *replication_handler = nullptr); memgraph::dbms::DbmsHandler *dbms_handler; + // TODO(gitbuda): The storage should be under multi-tenancy -> it figure out. 
+ memgraph::storage::custom_storage::Storage *custom_storage; // Internal const InterpreterConfig config; From b8be5e0040e02df53454065c14fc80f8ebe87ca1 Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Sun, 10 Mar 2024 02:04:58 +0000 Subject: [PATCH 04/25] Add graphar, not working yet --- libs/CMakeLists.txt | 6 ++++++ libs/setup.sh | 6 ++++++ tests/manual/CMakeLists.txt | 3 +++ tests/manual/graphar.cpp | 39 +++++++++++++++++++++++++++++++++++++ 4 files changed, 54 insertions(+) create mode 100644 tests/manual/graphar.cpp diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt index 7d568d548a5..c9aef879dcb 100644 --- a/libs/CMakeLists.txt +++ b/libs/CMakeLists.txt @@ -301,3 +301,9 @@ import_external_library(nuraft STATIC ${CMAKE_CURRENT_SOURCE_DIR}/nuraft/include/) find_package(OpenSSL REQUIRED) target_link_libraries(nuraft INTERFACE ${OPENSSL_LIBRARIES}) + +# Setup GraphAr +import_external_library(graphar SHARED + ${CMAKE_CURRENT_SOURCE_DIR}/graphar/cpp/lib/libgar.so + ${CMAKE_CURRENT_SOURCE_DIR}/graphar/cpp/include + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/graphar/cpp) diff --git a/libs/setup.sh b/libs/setup.sh index 9c2a38c47eb..251c7cbcd69 100755 --- a/libs/setup.sh +++ b/libs/setup.sh @@ -127,6 +127,7 @@ declare -A primary_urls=( ["jemalloc"]="http://$local_cache_host/git/jemalloc.git" ["range-v3"]="http://$local_cache_host/git/range-v3.git" ["nuraft"]="http://$local_cache_host/git/NuRaft.git" + ["graphar"]="http://$local_cache_host/git/GraphAr.git" ) # The goal of secondary urls is to have links to the "source of truth" of @@ -157,6 +158,7 @@ declare -A secondary_urls=( ["jemalloc"]="https://github.com/jemalloc/jemalloc.git" ["range-v3"]="https://github.com/ericniebler/range-v3.git" ["nuraft"]="https://github.com/eBay/NuRaft.git" + ["graphar"]="https://github.com/gitbuda/GraphAr.git" ) # antlr @@ -288,3 +290,7 @@ pushd nuraft git apply ../nuraft2.1.0.patch ./prepare.sh popd + +# GraphAr 2024-03-07 +graphar_tag="da86711944653ae4ed738a52e7fc1180d4cc40d5" 
+repo_clone_try_double "${primary_urls[graphar]}" "${secondary_urls[graphar]}" "graphar" "$graphar_tag" true diff --git a/tests/manual/CMakeLists.txt b/tests/manual/CMakeLists.txt index 0a46b8e60db..58cd6d6dc19 100644 --- a/tests/manual/CMakeLists.txt +++ b/tests/manual/CMakeLists.txt @@ -55,3 +55,6 @@ target_link_libraries(${test_prefix}ssl_client mg-communication) add_manual_test(ssl_server.cpp) target_link_libraries(${test_prefix}ssl_server mg-communication) + +add_manual_test(graphar.cpp) +target_link_libraries(${test_prefix}graphar graphar mg-utils) diff --git a/tests/manual/graphar.cpp b/tests/manual/graphar.cpp new file mode 100644 index 00000000000..dd5b4ceb97d --- /dev/null +++ b/tests/manual/graphar.cpp @@ -0,0 +1,39 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#include + +#include +#include + +#define GRAPH_NAME "manual_graph" +#define IS_DIRECTED false +#define SAVE_PATH "/tmp/" + graph_name + "/" +#define ADJLIST_TYPE GAR_NAMESPACE::AdjListType::ordered_by_source +#define PAYLOAD_TYPE GAR_NAMESPACE::FileType::CSV +#define VERTEX_CHUNK_SIZE 1024 +#define EDGE_CHUNK_SIZE 1024 * 1024 + +int main(int argc, char **argv) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + + std::string graph_name = GRAPH_NAME; + std::string save_path = SAVE_PATH; + auto version = GAR_NAMESPACE::InfoVersion::Parse("gar/v1").value(); + std::string vertex_label = "node", vertex_prefix = "vertex/node/"; + auto vertex_info = GAR_NAMESPACE::CreateVertexInfo(vertex_label, VERTEX_CHUNK_SIZE, {}, vertex_prefix, version); + ASSERT(!vertex_info->Dump().has_error()); + ASSERT(vertex_info->Save(save_path + "node.vertex.yaml").ok()); + + std::cout << "GraphAr test" << std::endl; + + return 0; +} From e16b20238dff6725a3444e8694bc65c74b5166fc Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Sun, 10 Mar 2024 04:02:01 +0000 Subject: [PATCH 05/25] Fix the basic code in the manual test --- tests/manual/graphar.cpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/manual/graphar.cpp b/tests/manual/graphar.cpp index dd5b4ceb97d..0104d09c255 100644 --- a/tests/manual/graphar.cpp +++ b/tests/manual/graphar.cpp @@ -10,6 +10,7 @@ // licenses/APL.txt. 
#include +#include #include #include @@ -17,23 +18,25 @@ #define GRAPH_NAME "manual_graph" #define IS_DIRECTED false #define SAVE_PATH "/tmp/" + graph_name + "/" -#define ADJLIST_TYPE GAR_NAMESPACE::AdjListType::ordered_by_source -#define PAYLOAD_TYPE GAR_NAMESPACE::FileType::CSV +#define ADJLIST_TYPE GAR_NAMESPACE_INTERNAL::AdjListType::ordered_by_source +#define PAYLOAD_TYPE GAR_NAMESPACE_INTERNAL::FileType::CSV #define VERTEX_CHUNK_SIZE 1024 #define EDGE_CHUNK_SIZE 1024 * 1024 int main(int argc, char **argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); + // TODO(gitbuda): Add logging. std::string graph_name = GRAPH_NAME; std::string save_path = SAVE_PATH; - auto version = GAR_NAMESPACE::InfoVersion::Parse("gar/v1").value(); - std::string vertex_label = "node", vertex_prefix = "vertex/node/"; - auto vertex_info = GAR_NAMESPACE::CreateVertexInfo(vertex_label, VERTEX_CHUNK_SIZE, {}, vertex_prefix, version); - ASSERT(!vertex_info->Dump().has_error()); - ASSERT(vertex_info->Save(save_path + "node.vertex.yaml").ok()); - - std::cout << "GraphAr test" << std::endl; + // TODO(gitbuda): Parsing of string version doesn't work for some reason, try with toolchain-v5. 
+ auto version = std::make_shared(1); + std::string vertex_label = "node"; + std::string vertex_prefix = "vertex/node/"; + auto vertex_info = + GAR_NAMESPACE_INTERNAL::CreateVertexInfo(vertex_label, VERTEX_CHUNK_SIZE, {}, vertex_prefix, version); + vertex_info->Dump().error(); + vertex_info->Save(save_path + "node.vertex.yaml").ok(); return 0; } From 75cac2d4be4659ca270df1790fddc5f190bc1cfd Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Mon, 11 Mar 2024 01:32:56 +0000 Subject: [PATCH 06/25] Add meaningful code for storing vertices --- tests/manual/graphar.cpp | 111 ++++++++++++++++++++++++++++++++------- 1 file changed, 92 insertions(+), 19 deletions(-) diff --git a/tests/manual/graphar.cpp b/tests/manual/graphar.cpp index 0104d09c255..ab0956b7c80 100644 --- a/tests/manual/graphar.cpp +++ b/tests/manual/graphar.cpp @@ -9,34 +9,107 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. +#include #include #include #include +#include +#include #include -#define GRAPH_NAME "manual_graph" -#define IS_DIRECTED false -#define SAVE_PATH "/tmp/" + graph_name + "/" -#define ADJLIST_TYPE GAR_NAMESPACE_INTERNAL::AdjListType::ordered_by_source -#define PAYLOAD_TYPE GAR_NAMESPACE_INTERNAL::FileType::CSV -#define VERTEX_CHUNK_SIZE 1024 -#define EDGE_CHUNK_SIZE 1024 * 1024 +#include "utils/logging.hpp" + +struct GARDatabaseConfig { + std::filesystem::path root{std::filesystem::temp_directory_path()}; // single database root directory + std::string vertex_metadata_suffix{".vertex.yaml"}; + std::filesystem::path vertex_folder_prefix{"vertex"}; + uint64_t vertex_chunk_size{1024}; + uint64_t edge_chunk_size{1024 * 1024}; + bool is_directed{true}; + GAR_NAMESPACE_INTERNAL::AdjListType ordering; +}; + +struct GARVertexType { + GARDatabaseConfig base; + std::string label; + std::shared_ptr version; + GAR_NAMESPACE_INTERNAL::PropertyGroupVector properties; + std::string SavePath() const { return base.root / std::filesystem::path(label + 
base.vertex_metadata_suffix); } + std::filesystem::path Prefix() const { return base.vertex_folder_prefix / std::filesystem::path(label); } +}; + +auto InitVertexType(const GARVertexType &type) { + auto vertex_info = GAR_NAMESPACE_INTERNAL::CreateVertexInfo(type.label, type.base.vertex_chunk_size, type.properties, + type.Prefix(), type.version); + MG_ASSERT(!vertex_info->Dump().has_error()); + MG_ASSERT(vertex_info->Save(type.SavePath()).ok()); + return vertex_info; +} int main(int argc, char **argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); - // TODO(gitbuda): Add logging. - - std::string graph_name = GRAPH_NAME; - std::string save_path = SAVE_PATH; - // TODO(gitbuda): Parsing of string version doesn't work for some reason, try with toolchain-v5. - auto version = std::make_shared(1); - std::string vertex_label = "node"; - std::string vertex_prefix = "vertex/node/"; - auto vertex_info = - GAR_NAMESPACE_INTERNAL::CreateVertexInfo(vertex_label, VERTEX_CHUNK_SIZE, {}, vertex_prefix, version); - vertex_info->Dump().error(); - vertex_info->Save(save_path + "node.vertex.yaml").ok(); + spdlog::set_level(spdlog::level::trace); + + // schema + const auto database = GARDatabaseConfig{.root = "/tmp/gar/"}; + auto property_vector_1 = {GAR_NAMESPACE_INTERNAL::Property("id", GAR_NAMESPACE_INTERNAL::int64(), true)}; + auto property_vector_2 = {GAR_NAMESPACE_INTERNAL::Property("domain", GAR_NAMESPACE_INTERNAL::string(), false), + GAR_NAMESPACE_INTERNAL::Property("extra", GAR_NAMESPACE_INTERNAL::string(), false)}; + auto group1 = GAR_NAMESPACE_INTERNAL::CreatePropertyGroup(property_vector_1, GAR_NAMESPACE_INTERNAL::FileType::CSV); + auto group2 = GAR_NAMESPACE_INTERNAL::CreatePropertyGroup(property_vector_2, GAR_NAMESPACE_INTERNAL::FileType::CSV); + const auto vertex_type = GARVertexType{.base = database, + .label = "node", + .version = std::make_shared(1), + .properties = {group1, group2}}; + + // data partition 1 + auto vertex_info = InitVertexType(vertex_type); + 
GAR_NAMESPACE_INTERNAL::IdType start_index = 0; + GAR_NAMESPACE_INTERNAL::builder::VerticesBuilder builder(vertex_info, vertex_type.base.root, 0); + builder.SetValidateLevel(GAR_NAMESPACE_INTERNAL::ValidateLevel::strong_validate); + int vertex_count = 2; + std::vector property_names = {"id", "domain"}; + std::vector id = {0, 1}; + std::vector domain = {"google.com", "memgraph.com"}; + for (int i = 0; i < vertex_count; i++) { + GAR_NAMESPACE_INTERNAL::builder::Vertex v; + v.AddProperty(property_names[0], id[i]); + v.AddProperty(property_names[1], domain[i]); + MG_ASSERT(builder.AddVertex(v).ok()); + } + MG_ASSERT(builder.GetNum() == vertex_count); + spdlog::info("vertex_count={}", builder.GetNum()); + MG_ASSERT(builder.Dump().ok()); + spdlog::info("dump vertices collection successfully!"); + builder.Clear(); + MG_ASSERT(builder.GetNum() == 0); + // data partition 2 -> IMPORTANT: controlling start_vertex_index means partitioning & parallelization. + GAR_NAMESPACE_INTERNAL::builder::VerticesBuilder builder2(vertex_info, vertex_type.base.root, + vertex_type.base.vertex_chunk_size * 1); + builder.SetValidateLevel(GAR_NAMESPACE_INTERNAL::ValidateLevel::strong_validate); + vertex_count = 2; + property_names = {"id", "domain", "extra"}; + id = {2, 3}; + domain = {"nvidia.com", "facebook.com"}; + std::vector extra = {"{key:value}", "{}"}; + for (int i = 0; i < vertex_count; i++) { + GAR_NAMESPACE_INTERNAL::builder::Vertex v; + v.AddProperty(property_names[0], id[i]); + v.AddProperty(property_names[1], domain[i]); + if (i == 0) { + v.AddProperty(property_names[2], extra[0]); + } else { + v.AddProperty(property_names[2], extra[1]); + } + MG_ASSERT(builder2.AddVertex(v).ok()); + } + MG_ASSERT(builder2.GetNum() == vertex_count); + spdlog::info("vertex_count={}", builder2.GetNum()); + MG_ASSERT(builder2.Dump().ok()); + spdlog::info("dump vertices collection successfully!"); + builder2.Clear(); + MG_ASSERT(builder2.GetNum() == 0); return 0; } From 
8256a20a30a4565c6636d9609b450822a73e5379 Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Mon, 11 Mar 2024 02:27:02 +0000 Subject: [PATCH 07/25] Add edge storage example --- tests/manual/graphar.cpp | 90 +++++++++++++++++++++++++++++++++------- 1 file changed, 76 insertions(+), 14 deletions(-) diff --git a/tests/manual/graphar.cpp b/tests/manual/graphar.cpp index ab0956b7c80..83a9c34cf03 100644 --- a/tests/manual/graphar.cpp +++ b/tests/manual/graphar.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -22,50 +23,89 @@ struct GARDatabaseConfig { std::filesystem::path root{std::filesystem::temp_directory_path()}; // single database root directory + std::shared_ptr version; std::string vertex_metadata_suffix{".vertex.yaml"}; + std::string edge_metadata_suffix{".edge.yaml"}; std::filesystem::path vertex_folder_prefix{"vertex"}; + std::filesystem::path edge_folder_prefix{"edge"}; uint64_t vertex_chunk_size{1024}; - uint64_t edge_chunk_size{1024 * 1024}; - bool is_directed{true}; + uint64_t edge_chunk_size{1024}; + uint64_t edge_src_chunk_size{1024}; + uint64_t edge_dst_chunk_size{1024}; + bool is_directed{false}; GAR_NAMESPACE_INTERNAL::AdjListType ordering; }; struct GARVertexType { GARDatabaseConfig base; std::string label; - std::shared_ptr version; GAR_NAMESPACE_INTERNAL::PropertyGroupVector properties; - std::string SavePath() const { return base.root / std::filesystem::path(label + base.vertex_metadata_suffix); } std::filesystem::path Prefix() const { return base.vertex_folder_prefix / std::filesystem::path(label); } + std::string SavePath() const { return base.root / std::filesystem::path(label + base.vertex_metadata_suffix); } +}; + +struct GAREdgeType { + GARDatabaseConfig base; + std::string src_label; + std::string edge_type; + std::string dst_label; + GAR_NAMESPACE_INTERNAL::PropertyGroupVector properties; + std::vector> adjacent_lists; + std::string src_type_dst{src_label + "__" + edge_type + "__" + dst_label}; + 
std::filesystem::path Prefix() const { return base.edge_folder_prefix / std::filesystem::path(src_type_dst); } + std::string SavePath() const { return base.root / std::filesystem::path(src_type_dst + base.edge_metadata_suffix); } }; auto InitVertexType(const GARVertexType &type) { auto vertex_info = GAR_NAMESPACE_INTERNAL::CreateVertexInfo(type.label, type.base.vertex_chunk_size, type.properties, - type.Prefix(), type.version); + type.Prefix(), type.base.version); MG_ASSERT(!vertex_info->Dump().has_error()); MG_ASSERT(vertex_info->Save(type.SavePath()).ok()); return vertex_info; } +auto InitEdgeType(const GAREdgeType &edge_type) { + auto edge_info = GAR_NAMESPACE_INTERNAL::CreateEdgeInfo( + edge_type.src_label, edge_type.edge_type, edge_type.dst_label, edge_type.base.edge_chunk_size, + edge_type.base.edge_src_chunk_size, edge_type.base.edge_dst_chunk_size, edge_type.base.is_directed, + edge_type.adjacent_lists, edge_type.properties, edge_type.Prefix(), edge_type.base.version); + MG_ASSERT(!edge_info->Dump().has_error()); + MG_ASSERT(edge_info->Save(edge_type.SavePath()).ok()); + return edge_info; +} + int main(int argc, char **argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); spdlog::set_level(spdlog::level::trace); // schema - const auto database = GARDatabaseConfig{.root = "/tmp/gar/"}; + const auto database = GARDatabaseConfig{ + .root = "/tmp/gar/", + .version = std::make_shared(1), + }; auto property_vector_1 = {GAR_NAMESPACE_INTERNAL::Property("id", GAR_NAMESPACE_INTERNAL::int64(), true)}; auto property_vector_2 = {GAR_NAMESPACE_INTERNAL::Property("domain", GAR_NAMESPACE_INTERNAL::string(), false), GAR_NAMESPACE_INTERNAL::Property("extra", GAR_NAMESPACE_INTERNAL::string(), false)}; auto group1 = GAR_NAMESPACE_INTERNAL::CreatePropertyGroup(property_vector_1, GAR_NAMESPACE_INTERNAL::FileType::CSV); auto group2 = GAR_NAMESPACE_INTERNAL::CreatePropertyGroup(property_vector_2, GAR_NAMESPACE_INTERNAL::FileType::CSV); - const auto vertex_type = 
GARVertexType{.base = database, - .label = "node", - .version = std::make_shared(1), - .properties = {group1, group2}}; - - // data partition 1 + const auto vertex_type = GARVertexType{.base = database, .label = "node", .properties = {group1, group2}}; auto vertex_info = InitVertexType(vertex_type); - GAR_NAMESPACE_INTERNAL::IdType start_index = 0; + auto adjacent_lists = { + GAR_NAMESPACE_INTERNAL::CreateAdjacentList(GAR_NAMESPACE_INTERNAL::AdjListType::ordered_by_source, + GAR_NAMESPACE_INTERNAL::FileType::CSV), + GAR_NAMESPACE_INTERNAL::CreateAdjacentList(GAR_NAMESPACE_INTERNAL::AdjListType::ordered_by_dest, + GAR_NAMESPACE_INTERNAL::FileType::CSV)}; + auto property_vector_3 = {GAR_NAMESPACE_INTERNAL::Property("created", GAR_NAMESPACE_INTERNAL::string(), false)}; + auto group3 = GAR_NAMESPACE_INTERNAL::CreatePropertyGroup(property_vector_3, GAR_NAMESPACE_INTERNAL::FileType::CSV); + const auto edge_type = GAREdgeType{.base = database, + .src_label = "node", + .edge_type = "LINK", + .dst_label = "node", + .properties = {group3}, + .adjacent_lists = adjacent_lists}; + auto edge_info = InitEdgeType(edge_type); + + // vertex data partition 1 GAR_NAMESPACE_INTERNAL::builder::VerticesBuilder builder(vertex_info, vertex_type.base.root, 0); builder.SetValidateLevel(GAR_NAMESPACE_INTERNAL::ValidateLevel::strong_validate); int vertex_count = 2; @@ -84,7 +124,8 @@ int main(int argc, char **argv) { spdlog::info("dump vertices collection successfully!"); builder.Clear(); MG_ASSERT(builder.GetNum() == 0); - // data partition 2 -> IMPORTANT: controlling start_vertex_index means partitioning & parallelization. + + // vertex data partition 2 -> IMPORTANT: controlling start_vertex_index means partitioning & parallelization. 
GAR_NAMESPACE_INTERNAL::builder::VerticesBuilder builder2(vertex_info, vertex_type.base.root, vertex_type.base.vertex_chunk_size * 1); builder.SetValidateLevel(GAR_NAMESPACE_INTERNAL::ValidateLevel::strong_validate); @@ -111,5 +152,26 @@ int main(int argc, char **argv) { builder2.Clear(); MG_ASSERT(builder2.GetNum() == 0); + // edge data + GAR_NAMESPACE_INTERNAL::builder::EdgesBuilder builder3(edge_info, edge_type.base.root, + GraphArchive::AdjListType::ordered_by_dest, 1025); + builder.SetValidateLevel(GAR_NAMESPACE_INTERNAL::ValidateLevel::strong_validate); + int edge_count = 4; + property_names = {"created"}; + std::vector src = {1, 0, 0, 2}; + std::vector dst = {0, 1, 2, 1}; + std::vector creationDate = {"2010-01-01", "2011-01-01", "2012-01-01", "2013-01-01"}; + for (int i = 0; i < edge_count; i++) { + GAR_NAMESPACE_INTERNAL::builder::Edge e(src[i], dst[i]); + e.AddProperty("created", creationDate[i]); + MG_ASSERT(builder3.AddEdge(e).ok()); + } + MG_ASSERT(builder3.GetNum() == edge_count); + spdlog::info("edge_count={}", builder3.GetNum()); + MG_ASSERT(builder3.Dump().ok()); + spdlog::info("dump edges collection successfully!"); + builder3.Clear(); + MG_ASSERT(builder3.GetNum() == 0); + return 0; } From ed6fe4bad7fd6abde75f2a504ed75868aab48c8e Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Tue, 12 Mar 2024 00:46:54 +0000 Subject: [PATCH 08/25] Add storage high-level ideas, part 1 --- src/storage/custom_storage/storage.hpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/storage/custom_storage/storage.hpp b/src/storage/custom_storage/storage.hpp index 3f319f06d66..cd1142a450d 100644 --- a/src/storage/custom_storage/storage.hpp +++ b/src/storage/custom_storage/storage.hpp @@ -13,6 +13,27 @@ namespace memgraph::storage::custom_storage { +// Desing ideas: +// * try to follow existing API design +// * one of the issue is granular and SYNC API design +// * try to reuse exisitng in-memory data structures +// * PropertyStore + all 
PropertyValues seem very reusable +// * Vertex/Edge seems not very reusable because (delta*, edges*) +// * parallelization +// * ASYNC disk/network access +// * per database, maybe even isolated data cache + +// Target queries: +// CREATE (:Label {props}); // single vertex create +// UNWIND vertices_props AS props CREATE (n:Label) SET n += props; // batch vertex create +// MATCH (n:Label {id:X}) RETURN n; // single vertex lookup +// MATCH (n1:Label {id:X}) MATCH (n2:Label {id:Y}) CREATE (n1)-[r:Type {props}]->(n2); // single edge create +// // batch edge create +// // iterate all verteices with limited memory usage -> "global" graph algos possible +// // iterate all edges with limited memory usage -> "global" graph algos possible +// // get IN/OUT/ALL edges for a given vertex -> "global" graph algos possible +// // BFS with filter lambda + class Storage { public: void Call(); From 308bb0deed89098845d0fd3adc70ce44682470b3 Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Wed, 13 Mar 2024 03:25:10 +0000 Subject: [PATCH 09/25] Add prep for CreateNodeCursor implementation --- src/query/custom_cursors/CMakeLists.txt | 3 +- src/query/custom_cursors/all.hpp | 1 + src/query/custom_cursors/create_node.cpp | 76 ++++++++++++++++++++++ src/query/custom_cursors/create_node.hpp | 39 +++++++++++ src/query/custom_cursors/once.cpp | 2 +- src/query/custom_cursors/produce.cpp | 1 + src/query/plan/operator.cpp | 15 ++++- src/storage/v2/constraints/constraints.cpp | 5 +- src/storage/v2/storage_mode.cpp | 4 +- src/storage/v2/storage_mode.hpp | 8 ++- 10 files changed, 146 insertions(+), 8 deletions(-) create mode 100644 src/query/custom_cursors/create_node.cpp create mode 100644 src/query/custom_cursors/create_node.hpp diff --git a/src/query/custom_cursors/CMakeLists.txt b/src/query/custom_cursors/CMakeLists.txt index 06b13655bc9..226a82fee13 100644 --- a/src/query/custom_cursors/CMakeLists.txt +++ b/src/query/custom_cursors/CMakeLists.txt @@ -1,8 +1,9 @@ set(mg_custom_cursors_sources 
+ create_node.cpp once.cpp scanall.cpp produce.cpp ) add_library(mg_custom_cursors STATIC ${mg_custom_cursors_sources}) # TODO(gitbuda): Having mg-storage-v2 here is broken. -target_link_libraries(mg_custom_cursors mg-utils mg-storage-v2 mg_custom_storage) +target_link_libraries(mg_custom_cursors mg-utils mg-storage-v2 mg_custom_storage mg-query) diff --git a/src/query/custom_cursors/all.hpp b/src/query/custom_cursors/all.hpp index b684d314773..a73e2dca48d 100644 --- a/src/query/custom_cursors/all.hpp +++ b/src/query/custom_cursors/all.hpp @@ -11,6 +11,7 @@ #pragma once +#include "query/custom_cursors/create_node.hpp" #include "query/custom_cursors/once.hpp" #include "query/custom_cursors/produce.hpp" #include "query/custom_cursors/scanall.hpp" diff --git a/src/query/custom_cursors/create_node.cpp b/src/query/custom_cursors/create_node.cpp new file mode 100644 index 00000000000..c2b3738138a --- /dev/null +++ b/src/query/custom_cursors/create_node.cpp @@ -0,0 +1,76 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "query/custom_cursors/create_node.hpp" +#include "query/context.hpp" +#include "query/custom_cursors/utils.hpp" +#include "query/interpret/eval.hpp" +#include "query/interpret/frame.hpp" +#include "query/plan/operator.hpp" +#include "query/plan/scoped_profile.hpp" +#include "utils/logging.hpp" + +namespace memgraph::query::custom_cursors { + +struct QueryVertex {}; + +// Creates a vertex on this GraphDb. 
Returns a reference to vertex placed on the +// frame. +QueryVertex CreateVertex(const plan::NodeCreationInfo &node_info, Frame *frame, ExecutionContext &context) { + auto &dba = *context.db_accessor; + for (auto label : node_info.labels) { + // TODO(gitbuda): Collect labels. + } + // NOTE: Evaluator should use the latest accessors, as modified in this query, when + // setting properties on new nodes. + // NOTE: Evaluator is using query::DBAccessor of default storage mode (IN_MEM_TX), for props mapping & storage mode. + ExpressionEvaluator evaluator(frame, context.symbol_table, context.evaluation_context, context.db_accessor, + storage::View::NEW); + // TODO: PropsSetChecked allocates a PropertyValue, make it use context.memory + // when we update PropertyValue with custom allocator. + std::map properties; + if (const auto *node_info_properties = std::get_if(&node_info.properties)) { + for (const auto &[key, value_expression] : *node_info_properties) { + properties.emplace(key, value_expression->Accept(evaluator)); + } + } else { + auto property_map = evaluator.Visit(*std::get(node_info.properties)); + for (const auto &[key, value] : property_map.ValueMap()) { + properties.emplace(dba.NameToProperty(key), value); + } + } + // TODO(gitbuda): Set labels and properties. + // TODO(gitbuda): Put vertex on the frame. (*frame)[node_info.symbol] = new_node; + // TODO(gitbuda): Return vertex to the cursor, needed to update the trigger. 
+ context.custom_storage->Call(); + return QueryVertex{}; +} + +CreateNodeCursor::CreateNodeCursor(const plan::CreateNode &logical_operator, plan::UniqueCursorPtr input_cursor) + : logical_operator_(logical_operator), input_cursor_(std::move(input_cursor)) {} + +bool CreateNodeCursor::Pull(Frame &frame, ExecutionContext &context) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; + memgraph::query::plan::ScopedProfile profile{ComputeProfilingKey(this), "CreateNode", &context}; + SPDLOG_WARN("CreateNodeCursor::Pull"); + if (input_cursor_->Pull(frame, context)) { + // TODO(gitbuda): Take data from the operator and create the node. + CreateVertex(logical_operator_.node_info_, &frame, context); + return true; + } + return false; +} + +void CreateNodeCursor::Shutdown() { input_cursor_->Shutdown(); } + +void CreateNodeCursor::Reset() { input_cursor_->Reset(); } + +} // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/create_node.hpp b/src/query/custom_cursors/create_node.hpp new file mode 100644 index 00000000000..6217abe8c98 --- /dev/null +++ b/src/query/custom_cursors/create_node.hpp @@ -0,0 +1,39 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include "query/plan/cursor.hpp" + +// TASKS: +// 1. check label creation access rule +// 2. create vertex +// 3. 
inform the trigger + +namespace memgraph::query::plan { +class CreateNode; +} + +namespace memgraph::query::custom_cursors { + +class CreateNodeCursor : public memgraph::query::plan::Cursor { + public: + explicit CreateNodeCursor(const plan::CreateNode &logical_operator, plan::UniqueCursorPtr input_cursor); + bool Pull(Frame &frame, ExecutionContext &context) override; + void Shutdown() override; + void Reset() override; + + private: + const plan::CreateNode &logical_operator_; + const plan::UniqueCursorPtr input_cursor_; +}; + +} // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/once.cpp b/src/query/custom_cursors/once.cpp index 53a6747b7ca..352e25dbf56 100644 --- a/src/query/custom_cursors/once.cpp +++ b/src/query/custom_cursors/once.cpp @@ -21,7 +21,7 @@ namespace memgraph::query::custom_cursors { bool OnceCursor::Pull(Frame & /*unused*/, ExecutionContext &context) { utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; memgraph::query::plan::ScopedProfile profile{ComputeProfilingKey(this), "Once", &context}; - SPDLOG_WARN("Once; storage ptr {}", reinterpret_cast(context.custom_storage)); + SPDLOG_WARN("OnceCursor::Pull"); if (!did_pull_) { did_pull_ = true; return true; diff --git a/src/query/custom_cursors/produce.cpp b/src/query/custom_cursors/produce.cpp index b31fbd57aa7..42c66cf66d0 100644 --- a/src/query/custom_cursors/produce.cpp +++ b/src/query/custom_cursors/produce.cpp @@ -30,4 +30,5 @@ bool ProduceCursor::Pull(Frame &frame, ExecutionContext &context) { void ProduceCursor::Shutdown() { input_cursor_->Shutdown(); } void ProduceCursor::Reset() { input_cursor_->Reset(); } + } // namespace memgraph::query::custom_cursors diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 5808dcf4b03..7cb5c68a638 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -269,6 +269,10 @@ ACCEPT_WITH_INPUT(CreateNode) UniqueCursorPtr CreateNode::MakeCursor(utils::MemoryResource *mem) 
const { memgraph::metrics::IncrementCounter(memgraph::metrics::CreateNodeOperator); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::ALTERNATIVE_STORAGE)) { + return MakeUniqueCursorPtr(mem, *this, input_->MakeCursor(mem)); + } + return MakeUniqueCursorPtr(mem, *this, mem); } @@ -2740,9 +2744,10 @@ ACCEPT_WITH_INPUT(Produce) UniqueCursorPtr Produce::MakeCursor(utils::MemoryResource *mem) const { memgraph::metrics::IncrementCounter(memgraph::metrics::ProduceOperator); - if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::ALTERNATIVE_STORAGE)) { - return MakeUniqueCursorPtr(mem, input_->MakeCursor(mem)); - } + // NOTE(gitbuda): Since Produce is a stateless cursor -> it's possible to reuse it! + // if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::ALTERNATIVE_STORAGE)) { + // return MakeUniqueCursorPtr(mem, input_->MakeCursor(mem)); + // } return MakeUniqueCursorPtr(mem, *this, mem); } @@ -3567,6 +3572,8 @@ class EmptyResultCursor : public Cursor { UniqueCursorPtr EmptyResult::MakeCursor(utils::MemoryResource *mem) const { memgraph::metrics::IncrementCounter(memgraph::metrics::EmptyResultOperator); + // NOTE(gitbuda): Needed in plain CREATE query -> reused. + return MakeUniqueCursorPtr(mem, *this, mem); } @@ -3634,6 +3641,8 @@ class AccumulateCursor : public Cursor { UniqueCursorPtr Accumulate::MakeCursor(utils::MemoryResource *mem) const { memgraph::metrics::IncrementCounter(memgraph::metrics::AccumulateOperator); + // NOTE(gitbuda): Also needed in CREATE RETURN -> reused. + return MakeUniqueCursorPtr(mem, *this, mem); } diff --git a/src/storage/v2/constraints/constraints.cpp b/src/storage/v2/constraints/constraints.cpp index 6a6554db47a..14b744283ad 100644 --- a/src/storage/v2/constraints/constraints.cpp +++ b/src/storage/v2/constraints/constraints.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -26,6 +26,9 @@ Constraints::Constraints(const Config &config, StorageMode storage_mode) { case StorageMode::ON_DISK_TRANSACTIONAL: unique_constraints_ = std::make_unique(config); break; + case StorageMode::ALTERNATIVE_STORAGE: + // TODO(gitbuda): Don't pass + break; }; }); } diff --git a/src/storage/v2/storage_mode.cpp b/src/storage/v2/storage_mode.cpp index 06764685489..0b39a5fbc80 100644 --- a/src/storage/v2/storage_mode.cpp +++ b/src/storage/v2/storage_mode.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -25,6 +25,8 @@ std::string_view StorageModeToString(memgraph::storage::StorageMode storage_mode return "IN_MEMORY_TRANSACTIONAL"; case memgraph::storage::StorageMode::ON_DISK_TRANSACTIONAL: return "ON_DISK_TRANSACTIONAL"; + case memgraph::storage::StorageMode::ALTERNATIVE_STORAGE: + return "ALTERNATIVE_STORAGE"; } } diff --git a/src/storage/v2/storage_mode.hpp b/src/storage/v2/storage_mode.hpp index f4a133f386a..98f209fa376 100644 --- a/src/storage/v2/storage_mode.hpp +++ b/src/storage/v2/storage_mode.hpp @@ -16,9 +16,15 @@ #include namespace memgraph::storage { -enum class StorageMode : std::uint8_t { IN_MEMORY_ANALYTICAL, IN_MEMORY_TRANSACTIONAL, ON_DISK_TRANSACTIONAL }; +enum class StorageMode : std::uint8_t { + IN_MEMORY_ANALYTICAL, + IN_MEMORY_TRANSACTIONAL, + ON_DISK_TRANSACTIONAL, + ALTERNATIVE_STORAGE +}; inline constexpr std::array storage_mode_mappings{ + std::pair{std::string_view{"ALTERNATIVE_STORAGE"}, memgraph::storage::StorageMode::ALTERNATIVE_STORAGE}, std::pair{std::string_view{"IN_MEMORY_TRANSACTIONAL"}, 
memgraph::storage::StorageMode::IN_MEMORY_TRANSACTIONAL}, std::pair{std::string_view{"IN_MEMORY_ANALYTICAL"}, memgraph::storage::StorageMode::IN_MEMORY_ANALYTICAL}, std::pair{std::string_view{"ON_DISK_TRANSACTIONAL"}, memgraph::storage::StorageMode::ON_DISK_TRANSACTIONAL}}; From 157c012bc53f2fc5d24e3838f87080e183044185 Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Sun, 17 Mar 2024 02:45:13 +0000 Subject: [PATCH 10/25] Add custom vertex --- src/query/custom_cursors/create_node.cpp | 17 +++++++++------ src/storage/custom_storage/storage.cpp | 8 +++++-- src/storage/custom_storage/storage.hpp | 11 ++++++++++ src/storage/custom_storage/vertex.hpp | 27 ++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 8 deletions(-) create mode 100644 src/storage/custom_storage/vertex.hpp diff --git a/src/query/custom_cursors/create_node.cpp b/src/query/custom_cursors/create_node.cpp index c2b3738138a..95f7b76333c 100644 --- a/src/query/custom_cursors/create_node.cpp +++ b/src/query/custom_cursors/create_node.cpp @@ -16,6 +16,7 @@ #include "query/interpret/frame.hpp" #include "query/plan/operator.hpp" #include "query/plan/scoped_profile.hpp" +#include "storage/custom_storage/vertex.hpp" #include "utils/logging.hpp" namespace memgraph::query::custom_cursors { @@ -26,8 +27,10 @@ struct QueryVertex {}; // frame. QueryVertex CreateVertex(const plan::NodeCreationInfo &node_info, Frame *frame, ExecutionContext &context) { auto &dba = *context.db_accessor; - for (auto label : node_info.labels) { - // TODO(gitbuda): Collect labels. + if (node_info.labels.size() != 1) { + throw QueryRuntimeException( + "0 or multiple labels not yet supported under CreateNode. You have to provide exactly 1 lable for any given " + "vertex/node."); } // NOTE: Evaluator should use the latest accessors, as modified in this query, when // setting properties on new nodes. 
@@ -47,10 +50,13 @@ QueryVertex CreateVertex(const plan::NodeCreationInfo &node_info, Frame *frame, properties.emplace(dba.NameToProperty(key), value); } } - // TODO(gitbuda): Set labels and properties. // TODO(gitbuda): Put vertex on the frame. (*frame)[node_info.symbol] = new_node; - // TODO(gitbuda): Return vertex to the cursor, needed to update the trigger. - context.custom_storage->Call(); + // (*frame)[node_info.symbol] = new_node; + // return (*frame)[node_info.symbol].ValueVertex(); + + auto new_node = memgraph::storage::custom_storage::Vertex{.labels = node_info.labels, .properties = properties}; + auto *vertex_ptr = context.custom_storage->AddVertex(std::move(new_node)); + SPDLOG_WARN("{}", context.custom_storage->VerticesNo()); return QueryVertex{}; } @@ -62,7 +68,6 @@ bool CreateNodeCursor::Pull(Frame &frame, ExecutionContext &context) { memgraph::query::plan::ScopedProfile profile{ComputeProfilingKey(this), "CreateNode", &context}; SPDLOG_WARN("CreateNodeCursor::Pull"); if (input_cursor_->Pull(frame, context)) { - // TODO(gitbuda): Take data from the operator and create the node. CreateVertex(logical_operator_.node_info_, &frame, context); return true; } diff --git a/src/storage/custom_storage/storage.cpp b/src/storage/custom_storage/storage.cpp index 52773aeff7f..74732aec24a 100644 --- a/src/storage/custom_storage/storage.cpp +++ b/src/storage/custom_storage/storage.cpp @@ -9,8 +9,6 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. 
-#pragma once - #include "storage/custom_storage/storage.hpp" #include "spdlog/spdlog.h" #include "utils/logging.hpp" @@ -19,4 +17,10 @@ namespace memgraph::storage::custom_storage { void Storage::Call() { SPDLOG_WARN("Storage Call"); } +Vertex *Storage::AddVertex(Vertex &&vertex) { + return vertices_.emplace_back(std::make_unique(std::move(vertex))).get(); +} + +uint64_t Storage::VerticesNo() const { return vertices_.size(); } + } // namespace memgraph::storage::custom_storage diff --git a/src/storage/custom_storage/storage.hpp b/src/storage/custom_storage/storage.hpp index cd1142a450d..de21ca906a5 100644 --- a/src/storage/custom_storage/storage.hpp +++ b/src/storage/custom_storage/storage.hpp @@ -11,6 +11,10 @@ #pragma once +#include + +#include "storage/custom_storage/vertex.hpp" + namespace memgraph::storage::custom_storage { // Desing ideas: @@ -37,6 +41,13 @@ namespace memgraph::storage::custom_storage { class Storage { public: void Call(); + // TODO(gitbuda): Make AddVertex thread-safe / concurrent. + Vertex *AddVertex(Vertex &&vertex); + uint64_t VerticesNo() const; + + private: + // TODO(gitbuda): vector is a horrible choice here -> on resize -> :boom: -> list is here just TMP + std::list> vertices_; }; } // namespace memgraph::storage::custom_storage diff --git a/src/storage/custom_storage/vertex.hpp b/src/storage/custom_storage/vertex.hpp new file mode 100644 index 00000000000..f40981d7238 --- /dev/null +++ b/src/storage/custom_storage/vertex.hpp @@ -0,0 +1,27 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include +#include + +#include "storage/v2/id_types.hpp" +#include "storage/v2/property_value.hpp" + +namespace memgraph::storage::custom_storage { + +struct Vertex { + std::vector labels; + std::map properties; +}; + +} // namespace memgraph::storage::custom_storage From cdd841fb0279169cd00072185b91d0544904e3d1 Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Tue, 2 Apr 2024 19:41:30 +0000 Subject: [PATCH 11/25] Sort the graphar schema a bit --- tests/manual/graphar.cpp | 176 +++++++++++++++++++++++++-------------- 1 file changed, 112 insertions(+), 64 deletions(-) diff --git a/tests/manual/graphar.cpp b/tests/manual/graphar.cpp index 89bd4fee195..fb868b59509 100644 --- a/tests/manual/graphar.cpp +++ b/tests/manual/graphar.cpp @@ -21,94 +21,142 @@ #include "utils/logging.hpp" -// TODO(gitbuda): Reiterate GARDatabase/GraphConfig -> make it complete -> just make a struct with sub-structs. 
- struct GARDatabaseConfig { - std::filesystem::path root{std::filesystem::temp_directory_path()}; // single database root directory - std::shared_ptr version; - std::string vertex_metadata_suffix{".vertex.yaml"}; - std::string edge_metadata_suffix{".edge.yaml"}; - std::filesystem::path vertex_folder_prefix{"vertex"}; - std::filesystem::path edge_folder_prefix{"edge"}; - uint64_t vertex_chunk_size{1024}; - uint64_t edge_chunk_size{1024}; - uint64_t edge_src_chunk_size{1024}; - uint64_t edge_dst_chunk_size{1024}; - bool is_directed{false}; - graphar::AdjListType ordering; -}; + struct PerDatabase { + std::filesystem::path root{std::filesystem::temp_directory_path()}; // single database root directory + std::shared_ptr version; + std::string vertex_metadata_suffix{".vertex.yaml"}; + std::string edge_metadata_suffix{".edge.yaml"}; + std::filesystem::path vertex_folder_prefix{"vertex"}; + std::filesystem::path edge_folder_prefix{"edge"}; + uint64_t vertex_chunk_size{1024}; + uint64_t edge_chunk_size{1024}; + uint64_t edge_src_chunk_size{1024}; + uint64_t edge_dst_chunk_size{1024}; + bool is_directed{false}; + graphar::AdjListType ordering; + } * base; -struct GARVertexType { - GARDatabaseConfig base; - std::string label; - graphar::PropertyGroupVector properties; - std::filesystem::path Prefix() const { return base.vertex_folder_prefix / std::filesystem::path(label); } - std::string SavePath() const { return base.root / std::filesystem::path(label + base.vertex_metadata_suffix); } -}; + struct GARVertexType { + PerDatabase *base{nullptr}; + void CheckBase() const { MG_ASSERT(base != nullptr); } + std::string label; + graphar::PropertyGroupVector properties; + std::filesystem::path Prefix() const { + CheckBase(); + return base->vertex_folder_prefix / std::filesystem::path(label); + } + std::string SavePath() const { + CheckBase(); + return base->root / std::filesystem::path(label + base->vertex_metadata_suffix); + } + }; + std::vector vertex_types; -struct GAREdgeType { 
- GARDatabaseConfig base; - std::string src_label; - std::string edge_type; - std::string dst_label; - graphar::PropertyGroupVector properties; - std::vector> adjacent_lists; - std::string src_type_dst{src_label + "__" + edge_type + "__" + dst_label}; - std::filesystem::path Prefix() const { return base.edge_folder_prefix / std::filesystem::path(src_type_dst); } - std::string SavePath() const { return base.root / std::filesystem::path(src_type_dst + base.edge_metadata_suffix); } + struct GAREdgeType { + PerDatabase *base{nullptr}; + void CheckBase() const { MG_ASSERT(base != nullptr); } + std::string src_label; + std::string edge_type; + std::string dst_label; + graphar::PropertyGroupVector properties; + std::vector> adjacent_lists; + std::string src_type_dst{src_label + "__" + edge_type + "__" + dst_label}; + std::filesystem::path Prefix() const { + CheckBase(); + return base->edge_folder_prefix / std::filesystem::path(src_type_dst); + } + std::string SavePath() const { + CheckBase(); + return base->root / std::filesystem::path(src_type_dst + base->edge_metadata_suffix); + } + }; + std::vector edge_types; }; -auto InitVertexType(const GARVertexType &type) { - auto vertex_info = graphar::CreateVertexInfo(type.label, type.base.vertex_chunk_size, type.properties, - type.Prefix(), type.base.version); +auto InitVertexType(const GARDatabaseConfig::GARVertexType &vertex_type) { + auto vertex_info = graphar::CreateVertexInfo(vertex_type.label, vertex_type.base->vertex_chunk_size, + vertex_type.properties, vertex_type.Prefix(), vertex_type.base->version); MG_ASSERT(!vertex_info->Dump().has_error()); - MG_ASSERT(vertex_info->Save(type.SavePath()).ok()); + MG_ASSERT(vertex_info->Save(vertex_type.SavePath()).ok()); return vertex_info; } -auto InitEdgeType(const GAREdgeType &edge_type) { +auto InitVertexTypes(const GARDatabaseConfig &config) { + std::vector> vertex_infos; + for (const auto &vertex_type : config.vertex_types) { + auto vertex_info = InitVertexType(vertex_type); 
+ vertex_infos.push_back(vertex_info); + } + return vertex_infos; +} + +auto InitEdgeType(const GARDatabaseConfig::GAREdgeType &edge_type) { auto edge_info = graphar::CreateEdgeInfo( - edge_type.src_label, edge_type.edge_type, edge_type.dst_label, edge_type.base.edge_chunk_size, - edge_type.base.edge_src_chunk_size, edge_type.base.edge_dst_chunk_size, edge_type.base.is_directed, - edge_type.adjacent_lists, edge_type.properties, edge_type.Prefix(), edge_type.base.version); + edge_type.src_label, edge_type.edge_type, edge_type.dst_label, edge_type.base->edge_chunk_size, + edge_type.base->edge_src_chunk_size, edge_type.base->edge_dst_chunk_size, edge_type.base->is_directed, + edge_type.adjacent_lists, edge_type.properties, edge_type.Prefix(), edge_type.base->version); MG_ASSERT(!edge_info->Dump().has_error()); MG_ASSERT(edge_info->Save(edge_type.SavePath()).ok()); return edge_info; } +auto InitEdgeTypes(const GARDatabaseConfig &config) { + std::vector> edge_infos; + for (const auto &edge_type : config.edge_types) { + auto edge_info = InitEdgeType(edge_type); + edge_infos.push_back(edge_info); + } + return edge_infos; +} + int main(int argc, char **argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); spdlog::set_level(spdlog::level::trace); - // schema - const auto database = GARDatabaseConfig{ - .root = "/tmp/gar/", - .version = std::make_shared(1), - }; + // "runtime" schema spec START + // node metadata auto property_vector_1 = {graphar::Property("id", graphar::int64(), true)}; auto property_vector_2 = {graphar::Property("domain", graphar::string(), false), graphar::Property("extra", graphar::string(), false)}; auto group1 = graphar::CreatePropertyGroup(property_vector_1, graphar::FileType::CSV); auto group2 = graphar::CreatePropertyGroup(property_vector_2, graphar::FileType::CSV); - const auto vertex_type = GARVertexType{.base = database, .label = "node", .properties = {group1, group2}}; - auto vertex_info = InitVertexType(vertex_type); - auto adjacent_lists 
= { - graphar::CreateAdjacentList(graphar::AdjListType::ordered_by_source, - graphar::FileType::CSV), - graphar::CreateAdjacentList(graphar::AdjListType::ordered_by_dest, - graphar::FileType::CSV)}; + // edge metadata + auto adjacent_lists = {graphar::CreateAdjacentList(graphar::AdjListType::ordered_by_source, graphar::FileType::CSV), + graphar::CreateAdjacentList(graphar::AdjListType::ordered_by_dest, graphar::FileType::CSV)}; auto property_vector_3 = {graphar::Property("created", graphar::string(), false)}; auto group3 = graphar::CreatePropertyGroup(property_vector_3, graphar::FileType::CSV); - const auto edge_type = GAREdgeType{.base = database, - .src_label = "node", - .edge_type = "LINK", - .dst_label = "node", - .properties = {group3}, - .adjacent_lists = adjacent_lists}; - auto edge_info = InitEdgeType(edge_type); + + GARDatabaseConfig::PerDatabase per_database = { + .root = "/tmp/gar/", + .version = std::make_shared(1), + }; + const auto db_config = GARDatabaseConfig{ + .base = &per_database, + .vertex_types = {{ + .base = &per_database, + .label = "node", + .properties = {group1, group2}, + }}, + .edge_types = {{ + .base = &per_database, + .src_label = "node", + .edge_type = "LINK", + .dst_label = "node", + .properties = {group3}, + .adjacent_lists = adjacent_lists, + }}, + }; + // "runtime" schema spec START + + // init GAR + auto &vertex_type = db_config.vertex_types[0]; + auto vertex_info = InitVertexTypes(db_config)[0]; + auto &edge_type = db_config.edge_types[0]; + auto edge_info = InitEdgeTypes(db_config)[0]; // vertex data partition 1 - graphar::builder::VerticesBuilder builder(vertex_info, vertex_type.base.root, 0); + graphar::builder::VerticesBuilder builder(vertex_info, vertex_type.base->root, 0); builder.SetValidateLevel(graphar::ValidateLevel::strong_validate); int vertex_count = 2; std::vector property_names = {"id", "domain"}; @@ -126,10 +174,11 @@ int main(int argc, char **argv) { spdlog::info("dump vertices collection successfully!"); 
builder.Clear(); MG_ASSERT(builder.GetNum() == 0); + // "runtime" schema spec START // vertex data partition 2 -> IMPORTANT: controlling start_vertex_index means partitioning & parallelization. - graphar::builder::VerticesBuilder builder2(vertex_info, vertex_type.base.root, - vertex_type.base.vertex_chunk_size * 1); + graphar::builder::VerticesBuilder builder2(vertex_info, vertex_type.base->root, + vertex_type.base->vertex_chunk_size * 1); builder.SetValidateLevel(graphar::ValidateLevel::strong_validate); vertex_count = 2; property_names = {"id", "domain", "extra"}; @@ -155,8 +204,7 @@ int main(int argc, char **argv) { MG_ASSERT(builder2.GetNum() == 0); // edge data - graphar::builder::EdgesBuilder builder3(edge_info, edge_type.base.root, - graphar::AdjListType::ordered_by_dest, 1025); + graphar::builder::EdgesBuilder builder3(edge_info, edge_type.base->root, graphar::AdjListType::ordered_by_dest, 1025); builder.SetValidateLevel(graphar::ValidateLevel::strong_validate); int edge_count = 4; property_names = {"created"}; From 5ea313a993e0d6661ad84c89dcae2c33e09e9808 Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Tue, 2 Apr 2024 20:20:21 +0000 Subject: [PATCH 12/25] Put config under custom storage --- src/storage/custom_storage/CMakeLists.txt | 2 +- src/storage/custom_storage/gar_database.hpp | 113 ++++++++++++++++++++ src/storage/custom_storage/storage.hpp | 2 + tests/manual/graphar.cpp | 99 +---------------- 4 files changed, 121 insertions(+), 95 deletions(-) create mode 100644 src/storage/custom_storage/gar_database.hpp diff --git a/src/storage/custom_storage/CMakeLists.txt b/src/storage/custom_storage/CMakeLists.txt index f357bfa91ff..52ab2491102 100644 --- a/src/storage/custom_storage/CMakeLists.txt +++ b/src/storage/custom_storage/CMakeLists.txt @@ -2,4 +2,4 @@ set(mg_custom_storage_sources storage.cpp ) add_library(mg_custom_storage STATIC ${mg_custom_storage_sources}) -target_link_libraries(mg_custom_storage mg-utils) 
+target_link_libraries(mg_custom_storage mg-utils graphar) diff --git a/src/storage/custom_storage/gar_database.hpp b/src/storage/custom_storage/gar_database.hpp new file mode 100644 index 00000000000..741a28b155c --- /dev/null +++ b/src/storage/custom_storage/gar_database.hpp @@ -0,0 +1,113 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include +#include +#include + +#include + +#include "utils/logging.hpp" + +namespace memgraph::storage::custom_storage { + +struct GARDatabaseConfig { + struct PerDatabase { + std::filesystem::path root{std::filesystem::temp_directory_path()}; // single database root directory + std::shared_ptr version; + std::string vertex_metadata_suffix{".vertex.yaml"}; + std::string edge_metadata_suffix{".edge.yaml"}; + std::filesystem::path vertex_folder_prefix{"vertex"}; + std::filesystem::path edge_folder_prefix{"edge"}; + uint64_t vertex_chunk_size{1024}; + uint64_t edge_chunk_size{1024}; + uint64_t edge_src_chunk_size{1024}; + uint64_t edge_dst_chunk_size{1024}; + bool is_directed{false}; + graphar::AdjListType ordering; + } * base; + + struct GARVertexType { + PerDatabase *base{nullptr}; + void CheckBase() const { MG_ASSERT(base != nullptr); } + std::string label; + graphar::PropertyGroupVector properties; + std::filesystem::path Prefix() const { + CheckBase(); + return base->vertex_folder_prefix / std::filesystem::path(label); + } + std::string SavePath() const { + CheckBase(); + return base->root / 
std::filesystem::path(label + base->vertex_metadata_suffix); + } + }; + std::vector vertex_types; + + struct GAREdgeType { + PerDatabase *base{nullptr}; + void CheckBase() const { MG_ASSERT(base != nullptr); } + std::string src_label; + std::string edge_type; + std::string dst_label; + graphar::PropertyGroupVector properties; + std::vector> adjacent_lists; + std::string src_type_dst{src_label + "__" + edge_type + "__" + dst_label}; + std::filesystem::path Prefix() const { + CheckBase(); + return base->edge_folder_prefix / std::filesystem::path(src_type_dst); + } + std::string SavePath() const { + CheckBase(); + return base->root / std::filesystem::path(src_type_dst + base->edge_metadata_suffix); + } + }; + std::vector edge_types; +}; + +inline auto InitVertexType(const GARDatabaseConfig::GARVertexType &vertex_type) { + auto vertex_info = graphar::CreateVertexInfo(vertex_type.label, vertex_type.base->vertex_chunk_size, + vertex_type.properties, vertex_type.Prefix(), vertex_type.base->version); + MG_ASSERT(!vertex_info->Dump().has_error()); + MG_ASSERT(vertex_info->Save(vertex_type.SavePath()).ok()); + return vertex_info; +} + +inline auto InitVertexTypes(const GARDatabaseConfig &config) { + std::vector> vertex_infos; + for (const auto &vertex_type : config.vertex_types) { + auto vertex_info = InitVertexType(vertex_type); + vertex_infos.push_back(vertex_info); + } + return vertex_infos; +} + +inline auto InitEdgeType(const GARDatabaseConfig::GAREdgeType &edge_type) { + auto edge_info = graphar::CreateEdgeInfo( + edge_type.src_label, edge_type.edge_type, edge_type.dst_label, edge_type.base->edge_chunk_size, + edge_type.base->edge_src_chunk_size, edge_type.base->edge_dst_chunk_size, edge_type.base->is_directed, + edge_type.adjacent_lists, edge_type.properties, edge_type.Prefix(), edge_type.base->version); + MG_ASSERT(!edge_info->Dump().has_error()); + MG_ASSERT(edge_info->Save(edge_type.SavePath()).ok()); + return edge_info; +} + +inline auto InitEdgeTypes(const 
GARDatabaseConfig &config) { + std::vector> edge_infos; + for (const auto &edge_type : config.edge_types) { + auto edge_info = InitEdgeType(edge_type); + edge_infos.push_back(edge_info); + } + return edge_infos; +} + +} // namespace memgraph::storage::custom_storage diff --git a/src/storage/custom_storage/storage.hpp b/src/storage/custom_storage/storage.hpp index de21ca906a5..5b46f3236d1 100644 --- a/src/storage/custom_storage/storage.hpp +++ b/src/storage/custom_storage/storage.hpp @@ -13,6 +13,7 @@ #include +#include "storage/custom_storage/gar_database.hpp" #include "storage/custom_storage/vertex.hpp" namespace memgraph::storage::custom_storage { @@ -48,6 +49,7 @@ class Storage { private: // TODO(gitbuda): vector is a horrible choice here -> on resize -> :boom: -> list is here just TMP std::list> vertices_; + GARDatabaseConfig config_; }; } // namespace memgraph::storage::custom_storage diff --git a/tests/manual/graphar.cpp b/tests/manual/graphar.cpp index fb868b59509..a1f665b10c5 100644 --- a/tests/manual/graphar.cpp +++ b/tests/manual/graphar.cpp @@ -10,7 +10,6 @@ // licenses/APL.txt. 
#include -#include #include #include @@ -19,97 +18,9 @@ #include #include +#include "storage/custom_storage/gar_database.hpp" #include "utils/logging.hpp" -struct GARDatabaseConfig { - struct PerDatabase { - std::filesystem::path root{std::filesystem::temp_directory_path()}; // single database root directory - std::shared_ptr version; - std::string vertex_metadata_suffix{".vertex.yaml"}; - std::string edge_metadata_suffix{".edge.yaml"}; - std::filesystem::path vertex_folder_prefix{"vertex"}; - std::filesystem::path edge_folder_prefix{"edge"}; - uint64_t vertex_chunk_size{1024}; - uint64_t edge_chunk_size{1024}; - uint64_t edge_src_chunk_size{1024}; - uint64_t edge_dst_chunk_size{1024}; - bool is_directed{false}; - graphar::AdjListType ordering; - } * base; - - struct GARVertexType { - PerDatabase *base{nullptr}; - void CheckBase() const { MG_ASSERT(base != nullptr); } - std::string label; - graphar::PropertyGroupVector properties; - std::filesystem::path Prefix() const { - CheckBase(); - return base->vertex_folder_prefix / std::filesystem::path(label); - } - std::string SavePath() const { - CheckBase(); - return base->root / std::filesystem::path(label + base->vertex_metadata_suffix); - } - }; - std::vector vertex_types; - - struct GAREdgeType { - PerDatabase *base{nullptr}; - void CheckBase() const { MG_ASSERT(base != nullptr); } - std::string src_label; - std::string edge_type; - std::string dst_label; - graphar::PropertyGroupVector properties; - std::vector> adjacent_lists; - std::string src_type_dst{src_label + "__" + edge_type + "__" + dst_label}; - std::filesystem::path Prefix() const { - CheckBase(); - return base->edge_folder_prefix / std::filesystem::path(src_type_dst); - } - std::string SavePath() const { - CheckBase(); - return base->root / std::filesystem::path(src_type_dst + base->edge_metadata_suffix); - } - }; - std::vector edge_types; -}; - -auto InitVertexType(const GARDatabaseConfig::GARVertexType &vertex_type) { - auto vertex_info = 
graphar::CreateVertexInfo(vertex_type.label, vertex_type.base->vertex_chunk_size, - vertex_type.properties, vertex_type.Prefix(), vertex_type.base->version); - MG_ASSERT(!vertex_info->Dump().has_error()); - MG_ASSERT(vertex_info->Save(vertex_type.SavePath()).ok()); - return vertex_info; -} - -auto InitVertexTypes(const GARDatabaseConfig &config) { - std::vector> vertex_infos; - for (const auto &vertex_type : config.vertex_types) { - auto vertex_info = InitVertexType(vertex_type); - vertex_infos.push_back(vertex_info); - } - return vertex_infos; -} - -auto InitEdgeType(const GARDatabaseConfig::GAREdgeType &edge_type) { - auto edge_info = graphar::CreateEdgeInfo( - edge_type.src_label, edge_type.edge_type, edge_type.dst_label, edge_type.base->edge_chunk_size, - edge_type.base->edge_src_chunk_size, edge_type.base->edge_dst_chunk_size, edge_type.base->is_directed, - edge_type.adjacent_lists, edge_type.properties, edge_type.Prefix(), edge_type.base->version); - MG_ASSERT(!edge_info->Dump().has_error()); - MG_ASSERT(edge_info->Save(edge_type.SavePath()).ok()); - return edge_info; -} - -auto InitEdgeTypes(const GARDatabaseConfig &config) { - std::vector> edge_infos; - for (const auto &edge_type : config.edge_types) { - auto edge_info = InitEdgeType(edge_type); - edge_infos.push_back(edge_info); - } - return edge_infos; -} - int main(int argc, char **argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); spdlog::set_level(spdlog::level::trace); @@ -127,11 +38,11 @@ int main(int argc, char **argv) { auto property_vector_3 = {graphar::Property("created", graphar::string(), false)}; auto group3 = graphar::CreatePropertyGroup(property_vector_3, graphar::FileType::CSV); - GARDatabaseConfig::PerDatabase per_database = { + memgraph::storage::custom_storage::GARDatabaseConfig::PerDatabase per_database = { .root = "/tmp/gar/", .version = std::make_shared(1), }; - const auto db_config = GARDatabaseConfig{ + const auto db_config = 
memgraph::storage::custom_storage::GARDatabaseConfig{ .base = &per_database, .vertex_types = {{ .base = &per_database, @@ -151,9 +62,9 @@ int main(int argc, char **argv) { // init GAR auto &vertex_type = db_config.vertex_types[0]; - auto vertex_info = InitVertexTypes(db_config)[0]; + auto vertex_info = memgraph::storage::custom_storage::InitVertexTypes(db_config)[0]; auto &edge_type = db_config.edge_types[0]; - auto edge_info = InitEdgeTypes(db_config)[0]; + auto edge_info = memgraph::storage::custom_storage::InitEdgeTypes(db_config)[0]; // vertex data partition 1 graphar::builder::VerticesBuilder builder(vertex_info, vertex_type.base->root, 0); From 9c871808e71e2c9762f7e15277db23fd718ff56e Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Thu, 4 Apr 2024 14:14:15 +0000 Subject: [PATCH 13/25] Merge master and update clang tidy --- .clang-tidy | 1 + 1 file changed, 1 insertion(+) diff --git a/.clang-tidy b/.clang-tidy index c02b7152fc2..de39b7d11dd 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -47,6 +47,7 @@ Checks: '*, -llvmlibc-callee-namespace, -llvmlibc-implementation-in-namespace, -llvmlibc-restrict-system-libc-headers, + -llvmlibc-inline-function-decl, -misc-non-private-member-variables-in-classes, -modernize-avoid-c-arrays, -modernize-concat-nested-namespaces, From f79e131348621d1b4a79a2795fd583790af7be92 Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Mon, 8 Apr 2024 08:58:59 +0000 Subject: [PATCH 14/25] Wire GARDatabase and custom storage part 1 --- src/storage/custom_storage/gar_database.hpp | 8 ++++---- src/storage/custom_storage/vertex.hpp | 5 +++++ tests/manual/graphar.cpp | 11 ++++++++++- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/storage/custom_storage/gar_database.hpp b/src/storage/custom_storage/gar_database.hpp index 741a28b155c..9ea66bb355e 100644 --- a/src/storage/custom_storage/gar_database.hpp +++ b/src/storage/custom_storage/gar_database.hpp @@ -29,10 +29,10 @@ struct GARDatabaseConfig { std::string 
edge_metadata_suffix{".edge.yaml"}; std::filesystem::path vertex_folder_prefix{"vertex"}; std::filesystem::path edge_folder_prefix{"edge"}; - uint64_t vertex_chunk_size{1024}; - uint64_t edge_chunk_size{1024}; - uint64_t edge_src_chunk_size{1024}; - uint64_t edge_dst_chunk_size{1024}; + int64_t vertex_chunk_size{1024}; + int64_t edge_chunk_size{1024}; + int64_t edge_src_chunk_size{1024}; + int64_t edge_dst_chunk_size{1024}; bool is_directed{false}; graphar::AdjListType ordering; } * base; diff --git a/src/storage/custom_storage/vertex.hpp b/src/storage/custom_storage/vertex.hpp index f40981d7238..fcfaa32cbae 100644 --- a/src/storage/custom_storage/vertex.hpp +++ b/src/storage/custom_storage/vertex.hpp @@ -11,6 +11,7 @@ #pragma once +#include #include #include @@ -24,4 +25,8 @@ struct Vertex { std::map properties; }; +struct GARVertex { + graphar::Vertex *vertex; +}; + } // namespace memgraph::storage::custom_storage diff --git a/tests/manual/graphar.cpp b/tests/manual/graphar.cpp index a1f665b10c5..ee75b801a46 100644 --- a/tests/manual/graphar.cpp +++ b/tests/manual/graphar.cpp @@ -9,11 +9,11 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. 
-#include #include #include #include +#include #include #include #include @@ -134,5 +134,14 @@ int main(int argc, char **argv) { builder3.Clear(); MG_ASSERT(builder3.GetNum() == 0); + // std::string path = "/tmp/todo/data.yml"; + // auto graph_info = graphar::GraphInfo::Load(path).value(); + // std::string label = "person"; + // auto maybe_vertices_collection = + // graphar::VerticesCollection::Make(graph_info, label); + // MG_ASSERT(!maybe_vertices_collection.has_error()); + // auto vertices = maybe_vertices_collection.value(); + // auto tmp = vertices->find(0); + return 0; } From 5a0da20d1d81b23e1689c2bfee1c0a9afc65cbfb Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Tue, 23 Apr 2024 13:49:27 +0000 Subject: [PATCH 15/25] Merge master --- src/storage/custom_storage/storage.cpp | 1 - src/storage/custom_storage/storage.hpp | 2 +- src/storage/custom_storage/types.hpp | 39 +++++++++++++++++++ tests/manual/CMakeLists.txt | 3 ++ .../vertex.hpp => tests/manual/pmr.cpp | 30 ++++++-------- 5 files changed, 55 insertions(+), 20 deletions(-) create mode 100644 src/storage/custom_storage/types.hpp rename src/storage/custom_storage/vertex.hpp => tests/manual/pmr.cpp (56%) diff --git a/src/storage/custom_storage/storage.cpp b/src/storage/custom_storage/storage.cpp index 74732aec24a..0e86a341ac1 100644 --- a/src/storage/custom_storage/storage.cpp +++ b/src/storage/custom_storage/storage.cpp @@ -11,7 +11,6 @@ #include "storage/custom_storage/storage.hpp" #include "spdlog/spdlog.h" -#include "utils/logging.hpp" namespace memgraph::storage::custom_storage { diff --git a/src/storage/custom_storage/storage.hpp b/src/storage/custom_storage/storage.hpp index 5b46f3236d1..aa25e7e7b31 100644 --- a/src/storage/custom_storage/storage.hpp +++ b/src/storage/custom_storage/storage.hpp @@ -14,7 +14,7 @@ #include #include "storage/custom_storage/gar_database.hpp" -#include "storage/custom_storage/vertex.hpp" +#include "storage/custom_storage/types.hpp" namespace 
memgraph::storage::custom_storage { diff --git a/src/storage/custom_storage/types.hpp b/src/storage/custom_storage/types.hpp new file mode 100644 index 00000000000..9fbebf0e079 --- /dev/null +++ b/src/storage/custom_storage/types.hpp @@ -0,0 +1,39 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include +#include +#include +#include + +// TODO(gitbuda): What is the right type for IDs? +// TODO(gitbuda): How to safely create all PMR values with minimal code and maximal flexibility? 
+ +namespace memgraph::storage::custom_storage { + +using PropertyValue = std::variant; + +struct Vertex { + PropertyValue id; + std::pmr::vector labels; + std::pmr::unordered_map properties; +}; + +struct Edge { + PropertyValue src_id; + PropertyValue dst_id; + std::pmr::string edge_type; + std::pmr::unordered_map properties; +}; + +} // namespace memgraph::storage::custom_storage diff --git a/tests/manual/CMakeLists.txt b/tests/manual/CMakeLists.txt index 58cd6d6dc19..bc96ce03e3d 100644 --- a/tests/manual/CMakeLists.txt +++ b/tests/manual/CMakeLists.txt @@ -58,3 +58,6 @@ target_link_libraries(${test_prefix}ssl_server mg-communication) add_manual_test(graphar.cpp) target_link_libraries(${test_prefix}graphar graphar mg-utils) + +add_manual_test(pmr.cpp) +target_link_libraries(${test_prefix}pmr mg-utils) diff --git a/src/storage/custom_storage/vertex.hpp b/tests/manual/pmr.cpp similarity index 56% rename from src/storage/custom_storage/vertex.hpp rename to tests/manual/pmr.cpp index fcfaa32cbae..6b6e14c9ad8 100644 --- a/src/storage/custom_storage/vertex.hpp +++ b/tests/manual/pmr.cpp @@ -9,24 +9,18 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. 
-#pragma once +#include +#include +#include -#include -#include -#include +int main() { + std::pmr::monotonic_buffer_resource pool; + std::pmr::vector data{&pool}; -#include "storage/v2/id_types.hpp" -#include "storage/v2/property_value.hpp" + data.emplace_back("bla"); -namespace memgraph::storage::custom_storage { - -struct Vertex { - std::vector labels; - std::map properties; -}; - -struct GARVertex { - graphar::Vertex *vertex; -}; - -} // namespace memgraph::storage::custom_storage + for (const auto &item : data) { + std::cout << item << std::endl; + } + return 0; +} From ebce4fae783905a1ab1f4da537d8e33128669b6e Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Tue, 23 Apr 2024 13:58:38 +0000 Subject: [PATCH 16/25] Switch to the incubator URL --- libs/setup.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libs/setup.sh b/libs/setup.sh index ac1867193da..25636124eb1 100755 --- a/libs/setup.sh +++ b/libs/setup.sh @@ -162,7 +162,7 @@ declare -A secondary_urls=( ["nuraft"]="https://github.com/eBay/NuRaft.git" ["asio"]="https://github.com/chriskohlhoff/asio.git" ["mgcxx"]="http://github.com/memgraph/mgcxx.git" - ["graphar"]="https://github.com/alibaba/GraphAr.git" + ["graphar"]="https://github.com/apache/incubator-graphar.git" ) # antlr @@ -299,11 +299,10 @@ repo_clone_try_double "${primary_urls[asio]}" "${secondary_urls[asio]}" "asio" " ./prepare.sh popd - # mgcxx (text search) mgcxx_tag="v0.0.6" repo_clone_try_double "${primary_urls[mgcxx]}" "${secondary_urls[mgcxx]}" "mgcxx" "$mgcxx_tag" true -# + # GraphAr 2024-03 graphar_tag="v0.11.4" repo_clone_try_double "${primary_urls[graphar]}" "${secondary_urls[graphar]}" "graphar" "$graphar_tag" true From 5bff3833f9880f824ffaba6560b0f35fa41686c2 Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Fri, 26 Apr 2024 13:47:27 +0000 Subject: [PATCH 17/25] Add more comments --- src/query/custom_cursors/bfs.hpp | 14 ++++++++++++++ src/query/custom_cursors/create_edge.hpp | 15 +++++++++++++++ 
src/query/custom_cursors/create_node.hpp | 2 +- src/storage/custom_storage/gar_database.hpp | 2 ++ src/storage/custom_storage/storage.hpp | 6 ++++++ src/storage/custom_storage/types.hpp | 9 ++++++++- 6 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 src/query/custom_cursors/bfs.hpp create mode 100644 src/query/custom_cursors/create_edge.hpp diff --git a/src/query/custom_cursors/bfs.hpp b/src/query/custom_cursors/bfs.hpp new file mode 100644 index 00000000000..750496c3dc0 --- /dev/null +++ b/src/query/custom_cursors/bfs.hpp @@ -0,0 +1,14 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +// TODO(gitbuda): To make BFS fast -> global cache required. diff --git a/src/query/custom_cursors/create_edge.hpp b/src/query/custom_cursors/create_edge.hpp new file mode 100644 index 00000000000..3dfb8644a67 --- /dev/null +++ b/src/query/custom_cursors/create_edge.hpp @@ -0,0 +1,15 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +// TODO(gitbuda): To make this Cypher CREATE edge fast -> add global cache. +// TODO(gitbuda): To make create edge fast -> introduce a new semantic. diff --git a/src/query/custom_cursors/create_node.hpp b/src/query/custom_cursors/create_node.hpp index 6217abe8c98..62016382ca9 100644 --- a/src/query/custom_cursors/create_node.hpp +++ b/src/query/custom_cursors/create_node.hpp @@ -20,7 +20,7 @@ namespace memgraph::query::plan { class CreateNode; -} +} // namespace memgraph::query::plan namespace memgraph::query::custom_cursors { diff --git a/src/storage/custom_storage/gar_database.hpp b/src/storage/custom_storage/gar_database.hpp index 9ea66bb355e..723d415fd3d 100644 --- a/src/storage/custom_storage/gar_database.hpp +++ b/src/storage/custom_storage/gar_database.hpp @@ -19,6 +19,8 @@ #include "utils/logging.hpp" +// TODO(gitbuda): Add simple implementation of transactional support. + namespace memgraph::storage::custom_storage { struct GARDatabaseConfig { diff --git a/src/storage/custom_storage/storage.hpp b/src/storage/custom_storage/storage.hpp index aa25e7e7b31..bd3da8a40c2 100644 --- a/src/storage/custom_storage/storage.hpp +++ b/src/storage/custom_storage/storage.hpp @@ -27,6 +27,12 @@ namespace memgraph::storage::custom_storage { // * parallelization // * ASYNC disk/network access // * per database, maybe even isolated data cache +// * global cache required for both fast Cypher create_edge + BFS +// * https://www.boost.org/doc/libs/1_85_0/boost/compute/detail/lru_cache.hpp -> is there a concurrent one? +// * https://github.com/facebook/hhvm/blob/master/hphp/util/concurrent-lru-cache.h +// * https://github.com/tstarling/thread-safe-lru... 
+// * in ON_DISK we have Transaction -> Skiplist +// * take a look under utils/cache.hpp and utils/lru_cache.hpp // Target queries: // CREATE (:Label {props}); // single vertex create diff --git a/src/storage/custom_storage/types.hpp b/src/storage/custom_storage/types.hpp index 9fbebf0e079..6a61c520631 100644 --- a/src/storage/custom_storage/types.hpp +++ b/src/storage/custom_storage/types.hpp @@ -21,11 +21,18 @@ namespace memgraph::storage::custom_storage { +// TODO(gitbuda): Use memgraph::storage::PropertyValue here beucase it will be allocator aware. using PropertyValue = std::variant; +// NOTE: This should be allocator aware because after import, all that could be deleted. +// * C++Weekly#235 -> https://www.youtube.com/watch?v=vXJ1dwJ9QkI +// * C++Weekly#236 -> https://www.youtube.com/watch?v=2LAsqp7UrNs + +// TODO(gitbuda): Make and test Vertex being allocator aware. struct Vertex { + // This is here because of the hybrid-schema option (having different type of IDs) PropertyValue id; - std::pmr::vector labels; + std::pmr::vector labels; // NOTE: GAR only supports one label per vertex! 
std::pmr::unordered_map properties; }; From eb905f49715cd5262179b67dbc338d26b1df387c Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Fri, 3 May 2024 19:12:18 +0000 Subject: [PATCH 18/25] Patch GraphAR because it doesn't compile under the latest toolchain --- libs/.gitignore | 1 + libs/graphar0.11.4.patch | 15 +++++++++++++++ src/query/custom_cursors/create_node.cpp | 2 +- src/storage/custom_storage/types.hpp | 10 ++++++++-- 4 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 libs/graphar0.11.4.patch diff --git a/libs/.gitignore b/libs/.gitignore index 4e8fa965533..ef246310e4c 100644 --- a/libs/.gitignore +++ b/libs/.gitignore @@ -8,3 +8,4 @@ !antlr4.10.1.patch !rocksdb8.1.1.patch !nuraft.patch +!graphar0.11.4.patch diff --git a/libs/graphar0.11.4.patch b/libs/graphar0.11.4.patch new file mode 100644 index 00000000000..4de6297c4db --- /dev/null +++ b/libs/graphar0.11.4.patch @@ -0,0 +1,15 @@ +diff --git a/cpp/src/filesystem.cc b/cpp/src/filesystem.cc +index cd55a6e..2692121 100644 +--- a/cpp/src/filesystem.cc ++++ b/cpp/src/filesystem.cc +@@ -76,8 +76,8 @@ static Status CastToLargeOffsetArray( + return Status::OK(); + } + +-Result ParseFileSystemUri(const std::string& uri_string) { +- arrow::internal::Uri uri; ++Result ParseFileSystemUri(const std::string& uri_string) { ++ arrow::util::Uri uri; + RETURN_NOT_ARROW_OK(uri.Parse(uri_string)); + return std::move(uri); + } diff --git a/src/query/custom_cursors/create_node.cpp b/src/query/custom_cursors/create_node.cpp index e3dbe99b70a..030f0182536 100644 --- a/src/query/custom_cursors/create_node.cpp +++ b/src/query/custom_cursors/create_node.cpp @@ -16,7 +16,7 @@ #include "query/interpret/frame.hpp" #include "query/plan/operator.hpp" #include "query/plan/scoped_profile.hpp" -#include "storage/custom_storage/vertex.hpp" +#include "storage/custom_storage/types.hpp" #include "utils/logging.hpp" namespace memgraph::query::custom_cursors { diff --git a/src/storage/custom_storage/types.hpp 
b/src/storage/custom_storage/types.hpp index 6a61c520631..9b952a2f66a 100644 --- a/src/storage/custom_storage/types.hpp +++ b/src/storage/custom_storage/types.hpp @@ -12,10 +12,14 @@ #pragma once #include +#include #include #include #include +#include "storage/v2/id_types.hpp" +#include "storage/v2/property_value.hpp" + // TODO(gitbuda): What is the right type for IDs? // TODO(gitbuda): How to safely create all PMR values with minimal code and maximal flexibility? @@ -32,8 +36,10 @@ using PropertyValue = std::variant; struct Vertex { // This is here because of the hybrid-schema option (having different type of IDs) PropertyValue id; - std::pmr::vector labels; // NOTE: GAR only supports one label per vertex! - std::pmr::unordered_map properties; + std::vector labels; + std::map properties; + // std::pmr::vector labels; // NOTE: GAR only supports one label per vertex! + // std::pmr::unordered_map properties; }; struct Edge { From 1ac84d13c973b3d60bef28ad793331462111a65e Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Fri, 31 May 2024 14:29:30 +0000 Subject: [PATCH 19/25] Merge master and a few new comments --- libs/setup.sh | 1 + src/storage/custom_storage/gar_database.hpp | 1 + src/storage/custom_storage/types.hpp | 6 ++++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/libs/setup.sh b/libs/setup.sh index fa822c7cf53..31e92a2c7cc 100755 --- a/libs/setup.sh +++ b/libs/setup.sh @@ -309,3 +309,4 @@ repo_clone_try_double "${primary_urls[mgcxx]}" "${secondary_urls[mgcxx]}" "mgcxx # GraphAr 2024-03 graphar_tag="v0.11.4" repo_clone_try_double "${primary_urls[graphar]}" "${secondary_urls[graphar]}" "graphar" "$graphar_tag" true +git apply ../graphar0.11.4.patch diff --git a/src/storage/custom_storage/gar_database.hpp b/src/storage/custom_storage/gar_database.hpp index 723d415fd3d..10675fe3559 100644 --- a/src/storage/custom_storage/gar_database.hpp +++ b/src/storage/custom_storage/gar_database.hpp @@ -20,6 +20,7 @@ #include "utils/logging.hpp" // 
TODO(gitbuda): Add simple implementation of transactional support. +// TODO(gitbuda): Since only one label per node is supported -> make a reserved property for other labels. namespace memgraph::storage::custom_storage { diff --git a/src/storage/custom_storage/types.hpp b/src/storage/custom_storage/types.hpp index 9b952a2f66a..79709538fef 100644 --- a/src/storage/custom_storage/types.hpp +++ b/src/storage/custom_storage/types.hpp @@ -11,8 +11,8 @@ #pragma once -#include #include +#include #include #include #include @@ -20,12 +20,14 @@ #include "storage/v2/id_types.hpp" #include "storage/v2/property_value.hpp" +// TODO(gitbuda): To create edges, GAR internal vertex id for a given label is required -> calculate or propagate. // TODO(gitbuda): What is the right type for IDs? // TODO(gitbuda): How to safely create all PMR values with minimal code and maximal flexibility? namespace memgraph::storage::custom_storage { -// TODO(gitbuda): Use memgraph::storage::PropertyValue here beucase it will be allocator aware. +// TODO(gitbuda): Use memgraph::storage::PropertyValue here because it will be allocator aware under +// https://github.com/memgraph/memgraph/pull/1895. using PropertyValue = std::variant; // NOTE: This should be allocator aware because after import, all that could be deleted. 
From 4780d1afdd67de2f5f0fa965d33c78fca4735634 Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Sun, 1 Sep 2024 20:20:28 +0000 Subject: [PATCH 20/25] Upgrade to GraphAr v0.12 --- libs/.gitignore | 1 - libs/CMakeLists.txt | 2 +- libs/graphar0.11.4.patch | 15 --------------- libs/setup.sh | 5 ++--- src/storage/custom_storage/gar_database.hpp | 2 +- 5 files changed, 4 insertions(+), 21 deletions(-) delete mode 100644 libs/graphar0.11.4.patch diff --git a/libs/.gitignore b/libs/.gitignore index ef246310e4c..4e8fa965533 100644 --- a/libs/.gitignore +++ b/libs/.gitignore @@ -8,4 +8,3 @@ !antlr4.10.1.patch !rocksdb8.1.1.patch !nuraft.patch -!graphar0.11.4.patch diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt index 874ae44d9dd..b09cf0ba7d5 100644 --- a/libs/CMakeLists.txt +++ b/libs/CMakeLists.txt @@ -341,6 +341,6 @@ import_library(mgcxx_text_search STATIC ${MGCXX_TEXT_SEARCH_LIBRARY} ${MGCXX_INC # Setup GraphAr import_external_library(graphar SHARED - ${CMAKE_CURRENT_SOURCE_DIR}/graphar/cpp/lib/libgar.so + ${CMAKE_CURRENT_SOURCE_DIR}/graphar/cpp/lib/libgraphar.so ${CMAKE_CURRENT_SOURCE_DIR}/graphar/cpp/include SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/graphar/cpp) diff --git a/libs/graphar0.11.4.patch b/libs/graphar0.11.4.patch deleted file mode 100644 index 4de6297c4db..00000000000 --- a/libs/graphar0.11.4.patch +++ /dev/null @@ -1,15 +0,0 @@ -diff --git a/cpp/src/filesystem.cc b/cpp/src/filesystem.cc -index cd55a6e..2692121 100644 ---- a/cpp/src/filesystem.cc -+++ b/cpp/src/filesystem.cc -@@ -76,8 +76,8 @@ static Status CastToLargeOffsetArray( - return Status::OK(); - } - --Result ParseFileSystemUri(const std::string& uri_string) { -- arrow::internal::Uri uri; -+Result ParseFileSystemUri(const std::string& uri_string) { -+ arrow::util::Uri uri; - RETURN_NOT_ARROW_OK(uri.Parse(uri_string)); - return std::move(uri); - } diff --git a/libs/setup.sh b/libs/setup.sh index 31e92a2c7cc..eebef7bf952 100755 --- a/libs/setup.sh +++ b/libs/setup.sh @@ -306,7 +306,6 @@ popd 
mgcxx_tag="v0.0.6" repo_clone_try_double "${primary_urls[mgcxx]}" "${secondary_urls[mgcxx]}" "mgcxx" "$mgcxx_tag" true -# GraphAr 2024-03 -graphar_tag="v0.11.4" +# GraphAr 2024-07-02 +graphar_tag="v0.12.0" repo_clone_try_double "${primary_urls[graphar]}" "${secondary_urls[graphar]}" "graphar" "$graphar_tag" true -git apply ../graphar0.11.4.patch diff --git a/src/storage/custom_storage/gar_database.hpp b/src/storage/custom_storage/gar_database.hpp index 10675fe3559..e2f0f537287 100644 --- a/src/storage/custom_storage/gar_database.hpp +++ b/src/storage/custom_storage/gar_database.hpp @@ -15,7 +15,7 @@ #include #include -#include +#include #include "utils/logging.hpp" From 10bf29de097ecdb2c6e120fa3493a390a8d79bf3 Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Sat, 14 Sep 2024 13:47:48 +0000 Subject: [PATCH 21/25] Add example GAR graph metadata and reading --- src/storage/custom_storage/gar_database.hpp | 28 ++++-- tests/manual/graphar.cpp | 101 ++++++++++++-------- 2 files changed, 80 insertions(+), 49 deletions(-) diff --git a/src/storage/custom_storage/gar_database.hpp b/src/storage/custom_storage/gar_database.hpp index e2f0f537287..05ac5db9917 100644 --- a/src/storage/custom_storage/gar_database.hpp +++ b/src/storage/custom_storage/gar_database.hpp @@ -15,19 +15,19 @@ #include #include -#include +#include "graphar/api.h" +#include "graphar/fwd.h" #include "utils/logging.hpp" -// TODO(gitbuda): Add simple implementation of transactional support. -// TODO(gitbuda): Since only one label per node is supported -> make a reserved property for other labels. 
- namespace memgraph::storage::custom_storage { struct GARDatabaseConfig { struct PerDatabase { std::filesystem::path root{std::filesystem::temp_directory_path()}; // single database root directory std::shared_ptr version; + std::string graph_name{"graph"}; + std::string graph_metadata_suffix{".graph.yaml"}; std::string vertex_metadata_suffix{".vertex.yaml"}; std::string edge_metadata_suffix{".edge.yaml"}; std::filesystem::path vertex_folder_prefix{"vertex"}; @@ -38,10 +38,12 @@ struct GARDatabaseConfig { int64_t edge_dst_chunk_size{1024}; bool is_directed{false}; graphar::AdjListType ordering; - } * base; + std::string SavePath() const { return root / std::filesystem::path(graph_name + graph_metadata_suffix); } + }; + std::shared_ptr base{nullptr}; struct GARVertexType { - PerDatabase *base{nullptr}; + std::shared_ptr base{nullptr}; void CheckBase() const { MG_ASSERT(base != nullptr); } std::string label; graphar::PropertyGroupVector properties; @@ -57,7 +59,7 @@ struct GARDatabaseConfig { std::vector vertex_types; struct GAREdgeType { - PerDatabase *base{nullptr}; + std::shared_ptr base{nullptr}; void CheckBase() const { MG_ASSERT(base != nullptr); } std::string src_label; std::string edge_type; @@ -86,7 +88,7 @@ inline auto InitVertexType(const GARDatabaseConfig::GARVertexType &vertex_type) } inline auto InitVertexTypes(const GARDatabaseConfig &config) { - std::vector> vertex_infos; + graphar::VertexInfoVector vertex_infos; for (const auto &vertex_type : config.vertex_types) { auto vertex_info = InitVertexType(vertex_type); vertex_infos.push_back(vertex_info); @@ -105,7 +107,7 @@ inline auto InitEdgeType(const GARDatabaseConfig::GAREdgeType &edge_type) { } inline auto InitEdgeTypes(const GARDatabaseConfig &config) { - std::vector> edge_infos; + graphar::EdgeInfoVector edge_infos; for (const auto &edge_type : config.edge_types) { auto edge_info = InitEdgeType(edge_type); edge_infos.push_back(edge_info); @@ -113,4 +115,12 @@ inline auto InitEdgeTypes(const 
GARDatabaseConfig &config) { return edge_infos; } +inline auto InitGraph(const GARDatabaseConfig &config, const graphar::VertexInfoVector &vertex_infos, + const graphar::EdgeInfoVector &edge_infos) { + auto graph_info = graphar::CreateGraphInfo(config.base->graph_name, vertex_infos, edge_infos, config.base->root); + MG_ASSERT(!graph_info->Dump().has_error()); + MG_ASSERT(graph_info->Save(config.base->SavePath()).ok()); + return graph_info; +} + } // namespace memgraph::storage::custom_storage diff --git a/tests/manual/graphar.cpp b/tests/manual/graphar.cpp index ee75b801a46..cb5f2b32638 100644 --- a/tests/manual/graphar.cpp +++ b/tests/manual/graphar.cpp @@ -12,20 +12,18 @@ #include #include -#include -#include -#include -#include -#include +#include "gflags/gflags.h" +#include "graphar/graph.h" +#include "graphar/writer/edges_builder.h" +#include "graphar/writer/vertices_builder.h" #include "storage/custom_storage/gar_database.hpp" #include "utils/logging.hpp" -int main(int argc, char **argv) { - gflags::ParseCommandLineFlags(&argc, &argv, true); - spdlog::set_level(spdlog::level::trace); +// https://github.com/apache/incubator-graphar +// https://graphar.apache.org/docs/specification/implementation-status - // "runtime" schema spec START +auto GraphSchema() { // node metadata auto property_vector_1 = {graphar::Property("id", graphar::int64(), true)}; auto property_vector_2 = {graphar::Property("domain", graphar::string(), false), @@ -38,19 +36,22 @@ int main(int argc, char **argv) { auto property_vector_3 = {graphar::Property("created", graphar::string(), false)}; auto group3 = graphar::CreatePropertyGroup(property_vector_3, graphar::FileType::CSV); - memgraph::storage::custom_storage::GARDatabaseConfig::PerDatabase per_database = { + using PerDatabase = memgraph::storage::custom_storage::GARDatabaseConfig::PerDatabase; + auto per_database = std::make_shared(PerDatabase{ .root = "/tmp/gar/", .version = std::make_shared(1), - }; - const auto db_config = 
memgraph::storage::custom_storage::GARDatabaseConfig{ - .base = &per_database, + .graph_name = "test", + .vertex_chunk_size = 4, + }); + return memgraph::storage::custom_storage::GARDatabaseConfig{ + .base = per_database, .vertex_types = {{ - .base = &per_database, + .base = per_database, .label = "node", .properties = {group1, group2}, }}, .edge_types = {{ - .base = &per_database, + .base = per_database, .src_label = "node", .edge_type = "LINK", .dst_label = "node", @@ -58,25 +59,36 @@ int main(int argc, char **argv) { .adjacent_lists = adjacent_lists, }}, }; - // "runtime" schema spec START +} + +int main(int argc, char **argv) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + spdlog::set_level(spdlog::level::trace); - // init GAR + auto db_config = GraphSchema(); auto &vertex_type = db_config.vertex_types[0]; - auto vertex_info = memgraph::storage::custom_storage::InitVertexTypes(db_config)[0]; + auto vertex_infos = memgraph::storage::custom_storage::InitVertexTypes(db_config); + auto vertex_info = vertex_infos[0]; auto &edge_type = db_config.edge_types[0]; - auto edge_info = memgraph::storage::custom_storage::InitEdgeTypes(db_config)[0]; + auto edge_infos = memgraph::storage::custom_storage::InitEdgeTypes(db_config); + auto edge_info = edge_infos[0]; + auto graph_info = memgraph::storage::custom_storage::InitGraph(db_config, vertex_infos, edge_infos); + spdlog::info("GAR initialized"); - // vertex data partition 1 + spdlog::info("== CREATE =="); + // CREATE vertex data partition 1 graphar::builder::VerticesBuilder builder(vertex_info, vertex_type.base->root, 0); builder.SetValidateLevel(graphar::ValidateLevel::strong_validate); - int vertex_count = 2; - std::vector property_names = {"id", "domain"}; - std::vector id = {0, 1}; - std::vector domain = {"google.com", "memgraph.com"}; + int vertex_count = 4; + std::vector property_names = {"id", "domain", "extra"}; + std::vector id = {0, 1, 2, 3}; + std::vector domain = {"google.com", "memgraph.com", 
"nvidia.com", "facebook.com"}; + std::vector extra = {"{key:value}", "{}", "", ""}; for (int i = 0; i < vertex_count; i++) { graphar::builder::Vertex v; - v.AddProperty(property_names[0], id[i]); - v.AddProperty(property_names[1], domain[i]); + v.AddProperty(property_names[0], id[i % 4]); + v.AddProperty(property_names[1], domain[i % 4]); + v.AddProperty(property_names[2], extra[i % 4]); MG_ASSERT(builder.AddVertex(v).ok()); } MG_ASSERT(builder.GetNum() == vertex_count); @@ -85,9 +97,14 @@ int main(int argc, char **argv) { spdlog::info("dump vertices collection successfully!"); builder.Clear(); MG_ASSERT(builder.GetNum() == 0); - // "runtime" schema spec START - // vertex data partition 2 -> IMPORTANT: controlling start_vertex_index means partitioning & parallelization. + // TODO: CREATE vertex data partition 2 + // IMPORTANT: controlling start_vertex_index means partitioning & parallelization, + // BUT it only works if all chunks are monotonically populated + // (from 0 to total vertex_count, no missing vertices). + // The way how how VerticesBuilder is storing size is limiting + // because it just overrides the number of nodes during the Dump call. + // NOTE: It's possible to use low-level primitives to achieve parallelization. 
graphar::builder::VerticesBuilder builder2(vertex_info, vertex_type.base->root, vertex_type.base->vertex_chunk_size * 1); builder.SetValidateLevel(graphar::ValidateLevel::strong_validate); @@ -95,15 +112,15 @@ int main(int argc, char **argv) { property_names = {"id", "domain", "extra"}; id = {2, 3}; domain = {"nvidia.com", "facebook.com"}; - std::vector extra = {"{key:value}", "{}"}; + std::vector extra2 = {"{key:value}", "{}"}; for (int i = 0; i < vertex_count; i++) { graphar::builder::Vertex v; v.AddProperty(property_names[0], id[i]); v.AddProperty(property_names[1], domain[i]); if (i == 0) { - v.AddProperty(property_names[2], extra[0]); + v.AddProperty(property_names[2], extra2[0]); } else { - v.AddProperty(property_names[2], extra[1]); + v.AddProperty(property_names[2], extra2[1]); } MG_ASSERT(builder2.AddVertex(v).ok()); } @@ -114,7 +131,7 @@ int main(int argc, char **argv) { builder2.Clear(); MG_ASSERT(builder2.GetNum() == 0); - // edge data + // CREATE edge data graphar::builder::EdgesBuilder builder3(edge_info, edge_type.base->root, graphar::AdjListType::ordered_by_dest, 1025); builder.SetValidateLevel(graphar::ValidateLevel::strong_validate); int edge_count = 4; @@ -134,14 +151,18 @@ int main(int argc, char **argv) { builder3.Clear(); MG_ASSERT(builder3.GetNum() == 0); - // std::string path = "/tmp/todo/data.yml"; - // auto graph_info = graphar::GraphInfo::Load(path).value(); - // std::string label = "person"; - // auto maybe_vertices_collection = - // graphar::VerticesCollection::Make(graph_info, label); - // MG_ASSERT(!maybe_vertices_collection.has_error()); - // auto vertices = maybe_vertices_collection.value(); - // auto tmp = vertices->find(0); + spdlog::info("== READ =="); + // MATCH vertex data + std::string graph_metadata_path = vertex_type.SavePath(); + auto maybe_vertices = graphar::VerticesCollection::Make(graph_info, vertex_type.label); + auto vertices = maybe_vertices.value(); + spdlog::info(vertices->size()); + auto v_it_begin = 
vertices->begin(), v_it_end = vertices->end(); + for (auto it = v_it_begin; it != v_it_end; ++it) { + auto vertex = *it; + spdlog::info(std::to_string(vertex.property("id").value()) + " " + + vertex.property("domain").value() + " " + vertex.property("extra").value()); + } return 0; } From ab7e4346d3aca582acebe24fff199a5be28d63b4 Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Sun, 22 Sep 2024 13:35:25 +0000 Subject: [PATCH 22/25] Play with the PMR (WIP) --- tests/manual/pmr.cpp | 89 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 84 insertions(+), 5 deletions(-) diff --git a/tests/manual/pmr.cpp b/tests/manual/pmr.cpp index 6b6e14c9ad8..754253a5351 100644 --- a/tests/manual/pmr.cpp +++ b/tests/manual/pmr.cpp @@ -9,18 +9,97 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. +#include +#include #include #include #include +#include + +// Credits to https://github.com/lefticus/cpp_weekly/blob/master/PMR/1_experiments.cpp +template +void print_line(int offset, ItrBegin begin, const ItrEnd end) { + fmt::print("(dec) {:02x}: {:3}\n", offset, fmt::join(begin, end, " ")); + fmt::print("(hex) {:02x}: {:02x}\n", offset, fmt::join(begin, end, " ")); + fmt::print("(asc) {:02x}:", offset); + std::for_each(begin, end, [](const auto c) { + if (std::isgraph(c)) { + fmt::print(" {} ", static_cast(c)); + } else { + fmt::print(" \\{:03o}", c); + } + }); + fmt::print("\n"); +} +template +void print_buffer(const std::string_view title, const Buffer &buffer, const Container &container) { + fmt::print("==============={:^10}==============\n", title); + auto begin = buffer.begin(); + fmt::print("Buffer Address Start: {}\n", static_cast(buffer.data())); + fmt::print("Buffer Address End: {}\n", static_cast(buffer.data() + buffer.size())); + for (const auto &elem : container) { + fmt::print(" Item Address: {}\n", static_cast(&elem)); + } + for (std::size_t offset = 0; offset < buffer.size(); offset += 16) { + print_line(offset, 
std::next(begin, offset), std::next(begin, offset + 16)); + } + fmt::print("\n"); +} + +// https://github.com/lefticus/cpp_weekly/blob/master/PMR/2_aa_type.cpp +// NOTE: For primitive fields allocator doesn't matter. +struct Node { + int64_t id; + int64_t label; + Node(int64_t id, int64_t label) : id(id), label(label) {} + + // std::pmr::string label; + // using allocator_type = std::pmr::polymorphic_allocator<>; + // Node() : Node(allocator_type{}) {} + // explicit Node(allocator_type alloc) {} + // + // Node(const Node &other, allocator_type alloc = {}) : id(other.id), label(other.label) { + // } + // Node(Node &&) = default; + // Node(Node &&other, allocator_type alloc) : id(other.id), label(other.label) { + // other.id = 99; + // other.label = 99; + // } + // Node &operator=(const Node &rhs) = default; + // Node &operator=(Node &&rhs) = default; + // + // ~Node() = default; + // explicit Node(int64_t id, int64_t label) : id(id), label(label) {} +}; + int main() { - std::pmr::monotonic_buffer_resource pool; - std::pmr::vector data{&pool}; + std::array buffer{}; + std::pmr::monotonic_buffer_resource pool(buffer.data(), buffer.size()); - data.emplace_back("bla"); + // // NOTE: vector doesn't live in the buffer, only the data itself is in the buffer. 
+ // // NOTE: pmr objects are longer (std::string 32B, std::pmr::string 40B) + // // NOTE: + // // * std::string => ptr_data + size + data + null + // // * pmr::string => ptr_alloc + ptr_data + size + data + null + // std::pmr::vector data1{&pool}; + // data1.reserve(2); + // print_buffer("initial", buffer, ""); + // data1.emplace_back("foo"); + // print_buffer("data - foo", buffer, data1); + // data1.emplace_back("a very long long bar string"); + // print_buffer("data - foo & bar", buffer, data1); - for (const auto &item : data) { - std::cout << item << std::endl; + std::pmr::vector data2{&pool}; + data2.reserve(2); + print_buffer("initial", buffer, ""); + data2.emplace_back(Node{77, 78}); + // data2[0].id = 77; + // data2[0].label = 78; + data2.emplace_back(std::move(data2[0])); + print_buffer("data", buffer, data2); + for (const auto &item : data2) { + std::cout << item.id << " " << item.label << std::endl; } return 0; } From 3e8fb3d93bf01a2047f3445d367b97ea29c5b9b6 Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Sun, 22 Sep 2024 18:13:04 +0000 Subject: [PATCH 23/25] Add a bit more complex PMR example --- tests/manual/pmr.cpp | 78 +++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 45 deletions(-) diff --git a/tests/manual/pmr.cpp b/tests/manual/pmr.cpp index 754253a5351..dbafecde6c2 100644 --- a/tests/manual/pmr.cpp +++ b/tests/manual/pmr.cpp @@ -11,7 +11,6 @@ #include #include -#include #include #include @@ -51,55 +50,44 @@ void print_buffer(const std::string_view title, const Buffer &buffer, const Cont // NOTE: For primitive fields allocator doesn't matter. 
struct Node { int64_t id; - int64_t label; - Node(int64_t id, int64_t label) : id(id), label(label) {} + std::pmr::string label; + using allocator_type = std::pmr::polymorphic_allocator<>; - // std::pmr::string label; - // using allocator_type = std::pmr::polymorphic_allocator<>; - // Node() : Node(allocator_type{}) {} - // explicit Node(allocator_type alloc) {} - // - // Node(const Node &other, allocator_type alloc = {}) : id(other.id), label(other.label) { - // } - // Node(Node &&) = default; - // Node(Node &&other, allocator_type alloc) : id(other.id), label(other.label) { - // other.id = 99; - // other.label = 99; - // } - // Node &operator=(const Node &rhs) = default; - // Node &operator=(Node &&rhs) = default; - // - // ~Node() = default; - // explicit Node(int64_t id, int64_t label) : id(id), label(label) {} + explicit Node(const int64_t id, const std::string_view label, allocator_type alloc = {}) + : id(id), label(label, alloc) {} + Node(const Node &other, allocator_type alloc = {}) : id(other.id), label(other.label, alloc) {} + Node(Node &&) = default; + Node(Node &&other, allocator_type alloc) : id(other.id), label(std::move(other.label), alloc) {} + Node &operator=(const Node &rhs) = default; + Node &operator=(Node &&rhs) = default; + ~Node() = default; + + allocator_type get_allocator() const { return label.get_allocator(); } }; int main() { - std::array buffer{}; - std::pmr::monotonic_buffer_resource pool(buffer.data(), buffer.size()); - - // // NOTE: vector doesn't live in the buffer, only the data itself is in the buffer. 
- // // NOTE: pmr objects are longer (std::string 32B, std::pmr::string 40B) - // // NOTE: - // // * std::string => ptr_data + size + data + null - // // * pmr::string => ptr_alloc + ptr_data + size + data + null - // std::pmr::vector data1{&pool}; - // data1.reserve(2); - // print_buffer("initial", buffer, ""); - // data1.emplace_back("foo"); - // print_buffer("data - foo", buffer, data1); - // data1.emplace_back("a very long long bar string"); - // print_buffer("data - foo & bar", buffer, data1); + std::array buffer1{}; + std::pmr::monotonic_buffer_resource pool1(buffer1.data(), buffer1.size()); + // NOTE: vector doesn't live in the buffer, only the data itself is inside the buffer. + // NOTE: pmr objects are longer (std::string 32B, std::pmr::string 40B) + // NOTE: + // * std::string => ptr_data + size + data + null + // * pmr::string => ptr_alloc + ptr_data + size + data + null + std::pmr::vector data1{&pool1}; + data1.reserve(2); + print_buffer("initial", buffer1, ""); + data1.emplace_back("foo"); + print_buffer("data - foo", buffer1, data1); + data1.emplace_back("a very long long bar string"); + print_buffer("data - foo & bar", buffer1, data1); - std::pmr::vector data2{&pool}; + std::array buffer2{}; + std::pmr::monotonic_buffer_resource pool2(buffer2.data(), buffer2.size()); + std::pmr::vector data2{&pool2}; data2.reserve(2); - print_buffer("initial", buffer, ""); - data2.emplace_back(Node{77, 78}); - // data2[0].id = 77; - // data2[0].label = 78; - data2.emplace_back(std::move(data2[0])); - print_buffer("data", buffer, data2); - for (const auto &item : data2) { - std::cout << item.id << " " << item.label << std::endl; - } + print_buffer("initial", buffer2, ""); + data2.emplace_back(Node(77, "bla")); + print_buffer("data", buffer2, data2); + return 0; } From 574c70fd60e0a2389e55b8701ceb818232a4233d Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Sun, 22 Sep 2024 20:20:16 +0000 Subject: [PATCH 24/25] Change Vertex and Edge types --- 
src/storage/custom_storage/types.hpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/storage/custom_storage/types.hpp b/src/storage/custom_storage/types.hpp index 3aa4f475358..02e24a3a3f7 100644 --- a/src/storage/custom_storage/types.hpp +++ b/src/storage/custom_storage/types.hpp @@ -14,7 +14,6 @@ #include #include #include -#include #include #include "storage/v2/id_types.hpp" @@ -22,29 +21,30 @@ // TODO(gitbuda): To create edge, GAR internal vertex id for a given label is required -> calculate of propagate. // TODO(gitbuda): What is the right type for IDs? -// TODO(gitbuda): How to safely create all PMR values with minimal code and maximal flexibility? - -namespace memgraph::storage::custom_storage { - -// NOTE: This should be allocator aware because after import, all that could be deleted. +// TODO(gitbuda): How to safely create all PMR values with minimal code and maximal flexibility? -> PMR has overhead. +// NOTE: PMR reasoning -> this should be allocator aware because after import, all that could be deleted. // * C++Weekly#235 -> https://www.youtube.com/watch?v=vXJ1dwJ9QkI // * C++Weekly#236 -> https://www.youtube.com/watch?v=2LAsqp7UrNs +// * --> take a look at tests/manual/pmr.cpp how to make an allocator aware type. + +namespace memgraph::storage::custom_storage { -// TODO(gitbuda): Make and test Vertex being allocator aware. struct Vertex { // This is here because of the hybrid-schema option (having different type of IDs) - memgraph::storage::PropertyValue id; - std::vector labels; - std::map properties; - // std::pmr::vector labels; // NOTE: GAR only supports one label per vertex! - // std::pmr::unordered_map properties; + PropertyValue id; + std::vector labels; // NOTE: GAR only supports one label per vertex! + // Consider replacing map with PropertyStore because it's more efficient. + // NOTE: map is below just because that's what comes from the query engine (example purposes). 
+ std::map properties; }; struct Edge { + PropertyValue id; PropertyValue src_id; PropertyValue dst_id; - std::pmr::string edge_type; - std::pmr::unordered_map properties; + EdgeTypeId edge_type; + // Consider replacing map with PropertyStore because it's more efficient. + std::unordered_map properties; }; } // namespace memgraph::storage::custom_storage From aac9b798bfd6feeab4044f18acf21290363d9120 Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Sat, 5 Oct 2024 18:24:10 +0000 Subject: [PATCH 25/25] Add print_alloc memory resource + create_container func --- tests/manual/pmr.cpp | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/manual/pmr.cpp b/tests/manual/pmr.cpp index dbafecde6c2..23cefe00850 100644 --- a/tests/manual/pmr.cpp +++ b/tests/manual/pmr.cpp @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -65,7 +66,42 @@ struct Node { allocator_type get_allocator() const { return label.get_allocator(); } }; +// thanks to Rahil Baber +// Prints if new/delete gets used. +class print_alloc : public std::pmr::memory_resource { + private: + void *do_allocate(std::size_t bytes, std::size_t alignment) override { + std::cout << "Allocating " << bytes << '\n'; + return std::pmr::new_delete_resource()->allocate(bytes, alignment); + } + void do_deallocate(void *p, std::size_t bytes, std::size_t alignment) override { + std::cout << "Deallocating " << bytes << ": '"; + for (std::size_t i = 0; i < bytes; ++i) { + std::cout << *(static_cast(p) + i); + } + std::cout << "'\n"; + return std::pmr::new_delete_resource()->deallocate(p, bytes, alignment); + } + bool do_is_equal(const std::pmr::memory_resource &other) const noexcept override { + return std::pmr::new_delete_resource()->is_equal(other); + } +}; + +// This is useful because of the problem with initializer lists + alloc + reserve (remember initializer lists are +// broken). 
+template +auto create_container(auto *resource, Values &&...values) { + Container result{resource}; + result.reserve(sizeof...(values)); + (result.emplace_back(std::forward(values)), ...); + return result; +}; + int main() { + // IMPORTANT: This is a super nice debugging technique -> if PMR is not set, the default resource will tell us. + print_alloc mem; + std::pmr::set_default_resource(&mem); + std::array buffer1{}; std::pmr::monotonic_buffer_resource pool1(buffer1.data(), buffer1.size()); // NOTE: vector doesn't live in the buffer, only the data itself is inside the buffer.