diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt index 1060052d8d2..d50aff41e94 100644 --- a/libs/CMakeLists.txt +++ b/libs/CMakeLists.txt @@ -336,6 +336,11 @@ mark_as_advanced(MGCXX_INCLUDE_DIR TANTIVY_TEXT_SEARCH_LIBRARY MGCXX_TEXT_SEARCH import_library(tantivy_text_search STATIC ${TANTIVY_TEXT_SEARCH_LIBRARY} ${MGCXX_INCLUDE_DIR} mgcxx-proj) import_library(mgcxx_text_search STATIC ${MGCXX_TEXT_SEARCH_LIBRARY} ${MGCXX_INCLUDE_DIR} mgcxx-proj) - # setup strong_type (cmake sub_directory tolerant) add_subdirectory(strong_type EXCLUDE_FROM_ALL) + +# Setup GraphAr +import_external_library(graphar SHARED + ${CMAKE_CURRENT_SOURCE_DIR}/graphar/cpp/lib/libgraphar.so + ${CMAKE_CURRENT_SOURCE_DIR}/graphar/cpp/include + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/graphar/cpp) diff --git a/libs/setup.sh b/libs/setup.sh index 21a1c2c61f1..fb2217e3037 100755 --- a/libs/setup.sh +++ b/libs/setup.sh @@ -157,6 +157,7 @@ declare -A primary_urls=( ["nuraft"]="http://$local_cache_host/git/NuRaft.git" ["asio"]="http://$local_cache_host/git/asio.git" ["mgcxx"]="http://$local_cache_host/git/mgcxx.git" + ["graphar"]="http://$local_cache_host/git/GraphAr.git" ["strong_type"]="http://$local_cache_host/git/strong_type.git" ) @@ -191,6 +192,7 @@ declare -A secondary_urls=( ["asio"]="https://github.com/chriskohlhoff/asio.git" ["mgcxx"]="https://github.com/memgraph/mgcxx.git" ["strong_type"]="https://github.com/rollbear/strong_type.git" + ["graphar"]="https://github.com/apache/incubator-graphar.git" ) # antlr @@ -337,3 +339,7 @@ repo_clone_try_double "${primary_urls[mgcxx]}" "${secondary_urls[mgcxx]}" "mgcxx # strong_type v14 strong_type_ref="v14" repo_clone_try_double "${primary_urls[strong_type]}" "${secondary_urls[strong_type]}" "strong_type" "$strong_type_ref" + +# GraphAr 2024-07-02 +graphar_tag="v0.12.0" +repo_clone_try_double "${primary_urls[graphar]}" "${secondary_urls[graphar]}" "graphar" "$graphar_tag" true diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 733f2ec6502..2f377f5b47e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,7 +9,7 @@ add_subdirectory(kvstore) add_subdirectory(telemetry) add_subdirectory(communication) add_subdirectory(memory) -add_subdirectory(storage/v2) +add_subdirectory(storage) add_subdirectory(integrations) add_subdirectory(query) add_subdirectory(glue) diff --git a/src/flags/experimental.cpp b/src/flags/experimental.cpp index 9cf965d76a3..f2cdf95c68e 100644 --- a/src/flags/experimental.cpp +++ b/src/flags/experimental.cpp @@ -48,6 +48,7 @@ auto const canonicalize_string = [](auto &&rng) { namespace memgraph::flags { auto const mapping = std::map{std::pair{"text-search"sv, Experiments::TEXT_SEARCH}, + std::pair{"alternative-storage"sv, Experiments::ALTERNATIVE_STORAGE}, std::pair{"high-availability"sv, Experiments::HIGH_AVAILABILITY}}; auto ExperimentsInstance() -> Experiments & { diff --git a/src/flags/experimental.hpp b/src/flags/experimental.hpp index c21d1719356..40a94974455 100644 --- a/src/flags/experimental.hpp +++ b/src/flags/experimental.hpp @@ -26,6 +26,7 @@ namespace memgraph::flags { enum class Experiments : uint8_t { TEXT_SEARCH = 1 << 0, HIGH_AVAILABILITY = 1 << 1, + ALTERNATIVE_STORAGE = 1 << 2, }; bool AreExperimentsEnabled(Experiments experiments); diff --git a/src/memgraph.cpp b/src/memgraph.cpp index 5bbbf061851..ab594cbd587 100644 --- a/src/memgraph.cpp +++ b/src/memgraph.cpp @@ -557,6 +557,7 @@ int main(int argc, char **argv) { *auth_, FLAGS_data_recovery_on_startup #endif ); + auto custom_storage = std::make_unique(); // Note: Now that all system's subsystems are initialised (dbms & auth) // We can now initialise the recovery of replication (which will include those subsystems) @@ -590,6 +591,8 @@ int main(int argc, char **argv) { auto &interpreter_context_ = memgraph::query::InterpreterContextHolder::GetInstance(); MG_ASSERT(db_acc, "Failed to access the main database"); + // TODO(gitbuda): Init moved here because tests are constructing the interpreter context. + interpreter_context_.custom_storage = custom_storage.get(); memgraph::query::procedure::gModuleRegistry.SetModulesDirectory(memgraph::flags::ParseQueryModulesDirectory(), FLAGS_data_directory); diff --git a/src/query/CMakeLists.txt b/src/query/CMakeLists.txt index 9f0c5a59d5c..3c9f9e878a8 100644 --- a/src/query/CMakeLists.txt +++ b/src/query/CMakeLists.txt @@ -78,6 +78,7 @@ target_sources(mg-query string_helpers.hpp ) +add_subdirectory(custom_cursors) target_link_libraries(mg-query PUBLIC @@ -96,6 +97,7 @@ target_link_libraries(mg-query mg::system mg-flags mg-dbms + mg_custom_cursors mg-events PRIVATE mg-module-support diff --git a/src/query/context.hpp b/src/query/context.hpp index 0e53f602633..82748365a36 100644 --- a/src/query/context.hpp +++ b/src/query/context.hpp @@ -22,6 +22,7 @@ #include "query/trigger.hpp" #include "utils/async_timer.hpp" +#include "storage/custom_storage/storage.hpp" #include "query/frame_change.hpp" #include "query/hops_limit.hpp" @@ -79,6 +80,7 @@ inline std::vector NamesToLabels(const std::vector global cache required. diff --git a/src/query/custom_cursors/create_edge.hpp b/src/query/custom_cursors/create_edge.hpp new file mode 100644 index 00000000000..3dfb8644a67 --- /dev/null +++ b/src/query/custom_cursors/create_edge.hpp @@ -0,0 +1,15 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +// TODO(gitbuda): To make this Cypher CREATE edge fast -> add global cache. +// TODO(gitbuda): To make create edge fast -> introduce a new semantic. diff --git a/src/query/custom_cursors/create_node.cpp b/src/query/custom_cursors/create_node.cpp new file mode 100644 index 00000000000..030f0182536 --- /dev/null +++ b/src/query/custom_cursors/create_node.cpp @@ -0,0 +1,83 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "query/custom_cursors/create_node.hpp" +#include "query/context.hpp" +#include "query/custom_cursors/utils.hpp" +#include "query/interpret/eval.hpp" +#include "query/interpret/frame.hpp" +#include "query/plan/operator.hpp" +#include "query/plan/scoped_profile.hpp" +#include "storage/custom_storage/types.hpp" +#include "utils/logging.hpp" + +namespace memgraph::query::custom_cursors { + +struct QueryVertex {}; + +// Creates a vertex on this GraphDb. Returns a reference to vertex placed on the +// frame. +QueryVertex CreateVertex(const plan::NodeCreationInfo &node_info, Frame *frame, ExecutionContext &context) { + auto &dba = *context.db_accessor; + if (node_info.labels.size() != 1) { + throw QueryRuntimeException( + "0 or multiple labels not yet supported under CreateNode. You have to provide exactly 1 lable for any given " + "vertex/node."); + } + // NOTE: Evaluator should use the latest accessors, as modified in this query, when + // setting properties on new nodes. + // NOTE: Evaluator is using query::DBAccessor of default storage mode (IN_MEM_TX), for props mapping & storage mode. + ExpressionEvaluator evaluator(frame, context.symbol_table, context.evaluation_context, context.db_accessor, + storage::View::NEW); + // TODO: PropsSetChecked allocates a PropertyValue, make it use context.memory + // when we update PropertyValue with custom allocator. + std::map properties; + if (const auto *node_info_properties = std::get_if(&node_info.properties)) { + for (const auto &[key, value_expression] : *node_info_properties) { + properties.emplace(key, value_expression->Accept(evaluator)); + } + } else { + auto property_map = evaluator.Visit(*std::get(node_info.properties)); + for (const auto &[key, value] : property_map.ValueMap()) { + properties.emplace(dba.NameToProperty(key), value); + } + } + // TODO(gitbuda): Put vertex on the frame. (*frame)[node_info.symbol] = new_node; + // (*frame)[node_info.symbol] = new_node; + // return (*frame)[node_info.symbol].ValueVertex(); + + // TODO(gitbuda): node_info.labels change type -> add the transformation. + // auto new_node = memgraph::storage::custom_storage::Vertex{.labels = node_info.labels, .properties = properties}; + auto new_node = memgraph::storage::custom_storage::Vertex{.labels = {}, .properties = properties}; + auto *vertex_ptr = context.custom_storage->AddVertex(std::move(new_node)); + SPDLOG_WARN("{}", context.custom_storage->VerticesNo()); + return QueryVertex{}; +} + +CreateNodeCursor::CreateNodeCursor(const plan::CreateNode &logical_operator, plan::UniqueCursorPtr input_cursor) + : logical_operator_(logical_operator), input_cursor_(std::move(input_cursor)) {} + +bool CreateNodeCursor::Pull(Frame &frame, ExecutionContext &context) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; + memgraph::query::plan::ScopedProfile profile{ComputeProfilingKey(this), "CreateNode", &context}; + SPDLOG_WARN("CreateNodeCursor::Pull"); + if (input_cursor_->Pull(frame, context)) { + CreateVertex(logical_operator_.node_info_, &frame, context); + return true; + } + return false; +} + +void CreateNodeCursor::Shutdown() { input_cursor_->Shutdown(); } + +void CreateNodeCursor::Reset() { input_cursor_->Reset(); } + +} // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/create_node.hpp b/src/query/custom_cursors/create_node.hpp new file mode 100644 index 00000000000..62016382ca9 --- /dev/null +++ b/src/query/custom_cursors/create_node.hpp @@ -0,0 +1,39 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include "query/plan/cursor.hpp" + +// TASKS: +// 1. check label creation access rule +// 2. create vertex +// 3. inform the trigger + +namespace memgraph::query::plan { +class CreateNode; +} // namespace memgraph::query::plan + +namespace memgraph::query::custom_cursors { + +class CreateNodeCursor : public memgraph::query::plan::Cursor { + public: + explicit CreateNodeCursor(const plan::CreateNode &logical_operator, plan::UniqueCursorPtr input_cursor); + bool Pull(Frame &frame, ExecutionContext &context) override; + void Shutdown() override; + void Reset() override; + + private: + const plan::CreateNode &logical_operator_; + const plan::UniqueCursorPtr input_cursor_; +}; + +} // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/once.cpp b/src/query/custom_cursors/once.cpp new file mode 100644 index 00000000000..352e25dbf56 --- /dev/null +++ b/src/query/custom_cursors/once.cpp @@ -0,0 +1,36 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "query/custom_cursors/once.hpp" +#include "query/context.hpp" +#include "query/custom_cursors/utils.hpp" +#include "query/interpret/frame.hpp" +#include "query/plan/scoped_profile.hpp" +#include "utils/logging.hpp" + +namespace memgraph::query::custom_cursors { + +bool OnceCursor::Pull(Frame & /*unused*/, ExecutionContext &context) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; + memgraph::query::plan::ScopedProfile profile{ComputeProfilingKey(this), "Once", &context}; + SPDLOG_WARN("OnceCursor::Pull"); + if (!did_pull_) { + did_pull_ = true; + return true; + } + return false; +} + +void OnceCursor::Shutdown() {} + +void OnceCursor::Reset() { did_pull_ = false; } + +} // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/once.hpp b/src/query/custom_cursors/once.hpp new file mode 100644 index 00000000000..fe37393b7a7 --- /dev/null +++ b/src/query/custom_cursors/once.hpp @@ -0,0 +1,29 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include "query/plan/cursor.hpp" + +namespace memgraph::query::custom_cursors { + +class OnceCursor : public memgraph::query::plan::Cursor { + public: + OnceCursor() = default; + bool Pull(Frame & /*unused*/, ExecutionContext &context) override; + void Shutdown() override; + void Reset() override; + + private: + bool did_pull_{false}; +}; + +} // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/produce.cpp b/src/query/custom_cursors/produce.cpp new file mode 100644 index 00000000000..42c66cf66d0 --- /dev/null +++ b/src/query/custom_cursors/produce.cpp @@ -0,0 +1,34 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "query/custom_cursors/produce.hpp" +#include "query/context.hpp" +#include "query/custom_cursors/utils.hpp" +#include "query/interpret/frame.hpp" +#include "spdlog/spdlog.h" +#include "utils/logging.hpp" + +namespace memgraph::query::custom_cursors { + +ProduceCursor::ProduceCursor(plan::UniqueCursorPtr input_cursor) : input_cursor_(std::move(input_cursor)) {} + +bool ProduceCursor::Pull(Frame &frame, ExecutionContext &context) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; + memgraph::query::plan::ScopedProfile profile{ComputeProfilingKey(this), "Produce", &context}; + SPDLOG_WARN("Produce"); + return input_cursor_->Pull(frame, context); +} + +void ProduceCursor::Shutdown() { input_cursor_->Shutdown(); } + +void ProduceCursor::Reset() { input_cursor_->Reset(); } + +} // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/produce.hpp b/src/query/custom_cursors/produce.hpp new file mode 100644 index 00000000000..1ce9dd1d76b --- /dev/null +++ b/src/query/custom_cursors/produce.hpp @@ -0,0 +1,29 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include "query/plan/cursor.hpp" + +namespace memgraph::query::custom_cursors { + +class ProduceCursor : public memgraph::query::plan::Cursor { + public: + explicit ProduceCursor(plan::UniqueCursorPtr input_cursor); + bool Pull(Frame &frame, ExecutionContext &context) override; + void Shutdown() override; + void Reset() override; + + private: + const plan::UniqueCursorPtr input_cursor_; +}; + +} // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/scanall.cpp b/src/query/custom_cursors/scanall.cpp new file mode 100644 index 00000000000..f13066e1171 --- /dev/null +++ b/src/query/custom_cursors/scanall.cpp @@ -0,0 +1,35 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "query/custom_cursors/scanall.hpp" +#include "query/context.hpp" +#include "query/custom_cursors/utils.hpp" +#include "query/interpret/frame.hpp" +#include "query/plan/scoped_profile.hpp" + +namespace memgraph::query::custom_cursors { + +ScanAllCursor::ScanAllCursor(Symbol output_symbol, plan::UniqueCursorPtr input_cursor) + : output_symbol_(std::move(output_symbol)), input_cursor_(std::move(input_cursor)) {} + +bool ScanAllCursor::Pull(Frame &frame, ExecutionContext &context) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; + memgraph::query::plan::ScopedProfile profile{ComputeProfilingKey(this), "ScanAll", &context}; + SPDLOG_WARN("ScanAll"); + context.custom_storage->Call(); + return input_cursor_->Pull(frame, context); +} + +void ScanAllCursor::Shutdown() { input_cursor_->Shutdown(); } + +void ScanAllCursor::Reset() { input_cursor_->Reset(); } + +} // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/scanall.hpp b/src/query/custom_cursors/scanall.hpp new file mode 100644 index 00000000000..46db3be112b --- /dev/null +++ b/src/query/custom_cursors/scanall.hpp @@ -0,0 +1,31 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include "query/frontend/semantic/symbol.hpp" +#include "query/plan/cursor.hpp" + +namespace memgraph::query::custom_cursors { + +class ScanAllCursor : public memgraph::query::plan::Cursor { + public: + explicit ScanAllCursor(Symbol output_symbol, plan::UniqueCursorPtr input_cursor); + bool Pull(Frame &frame, ExecutionContext &context) override; + void Shutdown() override; + void Reset() override; + + private: + const Symbol output_symbol_; + const plan::UniqueCursorPtr input_cursor_; +}; + +} // namespace memgraph::query::custom_cursors diff --git a/src/query/custom_cursors/utils.hpp b/src/query/custom_cursors/utils.hpp new file mode 100644 index 00000000000..562c48ea590 --- /dev/null +++ b/src/query/custom_cursors/utils.hpp @@ -0,0 +1,22 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include + +#include "query/plan/scoped_profile.hpp" + +template +uint64_t ComputeProfilingKey(const T *obj) { + static_assert(sizeof(T *) == sizeof(uint64_t)); + return reinterpret_cast(obj); +} diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index e5841279bb6..ba12d491d3a 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -88,6 +88,7 @@ #include "replication/config.hpp" #include "replication/state.hpp" #include "spdlog/spdlog.h" +#include "storage/custom_storage/storage.hpp" #include "storage/v2/disk/storage.hpp" #include "storage/v2/edge.hpp" #include "storage/v2/edge_import_mode.hpp" @@ -1981,6 +1982,7 @@ PullPlan::PullPlan(const std::shared_ptr plan, const Parameters &pa cursor_(plan->plan().MakeCursor(execution_memory)), frame_(plan->symbol_table().max_position(), execution_memory), memory_limit_(memory_limit) { + ctx_.custom_storage = interpreter_context->custom_storage; ctx_.hops_limit = query::HopsLimit{hops_limit}; ctx_.db_accessor = dba; ctx_.symbol_table = plan->symbol_table(); diff --git a/src/query/interpreter_context.cpp b/src/query/interpreter_context.cpp index b844eeab7a7..4bac346c2be 100644 --- a/src/query/interpreter_context.cpp +++ b/src/query/interpreter_context.cpp @@ -14,6 +14,7 @@ #include "query/interpreter_context.hpp" #include "query/interpreter.hpp" +#include "storage/custom_storage/storage.hpp" #include "system/include/system/system.hpp" namespace memgraph::query { diff --git a/src/query/interpreter_context.hpp b/src/query/interpreter_context.hpp index 51e43397b87..c2e77caf478 100644 --- a/src/query/interpreter_context.hpp +++ b/src/query/interpreter_context.hpp @@ -24,6 +24,7 @@ #include "query/replication_query_handler.hpp" #include "query/typed_value.hpp" #include "replication/state.hpp" +#include "storage/custom_storage/storage.hpp" #include "storage/v2/config.hpp" #include "storage/v2/transaction.hpp" #include "system/state.hpp" @@ -57,6 +58,8 @@ struct QueryUserOrRole; */ struct InterpreterContext { memgraph::dbms::DbmsHandler *dbms_handler; + // TODO(gitbuda): The storage should be under multi-tenancy -> it figure out. + memgraph::storage::custom_storage::Storage *custom_storage; // Internal const InterpreterConfig config; diff --git a/src/query/plan/cursor.hpp b/src/query/plan/cursor.hpp new file mode 100644 index 00000000000..bdbaf3c29d3 --- /dev/null +++ b/src/query/plan/cursor.hpp @@ -0,0 +1,71 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include +#include + +#include "utils/memory.hpp" + +namespace memgraph::query { + +struct ExecutionContext; +class Frame; + +namespace plan { + +/// Base class for iteration cursors of @c LogicalOperator classes. +/// +/// Each @c LogicalOperator must produce a concrete @c Cursor, which provides +/// the iteration mechanism. +class Cursor { + public: + /// Run an iteration of a @c LogicalOperator. + /// + /// Since operators may be chained, the iteration may pull results from + /// multiple operators. + /// + /// @param Frame May be read from or written to while performing the + /// iteration. + /// @param ExecutionContext Used to get the position of symbols in frame and + /// other information. + /// + /// @throws QueryRuntimeException if something went wrong with execution + virtual bool Pull(Frame &, ExecutionContext &) = 0; + + /// Resets the Cursor to its initial state. + virtual void Reset() = 0; + + /// Perform cleanup which may throw an exception + virtual void Shutdown() = 0; + + virtual ~Cursor() = default; +}; + +/// unique_ptr to Cursor managed with a custom deleter. +/// This allows us to use utils::MemoryResource for allocation. +using UniqueCursorPtr = std::unique_ptr>; + +template +std::unique_ptr> MakeUniqueCursorPtr(utils::Allocator allocator, + TArgs &&...args) { + auto *cursor = allocator.template new_object(std::forward(args)...); + auto dtr = [allocator](Cursor *base_ptr) mutable { + auto *p = static_cast(base_ptr); + allocator.delete_object(p); + }; + // TODO: not std::function + return std::unique_ptr>(cursor, std::move(dtr)); +} + +} // namespace plan +} // namespace memgraph::query diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index d6d9bb0e314..35d070365e6 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -27,8 +27,11 @@ #include #include +#include "flags/experimental.hpp" #include "memory/query_memory_control.hpp" #include "query/common.hpp" +#include "query/custom_cursors/produce.hpp" +#include "query/custom_cursors/scanall.hpp" #include "query/procedure/module_fwd.hpp" #include "spdlog/spdlog.h" @@ -37,6 +40,7 @@ #include "license/license.hpp" #include "query/auth_checker.hpp" #include "query/context.hpp" +#include "query/custom_cursors/all.hpp" #include "query/db_accessor.hpp" #include "query/exceptions.hpp" #include "query/frontend/ast/ast.hpp" @@ -223,6 +227,10 @@ bool Once::OnceCursor::Pull(Frame &, ExecutionContext &context) { UniqueCursorPtr Once::MakeCursor(utils::MemoryResource *mem) const { memgraph::metrics::IncrementCounter(memgraph::metrics::OnceOperator); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::ALTERNATIVE_STORAGE)) { + return MakeUniqueCursorPtr(mem); + } + return MakeUniqueCursorPtr(mem); } @@ -295,6 +303,10 @@ ACCEPT_WITH_INPUT(CreateNode) UniqueCursorPtr CreateNode::MakeCursor(utils::MemoryResource *mem) const { memgraph::metrics::IncrementCounter(memgraph::metrics::CreateNodeOperator); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::ALTERNATIVE_STORAGE)) { + return MakeUniqueCursorPtr(mem, *this, input_->MakeCursor(mem)); + } + return MakeUniqueCursorPtr(mem, *this, mem); } @@ -626,6 +638,11 @@ ACCEPT_WITH_INPUT(ScanAll) UniqueCursorPtr ScanAll::MakeCursor(utils::MemoryResource *mem) const { memgraph::metrics::IncrementCounter(memgraph::metrics::ScanAllOperator); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::ALTERNATIVE_STORAGE)) { + return MakeUniqueCursorPtr(mem, output_symbol_, + input_->MakeCursor(mem)); + } + auto vertices = [this](Frame &, ExecutionContext &context) { auto *db = context.db_accessor; return std::make_optional(db->Vertices(view_)); @@ -2863,6 +2880,11 @@ ACCEPT_WITH_INPUT(Produce) UniqueCursorPtr Produce::MakeCursor(utils::MemoryResource *mem) const { memgraph::metrics::IncrementCounter(memgraph::metrics::ProduceOperator); + // NOTE(gitbuda): Since Produce is a stateless cursor -> it's possible to reuse it! + // if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::ALTERNATIVE_STORAGE)) { + // return MakeUniqueCursorPtr(mem, input_->MakeCursor(mem)); + // } + return MakeUniqueCursorPtr(mem, *this, mem); } @@ -3717,6 +3739,8 @@ class EmptyResultCursor : public Cursor { UniqueCursorPtr EmptyResult::MakeCursor(utils::MemoryResource *mem) const { memgraph::metrics::IncrementCounter(memgraph::metrics::EmptyResultOperator); + // NOTE(gitbuda): Needed in plain CREATE query -> reused. + return MakeUniqueCursorPtr(mem, *this, mem); } @@ -3784,6 +3808,8 @@ class AccumulateCursor : public Cursor { UniqueCursorPtr Accumulate::MakeCursor(utils::MemoryResource *mem) const { memgraph::metrics::IncrementCounter(memgraph::metrics::AccumulateOperator); + // NOTE(gitbuda): Also needed in CREATE RETURN -> reused. + return MakeUniqueCursorPtr(mem, *this, mem); } diff --git a/src/query/plan/operator.hpp b/src/query/plan/operator.hpp index fb731f2cc01..5b3446d005b 100644 --- a/src/query/plan/operator.hpp +++ b/src/query/plan/operator.hpp @@ -22,6 +22,7 @@ #include "query/common.hpp" #include "query/frontend/ast/ast.hpp" #include "query/frontend/semantic/symbol.hpp" +#include "query/plan/cursor.hpp" #include "query/plan/preprocess.hpp" #include "query/typed_value.hpp" #include "storage/v2/id_types.hpp" @@ -34,57 +35,11 @@ namespace memgraph::query { -struct ExecutionContext; class ExpressionEvaluator; -class Frame; class SymbolTable; namespace plan { -/// Base class for iteration cursors of @c LogicalOperator classes. -/// -/// Each @c LogicalOperator must produce a concrete @c Cursor, which provides -/// the iteration mechanism. -class Cursor { - public: - /// Run an iteration of a @c LogicalOperator. - /// - /// Since operators may be chained, the iteration may pull results from - /// multiple operators. - /// - /// @param Frame May be read from or written to while performing the - /// iteration. - /// @param ExecutionContext Used to get the position of symbols in frame and - /// other information. - /// - /// @throws QueryRuntimeException if something went wrong with execution - virtual bool Pull(Frame &, ExecutionContext &) = 0; - - /// Resets the Cursor to its initial state. - virtual void Reset() = 0; - - /// Perform cleanup which may throw an exception - virtual void Shutdown() = 0; - - virtual ~Cursor() = default; -}; - -/// unique_ptr to Cursor managed with a custom deleter. -/// This allows us to use utils::MemoryResource for allocation. -using UniqueCursorPtr = std::unique_ptr>; - -template -std::unique_ptr> MakeUniqueCursorPtr(utils::Allocator allocator, - TArgs &&...args) { - auto *cursor = allocator.template new_object(std::forward(args)...); - auto dtr = [allocator](Cursor *base_ptr) mutable { - auto *p = static_cast(base_ptr); - allocator.delete_object(p); - }; - // TODO: not std::function - return std::unique_ptr>(cursor, std::move(dtr)); -} - class Once; class CreateNode; class CreateExpand; diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt new file mode 100644 index 00000000000..a707bd4f3a7 --- /dev/null +++ b/src/storage/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory(custom_storage) +add_subdirectory(v2) diff --git a/src/storage/custom_storage/CMakeLists.txt b/src/storage/custom_storage/CMakeLists.txt new file mode 100644 index 00000000000..5666e9f4d54 --- /dev/null +++ b/src/storage/custom_storage/CMakeLists.txt @@ -0,0 +1,5 @@ +set(mg_custom_storage_sources + storage.cpp +) +add_library(mg_custom_storage STATIC ${mg_custom_storage_sources}) +target_link_libraries(mg_custom_storage mg-utils strong_type::strong_type rangev3 graphar) diff --git a/src/storage/custom_storage/gar_database.hpp b/src/storage/custom_storage/gar_database.hpp new file mode 100644 index 00000000000..05ac5db9917 --- /dev/null +++ b/src/storage/custom_storage/gar_database.hpp @@ -0,0 +1,126 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include +#include +#include + +#include "graphar/api.h" +#include "graphar/fwd.h" + +#include "utils/logging.hpp" + +namespace memgraph::storage::custom_storage { + +struct GARDatabaseConfig { + struct PerDatabase { + std::filesystem::path root{std::filesystem::temp_directory_path()}; // single database root directory + std::shared_ptr version; + std::string graph_name{"graph"}; + std::string graph_metadata_suffix{".graph.yaml"}; + std::string vertex_metadata_suffix{".vertex.yaml"}; + std::string edge_metadata_suffix{".edge.yaml"}; + std::filesystem::path vertex_folder_prefix{"vertex"}; + std::filesystem::path edge_folder_prefix{"edge"}; + int64_t vertex_chunk_size{1024}; + int64_t edge_chunk_size{1024}; + int64_t edge_src_chunk_size{1024}; + int64_t edge_dst_chunk_size{1024}; + bool is_directed{false}; + graphar::AdjListType ordering; + std::string SavePath() const { return root / std::filesystem::path(graph_name + graph_metadata_suffix); } + }; + std::shared_ptr base{nullptr}; + + struct GARVertexType { + std::shared_ptr base{nullptr}; + void CheckBase() const { MG_ASSERT(base != nullptr); } + std::string label; + graphar::PropertyGroupVector properties; + std::filesystem::path Prefix() const { + CheckBase(); + return base->vertex_folder_prefix / std::filesystem::path(label); + } + std::string SavePath() const { + CheckBase(); + return base->root / std::filesystem::path(label + base->vertex_metadata_suffix); + } + }; + std::vector vertex_types; + + struct GAREdgeType { + std::shared_ptr base{nullptr}; + void CheckBase() const { MG_ASSERT(base != nullptr); } + std::string src_label; + std::string edge_type; + std::string dst_label; + graphar::PropertyGroupVector properties; + std::vector> adjacent_lists; + std::string src_type_dst{src_label + "__" + edge_type + "__" + dst_label}; + std::filesystem::path Prefix() const { + CheckBase(); + return base->edge_folder_prefix / std::filesystem::path(src_type_dst); + } + std::string SavePath() const { + CheckBase(); + return base->root / std::filesystem::path(src_type_dst + base->edge_metadata_suffix); + } + }; + std::vector edge_types; +}; + +inline auto InitVertexType(const GARDatabaseConfig::GARVertexType &vertex_type) { + auto vertex_info = graphar::CreateVertexInfo(vertex_type.label, vertex_type.base->vertex_chunk_size, + vertex_type.properties, vertex_type.Prefix(), vertex_type.base->version); + MG_ASSERT(!vertex_info->Dump().has_error()); + MG_ASSERT(vertex_info->Save(vertex_type.SavePath()).ok()); + return vertex_info; +} + +inline auto InitVertexTypes(const GARDatabaseConfig &config) { + graphar::VertexInfoVector vertex_infos; + for (const auto &vertex_type : config.vertex_types) { + auto vertex_info = InitVertexType(vertex_type); + vertex_infos.push_back(vertex_info); + } + return vertex_infos; +} + +inline auto InitEdgeType(const GARDatabaseConfig::GAREdgeType &edge_type) { + auto edge_info = graphar::CreateEdgeInfo( + edge_type.src_label, edge_type.edge_type, edge_type.dst_label, edge_type.base->edge_chunk_size, + edge_type.base->edge_src_chunk_size, edge_type.base->edge_dst_chunk_size, edge_type.base->is_directed, + edge_type.adjacent_lists, edge_type.properties, edge_type.Prefix(), edge_type.base->version); + MG_ASSERT(!edge_info->Dump().has_error()); + MG_ASSERT(edge_info->Save(edge_type.SavePath()).ok()); + return edge_info; +} + +inline auto InitEdgeTypes(const GARDatabaseConfig &config) { + graphar::EdgeInfoVector edge_infos; + for (const auto &edge_type : config.edge_types) { + auto edge_info = InitEdgeType(edge_type); + edge_infos.push_back(edge_info); + } + return edge_infos; +} + +inline auto InitGraph(const GARDatabaseConfig &config, const graphar::VertexInfoVector &vertex_infos, + const graphar::EdgeInfoVector &edge_infos) { + auto graph_info = graphar::CreateGraphInfo(config.base->graph_name, vertex_infos, edge_infos, config.base->root); + MG_ASSERT(!graph_info->Dump().has_error()); + MG_ASSERT(graph_info->Save(config.base->SavePath()).ok()); + return graph_info; +} + +} // namespace memgraph::storage::custom_storage diff --git a/src/storage/custom_storage/storage.cpp b/src/storage/custom_storage/storage.cpp new file mode 100644 index 00000000000..0e86a341ac1 --- /dev/null +++ b/src/storage/custom_storage/storage.cpp @@ -0,0 +1,25 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "storage/custom_storage/storage.hpp" +#include "spdlog/spdlog.h" + +namespace memgraph::storage::custom_storage { + +void Storage::Call() { SPDLOG_WARN("Storage Call"); } + +Vertex *Storage::AddVertex(Vertex &&vertex) { + return vertices_.emplace_back(std::make_unique(std::move(vertex))).get(); +} + +uint64_t Storage::VerticesNo() const { return vertices_.size(); } + +} // namespace memgraph::storage::custom_storage diff --git a/src/storage/custom_storage/storage.hpp b/src/storage/custom_storage/storage.hpp new file mode 100644 index 00000000000..bd3da8a40c2 --- /dev/null +++ b/src/storage/custom_storage/storage.hpp @@ -0,0 +1,61 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include + +#include "storage/custom_storage/gar_database.hpp" +#include "storage/custom_storage/types.hpp" + +namespace memgraph::storage::custom_storage { + +// Desing ideas: +// * try to follow existing API design +// * one of the issue is granular and SYNC API design +// * try to reuse exisitng in-memory data structures +// * PropertyStore + all PropertyValues seem very reusable +// * Vertex/Edge seems not very reusable because (delta*, edges*) +// * parallelization +// * ASYNC disk/network access +// * per database, maybe even isolated data cache +// * global cache required for both fast Cypher create_edge + BFS +// * https://www.boost.org/doc/libs/1_85_0/boost/compute/detail/lru_cache.hpp -> is there a concurrent one? +// * https://github.com/facebook/hhvm/blob/master/hphp/util/concurrent-lru-cache.h +// * https://github.com/tstarling/thread-safe-lru... +// * in ON_DISK we have Transaction -> Skiplist +// * take a look under utils/cache.hpp and utils/lru_cache.hpp + +// Target queries: +// CREATE (:Label {props}); // single vertex create +// UNWIND vertices_props AS props CREATE (n:Label) SET n += props; // batch vertex create +// MATCH (n:Label {id:X}) RETURN n; // single vertex lookup +// MATCH (n1:Label {id:X}) MATCH (n2:Label {id:Y}) CREATE (n1)-[r:Type {props}]->(n2); // single edge create +// // batch edge create +// // iterate all verteices with limited memory usage -> "global" graph algos possible +// // iterate all edges with limited memory usage -> "global" graph algos possible +// // get IN/OUT/ALL edges for a given vertex -> "global" graph algos possible +// // BFS with filter lambda + +class Storage { + public: + void Call(); + // TODO(gitbuda): Make AddVertex thread-safe / concurrent. + Vertex *AddVertex(Vertex &&vertex); + uint64_t VerticesNo() const; + + private: + // TODO(gitbuda): vector is a horrible choice here -> on resize -> :boom: -> list is here just TMP + std::list> vertices_; + GARDatabaseConfig config_; +}; + +} // namespace memgraph::storage::custom_storage diff --git a/src/storage/custom_storage/types.hpp b/src/storage/custom_storage/types.hpp new file mode 100644 index 00000000000..02e24a3a3f7 --- /dev/null +++ b/src/storage/custom_storage/types.hpp @@ -0,0 +1,50 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include +#include +#include +#include + +#include "storage/v2/id_types.hpp" +#include "storage/v2/property_value.hpp" + +// TODO(gitbuda): To create edge, GAR internal vertex id for a given label is required -> calculate of propagate. +// TODO(gitbuda): What is the right type for IDs? +// TODO(gitbuda): How to safely create all PMR values with minimal code and maximal flexibility? -> PMR has overhead. +// NOTE: PMR reasoning -> this should be allocator aware because after import, all that could be deleted. +// * C++Weekly#235 -> https://www.youtube.com/watch?v=vXJ1dwJ9QkI +// * C++Weekly#236 -> https://www.youtube.com/watch?v=2LAsqp7UrNs +// * --> take a look at tests/manual/pmr.cpp how to make an allocator aware type. + +namespace memgraph::storage::custom_storage { + +struct Vertex { + // This is here because of the hybrid-schema option (having different type of IDs) + PropertyValue id; + std::vector labels; // NOTE: GAR only supports one label per vertex! + // Consider replacing map with PropertyStore because it's more efficient. + // NOTE: map is below just because that's comes from the query engine (example purposes). + std::map properties; +}; + +struct Edge { + PropertyValue id; + PropertyValue src_id; + PropertyValue dst_id; + EdgeTypeId edge_type; + // Consider replacing map with PropertyStore because it's more efficient. + std::unordered_map properties; +}; + +} // namespace memgraph::storage::custom_storage diff --git a/src/storage/v2/constraints/constraints.cpp b/src/storage/v2/constraints/constraints.cpp index 9e1d9ee902f..624862e19b7 100644 --- a/src/storage/v2/constraints/constraints.cpp +++ b/src/storage/v2/constraints/constraints.cpp @@ -26,6 +26,9 @@ Constraints::Constraints(const Config &config, StorageMode storage_mode) { case StorageMode::ON_DISK_TRANSACTIONAL: unique_constraints_ = std::make_unique(config); break; + case StorageMode::ALTERNATIVE_STORAGE: + // TODO(gitbuda): Don't pass + break; }; }); } diff --git a/src/storage/v2/storage_mode.cpp b/src/storage/v2/storage_mode.cpp index 06764685489..0b39a5fbc80 100644 --- a/src/storage/v2/storage_mode.cpp +++ b/src/storage/v2/storage_mode.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -25,6 +25,8 @@ std::string_view StorageModeToString(memgraph::storage::StorageMode storage_mode return "IN_MEMORY_TRANSACTIONAL"; case memgraph::storage::StorageMode::ON_DISK_TRANSACTIONAL: return "ON_DISK_TRANSACTIONAL"; + case memgraph::storage::StorageMode::ALTERNATIVE_STORAGE: + return "ALTERNATIVE_STORAGE"; } } diff --git a/src/storage/v2/storage_mode.hpp b/src/storage/v2/storage_mode.hpp index f4a133f386a..98f209fa376 100644 --- a/src/storage/v2/storage_mode.hpp +++ b/src/storage/v2/storage_mode.hpp @@ -16,9 +16,15 @@ #include namespace memgraph::storage { -enum class StorageMode : std::uint8_t { IN_MEMORY_ANALYTICAL, IN_MEMORY_TRANSACTIONAL, ON_DISK_TRANSACTIONAL }; +enum class StorageMode : std::uint8_t { + IN_MEMORY_ANALYTICAL, + IN_MEMORY_TRANSACTIONAL, + ON_DISK_TRANSACTIONAL, + ALTERNATIVE_STORAGE +}; inline constexpr std::array storage_mode_mappings{ + std::pair{std::string_view{"ALTERNATIVE_STORAGE"}, memgraph::storage::StorageMode::ALTERNATIVE_STORAGE}, std::pair{std::string_view{"IN_MEMORY_TRANSACTIONAL"}, memgraph::storage::StorageMode::IN_MEMORY_TRANSACTIONAL}, std::pair{std::string_view{"IN_MEMORY_ANALYTICAL"}, memgraph::storage::StorageMode::IN_MEMORY_ANALYTICAL}, std::pair{std::string_view{"ON_DISK_TRANSACTIONAL"}, memgraph::storage::StorageMode::ON_DISK_TRANSACTIONAL}}; diff --git a/tests/manual/CMakeLists.txt b/tests/manual/CMakeLists.txt index 0a46b8e60db..bc96ce03e3d 100644 --- a/tests/manual/CMakeLists.txt +++ b/tests/manual/CMakeLists.txt @@ -55,3 +55,9 @@ target_link_libraries(${test_prefix}ssl_client mg-communication) add_manual_test(ssl_server.cpp) target_link_libraries(${test_prefix}ssl_server mg-communication) + +add_manual_test(graphar.cpp) +target_link_libraries(${test_prefix}graphar graphar mg-utils) + +add_manual_test(pmr.cpp) +target_link_libraries(${test_prefix}pmr mg-utils) diff --git a/tests/manual/graphar.cpp b/tests/manual/graphar.cpp new file mode 100644 index 00000000000..cb5f2b32638 --- /dev/null +++ b/tests/manual/graphar.cpp @@ -0,0 +1,168 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include +#include + +#include "gflags/gflags.h" +#include "graphar/graph.h" +#include "graphar/writer/edges_builder.h" +#include "graphar/writer/vertices_builder.h" + +#include "storage/custom_storage/gar_database.hpp" +#include "utils/logging.hpp" + +// https://github.com/apache/incubator-graphar +// https://graphar.apache.org/docs/specification/implementation-status + +auto GraphSchema() { + // node metadata + auto property_vector_1 = {graphar::Property("id", graphar::int64(), true)}; + auto property_vector_2 = {graphar::Property("domain", graphar::string(), false), + graphar::Property("extra", graphar::string(), false)}; + auto group1 = graphar::CreatePropertyGroup(property_vector_1, graphar::FileType::CSV); + auto group2 = graphar::CreatePropertyGroup(property_vector_2, graphar::FileType::CSV); + // edge metadata + auto adjacent_lists = {graphar::CreateAdjacentList(graphar::AdjListType::ordered_by_source, graphar::FileType::CSV), + graphar::CreateAdjacentList(graphar::AdjListType::ordered_by_dest, graphar::FileType::CSV)}; + auto property_vector_3 = {graphar::Property("created", graphar::string(), false)}; + auto group3 = graphar::CreatePropertyGroup(property_vector_3, graphar::FileType::CSV); + + using PerDatabase = memgraph::storage::custom_storage::GARDatabaseConfig::PerDatabase; + auto per_database = std::make_shared(PerDatabase{ + .root = "/tmp/gar/", + .version = std::make_shared(1), + .graph_name = "test", + .vertex_chunk_size = 4, + }); + return memgraph::storage::custom_storage::GARDatabaseConfig{ + .base = per_database, + .vertex_types = {{ + .base = per_database, + .label = "node", + .properties = {group1, group2}, + }}, + .edge_types = {{ + .base = per_database, + .src_label = "node", + .edge_type = "LINK", + .dst_label = "node", + .properties = {group3}, + .adjacent_lists = adjacent_lists, + }}, + }; +} + +int main(int argc, char **argv) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + spdlog::set_level(spdlog::level::trace); + + auto db_config = GraphSchema(); + auto &vertex_type = db_config.vertex_types[0]; + auto vertex_infos = memgraph::storage::custom_storage::InitVertexTypes(db_config); + auto vertex_info = vertex_infos[0]; + auto &edge_type = db_config.edge_types[0]; + auto edge_infos = memgraph::storage::custom_storage::InitEdgeTypes(db_config); + auto edge_info = edge_infos[0]; + auto graph_info = memgraph::storage::custom_storage::InitGraph(db_config, vertex_infos, edge_infos); + spdlog::info("GAR initialized"); + + spdlog::info("== CREATE =="); + // CREATE vertex data partition 1 + graphar::builder::VerticesBuilder builder(vertex_info, vertex_type.base->root, 0); + builder.SetValidateLevel(graphar::ValidateLevel::strong_validate); + int vertex_count = 4; + std::vector property_names = {"id", "domain", "extra"}; + std::vector id = {0, 1, 2, 3}; + std::vector domain = {"google.com", "memgraph.com", "nvidia.com", "facebook.com"}; + std::vector extra = {"{key:value}", "{}", "", ""}; + for (int i = 0; i < vertex_count; i++) { + graphar::builder::Vertex v; + v.AddProperty(property_names[0], id[i % 4]); + v.AddProperty(property_names[1], domain[i % 4]); + v.AddProperty(property_names[2], extra[i % 4]); + MG_ASSERT(builder.AddVertex(v).ok()); + } + MG_ASSERT(builder.GetNum() == vertex_count); + spdlog::info("vertex_count={}", builder.GetNum()); + MG_ASSERT(builder.Dump().ok()); + spdlog::info("dump vertices collection successfully!"); + builder.Clear(); + MG_ASSERT(builder.GetNum() == 0); + + // TODO: CREATE vertex data partition 2 + // IMPORTANT: controlling start_vertex_index means partitioning & parallelization, + // BUT it only works if all chunks are monotonically populated + // (from 0 to total vertex_count, no missing vertices). + // The way how how VerticesBuilder is storing size is limiting + // because it just overrides the number of nodes during the Dump call. + // NOTE: It's possible to use low-level primitives to achieve parallelization. + graphar::builder::VerticesBuilder builder2(vertex_info, vertex_type.base->root, + vertex_type.base->vertex_chunk_size * 1); + builder.SetValidateLevel(graphar::ValidateLevel::strong_validate); + vertex_count = 2; + property_names = {"id", "domain", "extra"}; + id = {2, 3}; + domain = {"nvidia.com", "facebook.com"}; + std::vector extra2 = {"{key:value}", "{}"}; + for (int i = 0; i < vertex_count; i++) { + graphar::builder::Vertex v; + v.AddProperty(property_names[0], id[i]); + v.AddProperty(property_names[1], domain[i]); + if (i == 0) { + v.AddProperty(property_names[2], extra2[0]); + } else { + v.AddProperty(property_names[2], extra2[1]); + } + MG_ASSERT(builder2.AddVertex(v).ok()); + } + MG_ASSERT(builder2.GetNum() == vertex_count); + spdlog::info("vertex_count={}", builder2.GetNum()); + MG_ASSERT(builder2.Dump().ok()); + spdlog::info("dump vertices collection successfully!"); + builder2.Clear(); + MG_ASSERT(builder2.GetNum() == 0); + + // CREATE edge data + graphar::builder::EdgesBuilder builder3(edge_info, edge_type.base->root, graphar::AdjListType::ordered_by_dest, 1025); + builder.SetValidateLevel(graphar::ValidateLevel::strong_validate); + int edge_count = 4; + property_names = {"created"}; + std::vector src = {1, 0, 0, 2}; + std::vector dst = {0, 1, 2, 1}; + std::vector creationDate = {"2010-01-01", "2011-01-01", "2012-01-01", "2013-01-01"}; + for (int i = 0; i < edge_count; i++) { + graphar::builder::Edge e(src[i], dst[i]); + e.AddProperty("created", creationDate[i]); + MG_ASSERT(builder3.AddEdge(e).ok()); + } + MG_ASSERT(builder3.GetNum() == edge_count); + spdlog::info("edge_count={}", builder3.GetNum()); + MG_ASSERT(builder3.Dump().ok()); + spdlog::info("dump edges collection successfully!"); + builder3.Clear(); + MG_ASSERT(builder3.GetNum() == 0); + + spdlog::info("== READ =="); + // MATCH vertex data + std::string graph_metadata_path = vertex_type.SavePath(); + auto maybe_vertices = graphar::VerticesCollection::Make(graph_info, vertex_type.label); + auto vertices = maybe_vertices.value(); + spdlog::info(vertices->size()); + auto v_it_begin = vertices->begin(), v_it_end = vertices->end(); + for (auto it = v_it_begin; it != v_it_end; ++it) { + auto vertex = *it; + spdlog::info(std::to_string(vertex.property("id").value()) + " " + + vertex.property("domain").value() + " " + vertex.property("extra").value()); + } + + return 0; +} diff --git a/tests/manual/pmr.cpp b/tests/manual/pmr.cpp new file mode 100644 index 00000000000..23cefe00850 --- /dev/null +++ b/tests/manual/pmr.cpp @@ -0,0 +1,129 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include +#include +#include +#include +#include + +#include + +// Credits to https://github.com/lefticus/cpp_weekly/blob/master/PMR/1_experiments.cpp +template +void print_line(int offset, ItrBegin begin, const ItrEnd end) { + fmt::print("(dec) {:02x}: {:3}\n", offset, fmt::join(begin, end, " ")); + fmt::print("(hex) {:02x}: {:02x}\n", offset, fmt::join(begin, end, " ")); + fmt::print("(asc) {:02x}:", offset); + std::for_each(begin, end, [](const auto c) { + if (std::isgraph(c)) { + fmt::print(" {} ", static_cast(c)); + } else { + fmt::print(" \\{:03o}", c); + } + }); + fmt::print("\n"); +} +template +void print_buffer(const std::string_view title, const Buffer &buffer, const Container &container) { + fmt::print("==============={:^10}==============\n", title); + auto begin = buffer.begin(); + fmt::print("Buffer Address Start: {}\n", static_cast(buffer.data())); + fmt::print("Buffer Address End: {}\n", static_cast(buffer.data() + buffer.size())); + for (const auto &elem : container) { + fmt::print(" Item Address: {}\n", static_cast(&elem)); + } + for (std::size_t offset = 0; offset < buffer.size(); offset += 16) { + print_line(offset, std::next(begin, offset), std::next(begin, offset + 16)); + } + fmt::print("\n"); +} + +// https://github.com/lefticus/cpp_weekly/blob/master/PMR/2_aa_type.cpp +// NOTE: For primitive fields allocator doesn't matter. +struct Node { + int64_t id; + std::pmr::string label; + using allocator_type = std::pmr::polymorphic_allocator<>; + + explicit Node(const int64_t id, const std::string_view label, allocator_type alloc = {}) + : id(id), label(label, alloc) {} + Node(const Node &other, allocator_type alloc = {}) : id(other.id), label(other.label, alloc) {} + Node(Node &&) = default; + Node(Node &&other, allocator_type alloc) : id(other.id), label(std::move(other.label), alloc) {} + Node &operator=(const Node &rhs) = default; + Node &operator=(Node &&rhs) = default; + ~Node() = default; + + allocator_type get_allocator() const { return label.get_allocator(); } +}; + +// thanks to Rahil Baber +// Prints if new/delete gets used. +class print_alloc : public std::pmr::memory_resource { + private: + void *do_allocate(std::size_t bytes, std::size_t alignment) override { + std::cout << "Allocating " << bytes << '\n'; + return std::pmr::new_delete_resource()->allocate(bytes, alignment); + } + void do_deallocate(void *p, std::size_t bytes, std::size_t alignment) override { + std::cout << "Deallocating " << bytes << ": '"; + for (std::size_t i = 0; i < bytes; ++i) { + std::cout << *(static_cast(p) + i); + } + std::cout << "'\n"; + return std::pmr::new_delete_resource()->deallocate(p, bytes, alignment); + } + bool do_is_equal(const std::pmr::memory_resource &other) const noexcept override { + return std::pmr::new_delete_resource()->is_equal(other); + } +}; + +// This is useful because of the problem with initializer lists + alloc + reserve (remember initializer lists are +// broken). +template +auto create_container(auto *resource, Values &&...values) { + Container result{resource}; + result.reserve(sizeof...(values)); + (result.emplace_back(std::forward(values)), ...); + return result; +}; + +int main() { + // IMPORTANT: This is a super nice debugging technique -> if PMR is not set, the default resource will tell us. + print_alloc mem; + std::pmr::set_default_resource(&mem); + + std::array buffer1{}; + std::pmr::monotonic_buffer_resource pool1(buffer1.data(), buffer1.size()); + // NOTE: vector doesn't live in the buffer, only the data itself is inside the buffer. + // NOTE: pmr objects are longer (std::string 32B, std::pmr::string 40B) + // NOTE: + // * std::string => ptr_data + size + data + null + // * pmr::string => ptr_alloc + ptr_data + size + data + null + std::pmr::vector data1{&pool1}; + data1.reserve(2); + print_buffer("initial", buffer1, ""); + data1.emplace_back("foo"); + print_buffer("data - foo", buffer1, data1); + data1.emplace_back("a very long long bar string"); + print_buffer("data - foo & bar", buffer1, data1); + + std::array buffer2{}; + std::pmr::monotonic_buffer_resource pool2(buffer2.data(), buffer2.size()); + std::pmr::vector data2{&pool2}; + data2.reserve(2); + print_buffer("initial", buffer2, ""); + data2.emplace_back(Node(77, "bla")); + print_buffer("data", buffer2, data2); + + return 0; +} diff --git a/tests/unit/storage_test_utils.cpp b/tests/unit/storage_test_utils.cpp index 2620666d0ae..5976a5a020e 100644 --- a/tests/unit/storage_test_utils.cpp +++ b/tests/unit/storage_test_utils.cpp @@ -10,6 +10,7 @@ // licenses/APL.txt. #include "storage_test_utils.hpp" +#include "storage/v2/storage_mode.hpp" size_t CountVertices(memgraph::storage::Storage::Accessor &storage_accessor, memgraph::storage::View view) { auto vertices = storage_accessor.Vertices(view); @@ -27,5 +28,7 @@ std::string_view StorageModeToString(memgraph::storage::StorageMode storage_mode return "IN_MEMORY_TRANSACTIONAL"; case memgraph::storage::StorageMode::ON_DISK_TRANSACTIONAL: return "ON_DISK_TRANSACTIONAL"; + case memgraph::storage::StorageMode::ALTERNATIVE_STORAGE: + return "ALTERNATIVE_STORAGE"; } }