diff --git a/src/BuildCypherLib.cmake b/src/BuildCypherLib.cmake index d84ed742..b32dcca9 100644 --- a/src/BuildCypherLib.cmake +++ b/src/BuildCypherLib.cmake @@ -61,6 +61,8 @@ set(LGRAPH_CYPHER_SRC # find cypher/ -name "*.cpp" | sort cypher/procedure/procedure.cpp cypher/resultset/record.cpp cypher/monitor/monitor_manager.cpp + cypher/execution_plan/optimization/rewrite/schema_rewrite.cpp + cypher/execution_plan/optimization/rewrite/graph.cpp ) add_library(${TARGET_LGRAPH_CYPHER_LIB} STATIC diff --git a/src/cypher/execution_plan/execution_plan.cpp b/src/cypher/execution_plan/execution_plan.cpp index 09ec9107..f635c125 100644 --- a/src/cypher/execution_plan/execution_plan.cpp +++ b/src/cypher/execution_plan/execution_plan.cpp @@ -1247,7 +1247,8 @@ static bool CheckReturnElements(const std::vector &stmt) { return true; } -void ExecutionPlan::Build(const std::vector &stmt, parser::CmdType cmd) { +void ExecutionPlan::Build(const std::vector &stmt, parser::CmdType cmd, + cypher::RTContext *ctx) { // check return elements first if (!CheckReturnElements(stmt)) { throw lgraph::CypherException( @@ -1276,7 +1277,8 @@ void ExecutionPlan::Build(const std::vector &stmt, parser::Cmd // NOTE: handle plan's destructor with care! } // Optimize the operations in the ExecutionPlan. - PassManager pass_manager(this); + // TODO(seijiang): split context-free optimizations & context-dependent ones + PassManager pass_manager(this, ctx); pass_manager.ExecutePasses(); } @@ -1323,8 +1325,11 @@ int ExecutionPlan::Execute(RTContext *ctx) { if (ctx->graph_.empty()) { ctx->ac_db_.reset(nullptr); } else { - ctx->ac_db_ = std::make_unique( - ctx->galaxy_->OpenGraph(ctx->user_, ctx->graph_)); + // We have already created ctx->ac_db_ in opt_rewrite_with_schema_inference.h + if (!ctx->ac_db_) { + ctx->ac_db_ = std::make_unique( + ctx->galaxy_->OpenGraph(ctx->user_, ctx->graph_)); + } lgraph_api::GraphDB db(ctx->ac_db_.get(), ReadOnly()); if (ReadOnly()) { ctx->txn_ = std::make_unique(db.CreateReadTxn()); diff --git a/src/cypher/execution_plan/execution_plan.h b/src/cypher/execution_plan/execution_plan.h index acf5666e..9dc54a48 100644 --- a/src/cypher/execution_plan/execution_plan.h +++ b/src/cypher/execution_plan/execution_plan.h @@ -100,7 +100,8 @@ class ExecutionPlan { OpBase *BuildSgl(const parser::SglQuery &stmt, size_t parts_offset); - void Build(const std::vector &stmt, parser::CmdType cmd); + void Build(const std::vector &stmt, parser::CmdType cmd, + cypher::RTContext *ctx); void Validate(cypher::RTContext *ctx); diff --git a/src/cypher/execution_plan/ops/op_all_node_scan.h b/src/cypher/execution_plan/ops/op_all_node_scan.h index 5a22e721..58847f13 100644 --- a/src/cypher/execution_plan/ops/op_all_node_scan.h +++ b/src/cypher/execution_plan/ops/op_all_node_scan.h @@ -102,6 +102,8 @@ class AllNodeScan : public OpBase { Node *GetNode() const { return node_; } + const SymbolTable *SymTab() const { return sym_tab_; } + CYPHER_DEFINE_VISITABLE() CYPHER_DEFINE_CONST_VISITABLE() diff --git a/src/cypher/execution_plan/ops/op_all_node_scan_dynamic.h b/src/cypher/execution_plan/ops/op_all_node_scan_dynamic.h index 255c6fd9..6f5653bc 100644 --- a/src/cypher/execution_plan/ops/op_all_node_scan_dynamic.h +++ b/src/cypher/execution_plan/ops/op_all_node_scan_dynamic.h @@ -110,6 +110,8 @@ class AllNodeScanDynamic : public OpBase { Node *GetNode() const { return node_; } + const SymbolTable *SymTab() const { return sym_tab_; } + CYPHER_DEFINE_VISITABLE() CYPHER_DEFINE_CONST_VISITABLE() diff --git a/src/cypher/execution_plan/optimization/opt_rewrite_with_schema_inference.h b/src/cypher/execution_plan/optimization/opt_rewrite_with_schema_inference.h new file mode 100644 index 00000000..28e1d155 --- /dev/null +++ b/src/cypher/execution_plan/optimization/opt_rewrite_with_schema_inference.h @@ -0,0 +1,251 @@ +/** + * Copyright 2022 AntGroup CO., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +// +// Created by seijiang on 23-07-19. +// +#pragma once + +#include "cypher/execution_plan/optimization/opt_pass.h" +#include "cypher/execution_plan/optimization/rewrite/schema_rewrite.h" + +namespace cypher { + +/* Opt Rewrite With Schema Inference: + * Graph : MovieDemo + * example Cypher: + * match p=(n0)-[e0]->(n1)-[e1]->(n2)-[e2]->(m:keyword) return COUNT(p); + * is equivalent to : + * match p=(n0:user)-[e0:is_friend]->(n1:user)-[e1:rate]->(n2:movie)-[e2:has_keyword]->(m:keyword) + *return COUNT(p); + * + * Plan before optimization: + * Produce Results + * Aggregate [COUNT(p)] + * Expand(All) [n2 --> m ] + * Expand(All) [n1 --> n2 ] + * Expand(All) [n0 --> n1 ] + * All Node Scan [n0] + * + * Plan after optimization: + * Produce Results + * Aggregate [COUNT(p)] + * Expand(All) [n2 --> m ] + * Expand(All) [n1 --> n2 ] + * Expand(All) [n0 --> n1 ] + * Node By Label Scan [n0:user] + **/ + +class OptRewriteWithSchemaInference : public OptPass { + bool check_v_label_valid(const lgraph::SchemaInfo *schema_info, const std::string label) { + auto vertex_labels = schema_info->v_schema_manager.GetAllLabels(); + if (!label.empty() && + std::find(vertex_labels.begin(), vertex_labels.end(), label) == vertex_labels.end()) { + return false; + } + return true; + } + + bool check_e_labels_valid(const lgraph::SchemaInfo *schema_info, + const std::set labels) { + auto edge_labels = schema_info->e_schema_manager.GetAllLabels(); + for (auto label : labels) { + if (std::find(edge_labels.begin(), edge_labels.end(), label) == edge_labels.end()) { + return false; + } + } + return true; + } + + // match子句中的模式图可以分为多个极大连通子图,该函数提取每个极大连通子图的点和边,经过分析后加上标签信息 + void _ExtractStreamAndAddLabels(OpBase *root, const lgraph::SchemaInfo *schema_info) { + CYPHER_THROW_ASSERT(root->type == OpType::EXPAND_ALL); + SchemaNodeMap schema_node_map; + SchemaRelpMap schema_relp_map; + auto op = root; + while (true) { + if (auto expand_all = dynamic_cast(op)) { + auto start = expand_all->GetStartNode(); + auto relp = expand_all->GetRelationship(); + auto neighbor = expand_all->GetNeighborNode(); + if (!check_v_label_valid(schema_info, start->Label())) { + return; + } + if (!check_v_label_valid(schema_info, neighbor->Label())) { + return; + } + if (!check_e_labels_valid(schema_info, relp->Types())) { + return; + } + + schema_node_map[start->ID()] = start->Label(); + schema_node_map[neighbor->ID()] = neighbor->Label(); + std::tuple, parser::LinkDirection> + relp_map_value(start->ID(), neighbor->ID(), relp->Types(), relp->direction_); + schema_relp_map[relp->ID()] = relp_map_value; + } else if (op->type == OpType::VAR_LEN_EXPAND) { + // 含有可变长算子的情况暂不处理 + return; + } else if ((op->IsScan() || op->IsDynamicScan()) && op->type != OpType::ARGUMENT) { + NodeID id; + std::string label; + if (auto all_node_scan = dynamic_cast(op)) { + id = all_node_scan->GetNode()->ID(); + label = all_node_scan->GetNode()->Label(); + } else if (auto all_node_scan_dy = dynamic_cast(op)) { + id = all_node_scan_dy->GetNode()->ID(); + label = all_node_scan_dy->GetNode()->Label(); + } else if (auto node_by_label_scan = dynamic_cast(op)) { + id = node_by_label_scan->GetNode()->ID(); + label = node_by_label_scan->GetNode()->Label(); + } else if (auto node_by_label_scan_dy = + dynamic_cast(op)) { + id = node_by_label_scan_dy->GetNode()->ID(); + label = node_by_label_scan_dy->GetNode()->Label(); + } else if (auto node_index_seek = dynamic_cast(op)) { + id = node_index_seek->GetNode()->ID(); + label = node_index_seek->GetNode()->Label(); + } else if (auto node_index_seek_dy = dynamic_cast(op)) { + id = node_index_seek_dy->GetNode()->ID(); + label = node_index_seek_dy->GetNode()->Label(); + } + if (!check_v_label_valid(schema_info, label)) { + return; + } + schema_node_map[id] = label; + } + + if (op->children.empty()) { + break; + } + CYPHER_THROW_ASSERT(op->children.size() == 1); + op = op->children[0]; + } + // 调用schema函数 + rewrite::SchemaRewrite schema_rewrite; + std::vector schema_graph_maps; + schema_graph_maps = + schema_rewrite.GetEffectivePath(*schema_info, &schema_node_map, &schema_relp_map); + // 目前只对一条可行路径的情况进行重写 + if (schema_graph_maps.size() != 1) { + return; + } + schema_node_map = schema_graph_maps[0].first; + schema_relp_map = schema_graph_maps[0].second; + op = root; + while (true) { + if (auto expand_all = dynamic_cast(op)) { + auto start = expand_all->GetStartNode(); + auto relp = expand_all->GetRelationship(); + auto neighbor = expand_all->GetNeighborNode(); + if (schema_node_map.find(start->ID()) != schema_node_map.end()) { + start->SetLabel(schema_node_map.find(start->ID())->second); + } + if (schema_node_map.find(neighbor->ID()) != schema_node_map.end()) { + neighbor->SetLabel(schema_node_map.find(neighbor->ID())->second); + } + if (schema_relp_map.find(relp->ID()) != schema_relp_map.end()) { + relp->SetTypes(std::get<2>(schema_relp_map.find(relp->ID())->second)); + } + } else if (auto all_node_scan = dynamic_cast(op)) { + auto node = all_node_scan->GetNode(); + if (schema_node_map.find(node->ID()) != schema_node_map.end()) { + node->SetLabel(schema_node_map.find(node->ID())->second); + } + auto op_label_scan = new NodeByLabelScan(node, all_node_scan->SymTab()); + auto parent = all_node_scan->parent; + for (auto child : all_node_scan->children) { + op_label_scan->AddChild(child); + } + parent->RemoveChild(all_node_scan); + parent->AddChild(op_label_scan); + } else if (auto all_node_scan_dy = dynamic_cast(op)) { + auto node = all_node_scan_dy->GetNode(); + if (schema_node_map.find(node->ID()) != schema_node_map.end()) { + node->SetLabel(schema_node_map.find(node->ID())->second); + } + auto op_label_scan = + new NodeByLabelScanDynamic(node, all_node_scan_dy->SymTab()); + auto parent = all_node_scan_dy->parent; + for (auto child : all_node_scan_dy->children) { + op_label_scan->AddChild(child); + } + parent->RemoveChild(all_node_scan_dy); + parent->AddChild(op_label_scan); + } else if (auto node_index_seek = dynamic_cast(op)) { + auto node = node_index_seek->GetNode(); + if (schema_node_map.find(node->ID()) != schema_node_map.end()) { + node->SetLabel(schema_node_map.find(node->ID())->second); + } + } else if (auto node_index_seek_dy = dynamic_cast(op)) { + auto node = node_index_seek_dy->GetNode(); + if (schema_node_map.find(node->ID()) != schema_node_map.end()) { + node->SetLabel(schema_node_map.find(node->ID())->second); + } + } + if (op->children.empty()) { + if (op->type == OpType::ALL_NODE_SCAN || + op->type == OpType::ALL_NODE_SCAN_DYNAMIC) { + delete op; + } + break; + } + CYPHER_THROW_ASSERT(op->children.size() == 1); + auto child = op->children[0]; + if (op->type == OpType::ALL_NODE_SCAN || op->type == OpType::ALL_NODE_SCAN_DYNAMIC) { + delete op; + } + op = child; + } + } + + void _RewriteWithSchemaInference(OpBase *root, const lgraph::SchemaInfo *schema_info) { + // 对单独的点和可变长不予优化 + if (root->type == OpType::EXPAND_ALL) { + _ExtractStreamAndAddLabels(root, schema_info); + } else { + for (auto child : root->children) { + _RewriteWithSchemaInference(child, schema_info); + } + } + } + + public: + cypher::RTContext *_ctx; + + explicit OptRewriteWithSchemaInference(cypher::RTContext *ctx) + : OptPass(typeid(OptRewriteWithSchemaInference).name()), _ctx(ctx) {} + + bool Gate() override { return true; } + + int Execute(ExecutionPlan *plan) override { + const lgraph::SchemaInfo *schema_info; + if (_ctx->graph_.empty()) { + _ctx->ac_db_.reset(nullptr); + schema_info = nullptr; + } else { + _ctx->ac_db_ = std::make_unique( + _ctx->galaxy_->OpenGraph(_ctx->user_, _ctx->graph_)); + lgraph_api::GraphDB db(_ctx->ac_db_.get(), true); + _ctx->txn_ = std::make_unique(db.CreateReadTxn()); + schema_info = &_ctx->txn_->GetTxn()->GetSchemaInfo(); + } + _ctx->txn_.reset(nullptr); + // _ctx->ac_db_.reset(nullptr); + _RewriteWithSchemaInference(plan->Root(), schema_info); + return 0; + } +}; + +} // namespace cypher diff --git a/src/cypher/execution_plan/optimization/pass_manager.h b/src/cypher/execution_plan/optimization/pass_manager.h index 5a83316f..933a7a6d 100644 --- a/src/cypher/execution_plan/optimization/pass_manager.h +++ b/src/cypher/execution_plan/optimization/pass_manager.h @@ -24,6 +24,7 @@ #include "cypher/execution_plan/optimization/locate_node_by_vid.h" #include "cypher/execution_plan/optimization/locate_node_by_indexed_prop.h" #include "cypher/execution_plan/optimization/parallel_traversal.h" +#include "cypher/execution_plan/optimization/opt_rewrite_with_schema_inference.h" namespace cypher { @@ -32,7 +33,8 @@ class PassManager { std::vector all_passes_; public: - explicit PassManager(ExecutionPlan *plan) : plan_(plan) { + explicit PassManager(ExecutionPlan *plan, cypher::RTContext *ctx) : plan_(plan) { + all_passes_.emplace_back(new OptRewriteWithSchemaInference(ctx)); all_passes_.emplace_back(new PassReduceCount()); all_passes_.emplace_back(new EdgeFilterPushdownExpand()); all_passes_.emplace_back(new LazyProjectTopN()); diff --git a/src/cypher/execution_plan/optimization/rewrite/edge.h b/src/cypher/execution_plan/optimization/rewrite/edge.h new file mode 100644 index 00000000..2b760398 --- /dev/null +++ b/src/cypher/execution_plan/optimization/rewrite/edge.h @@ -0,0 +1,46 @@ +/** + * Copyright 2022 AntGroup CO., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +// +// Created by liyunlong2000 on 23-07-19. +// +#pragma once +#include +#include "cypher/execution_plan/optimization/rewrite/node.h" +#include "parser/data_typedef.h" + +namespace cypher::rewrite { +class Node; + +class Edge { + public: + size_t m_id; + size_t m_source_id; + size_t m_target_id; + std::set m_labels; + parser::LinkDirection m_direction; + Edge() {} + Edge(size_t id, size_t source_id, size_t target_id, std::set labels, + parser::LinkDirection direction) { + m_source_id = source_id; + m_target_id = target_id; + m_labels = labels; + m_id = id; + m_direction = direction; + } + + ~Edge() {} +}; + +}; // namespace cypher::rewrite diff --git a/src/cypher/execution_plan/optimization/rewrite/graph.cpp b/src/cypher/execution_plan/optimization/rewrite/graph.cpp new file mode 100644 index 00000000..6e16d531 --- /dev/null +++ b/src/cypher/execution_plan/optimization/rewrite/graph.cpp @@ -0,0 +1,67 @@ +/** + * Copyright 2022 AntGroup CO., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +// +// Created by liyunlong2000 on 23-07-19. +// +#include "cypher/execution_plan/optimization/rewrite/graph.h" + +namespace cypher::rewrite { +void Graph::AddNode(size_t id, int label_num) { m_nodes.push_back(rewrite::Node(id, label_num)); } + +void Graph::AddEdge(size_t id, size_t source_id, size_t target_id, std::set labels, + parser::LinkDirection direction) { + // Edge edge(id,source_id,target_id,label_num); + if (direction == parser::LinkDirection::LEFT_TO_RIGHT) { + m_edges.push_back(Edge(id, source_id, target_id, labels, direction)); + m_nodes[source_id].m_outedges.push_back(id); + m_nodes[target_id].m_inedges.push_back(id); + } else if (direction == parser::LinkDirection::RIGHT_TO_LEFT) { + m_edges.push_back( + Edge(id, target_id, source_id, labels, parser::LinkDirection::RIGHT_TO_LEFT)); + m_nodes[source_id].m_inedges.push_back(id); + m_nodes[target_id].m_outedges.push_back(id); + } else { + m_edges.push_back(Edge(id, source_id, target_id, labels, direction)); + m_nodes[source_id].m_undirectededges.push_back(id); + m_nodes[target_id].m_undirectededges.push_back(id); + } +} + +void Graph::PrintGraph() { + for (Node node : m_nodes) { + FMA_LOG() << "Node id:" << node.m_id; + FMA_LOG() << "In edges:"; + for (size_t eid : node.m_inedges) { + Edge& edge = m_edges[eid]; + FMA_LOG() << edge.m_id << "-" << edge.m_source_id << "."; + } + FMA_LOG(); + + FMA_LOG() << "Out edges:"; + for (size_t eid : node.m_outedges) { + Edge& edge = m_edges[eid]; + FMA_LOG() << edge.m_id << "-" << edge.m_target_id << "."; + } + FMA_LOG(); + + FMA_LOG() << "Undirection edges:"; + for (size_t eid : node.m_undirectededges) { + Edge& edge = m_edges[eid]; + FMA_LOG() << edge.m_id << "-" << edge.m_direction << "."; + } + FMA_LOG(); + } +} +}; // namespace cypher::rewrite diff --git a/src/cypher/execution_plan/optimization/rewrite/graph.h b/src/cypher/execution_plan/optimization/rewrite/graph.h new file mode 100644 index 00000000..5e0d7ae8 --- /dev/null +++ b/src/cypher/execution_plan/optimization/rewrite/graph.h @@ -0,0 +1,43 @@ +/** + * Copyright 2022 AntGroup CO., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +// +// Created by liyunlong2000 on 23-07-19. +// +#pragma once +#include +#include +#include +#include "cypher/execution_plan/optimization/rewrite/node.h" +#include "cypher/execution_plan/optimization/rewrite/edge.h" + +#include "cypher/execution_plan/ops/op.h" +namespace cypher::rewrite { + +class Graph { + public: + std::vector m_nodes; + std::vector m_edges; + + Graph() {} + ~Graph() {} + void AddNode(size_t id, int label); + + void AddEdge(size_t id, size_t source_id, size_t target_id, std::set labels, + parser::LinkDirection direction); + + void PrintGraph(); +}; + +}; // namespace cypher::rewrite diff --git a/src/cypher/execution_plan/optimization/rewrite/node.h b/src/cypher/execution_plan/optimization/rewrite/node.h new file mode 100644 index 00000000..065b1795 --- /dev/null +++ b/src/cypher/execution_plan/optimization/rewrite/node.h @@ -0,0 +1,41 @@ +/** + * Copyright 2022 AntGroup CO., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +// +// Created by liyunlong2000 on 23-07-19. +// +#pragma once +#include +#include "cypher/execution_plan/optimization/rewrite/edge.h" + +namespace cypher::rewrite { + +class Edge; + +class Node { + private: + public: + size_t m_id; + int m_label; + std::vector m_outedges; + std::vector m_inedges; + std::vector m_undirectededges; + Node() {} + Node(size_t id, int label) { + m_id = id; + m_label = label; + } + ~Node() {} +}; +}; // namespace cypher::rewrite diff --git a/src/cypher/execution_plan/optimization/rewrite/schema_rewrite.cpp b/src/cypher/execution_plan/optimization/rewrite/schema_rewrite.cpp new file mode 100644 index 00000000..c39a7947 --- /dev/null +++ b/src/cypher/execution_plan/optimization/rewrite/schema_rewrite.cpp @@ -0,0 +1,382 @@ +/** + * Copyright 2022 AntGroup CO., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +// +// Created by liyunlong2000 on 23-07-19. +// +#include "cypher/execution_plan/optimization/rewrite/schema_rewrite.h" + +// #define DEBUG + +namespace cypher::rewrite { + +// 根据schema信息和cypher信息获取有效路径 +std::vector SchemaRewrite::GetEffectivePath( + const lgraph::SchemaInfo& schema_info, cypher::SchemaNodeMap* schema_node_map, + cypher::SchemaRelpMap* schema_relp_map) { + // 根据schema构建目标图 + m_schema_node_map = schema_node_map; + m_schema_relp_map = schema_relp_map; + label2idx.insert({"", -2}); + std::vector v_labels = schema_info.v_schema_manager.GetAllLabels(); + int i = 0; + for (; i < (int)v_labels.size(); i++) { + label2idx.insert({v_labels[i], i}); + idx2label.push_back(v_labels[i]); + target_graph.AddNode((size_t)i, GetLabelNum(v_labels[i])); + } + lgraph::SchemaManager e_schema_manager = schema_info.e_schema_manager; + size_t label_num = 0; + const lgraph::Schema* schema; + size_t e_cnt = 0; + while ((schema = e_schema_manager.GetSchema(label_num))) { + label2idx.insert({schema->GetLabel(), i}); + idx2label.push_back(schema->GetLabel()); + i++; + const lgraph::EdgeConstraints& ec = schema->GetEdgeConstraints(); + for (auto pair : ec) { + std::set label_nums; + label_nums.insert(GetLabelNum(schema->GetLabel())); + target_graph.AddEdge(e_cnt, (size_t)GetLabelNum(pair.first), + (size_t)GetLabelNum(pair.second), label_nums, + parser::LinkDirection::LEFT_TO_RIGHT); + e_cnt++; + } + label_num++; + } + + // cypher语句构建查询图 + size_t cnt = 0; + for (auto it = schema_node_map->begin(); it != schema_node_map->end(); it++) { + vidx2pidx.insert({cnt, it->first}); + pidx2vidx.insert({it->first, cnt}); + query_graph.AddNode(cnt, GetLabelNum(it->second)); + cnt++; + } + + e_cnt = 0; + for (auto it = schema_relp_map->begin(); it != schema_relp_map->end(); it++) { + eidx2pidx.insert({e_cnt, it->first}); + auto p_it = pidx2vidx.find(std::get<0>(it->second)); + auto src_idx = p_it->second; + p_it = pidx2vidx.find(std::get<1>(it->second)); + auto dst_idx = p_it->second; + std::set labels = std::get<2>(it->second); + parser::LinkDirection direction = std::get<3>(it->second); + std::set label_nums; + if (labels.empty()) { + query_graph.AddEdge(e_cnt, src_idx, dst_idx, label_nums, direction); + } else { + for (auto lit = labels.begin(); lit != labels.end(); lit++) { + label_nums.insert(GetLabelNum(*lit)); + } + query_graph.AddEdge(e_cnt, src_idx, dst_idx, label_nums, direction); + } + e_cnt++; + } + + // 初始化相关参数 + target_size = target_graph.m_nodes.size(); + query_size = query_graph.m_nodes.size(); + edge_core.resize(query_graph.m_edges.size()); + visited.resize(query_size); + edge_visited.resize(target_graph.m_edges.size()); + core_2.resize(query_size); + +#ifdef DEBUG + PrintLabel2Idx(); + FMA_DBG() << "目标图:"; + target_graph.PrintGraph(); + FMA_DBG() << "查询图:"; + query_graph.PrintGraph(); +#endif + + // 回溯 + for (size_t i = 0; i < target_size; i++) { + Reset(); + if (CheckNodeLabel(0, i)) { + core_2[0] = i; + depth++; + MatchRecursive(0, i); + } + } + + return sgm; +} + +// 重置所有状态 +void SchemaRewrite::Reset() { + for (size_t i = 0; i < query_size; i++) { + core_2[i] = -1; + visited[i] = false; + } + map_cnt = 0; + depth = 0; +} + +void SchemaRewrite::MatchRecursive(size_t vid, size_t t_vid) { + if (depth == query_size) { +#ifdef DEBUG + PrintMapping(); +#endif + AddMapping(); + } else { + std::vector candidate_state_infos = GenCandidateStateInfo(); + +#ifdef DEBUG + FMA_DBG() << "num of stateinfos:" << candidate_state_infos.size() << ",depth:" << depth; + for (StateInfo si : candidate_state_infos) { + FMA_DBG() << "vid:" << si.m_vid; + FMA_DBG() << "next vid:" << si.m_next_vid; + FMA_DBG() << "eid:" << si.m_eid; + } +#endif + + for (StateInfo si : candidate_state_infos) { + for (auto it = si.m_id_map.begin(); it != si.m_id_map.end(); it++) { +#ifdef DEBUG + FMA_DBG() << "check:query:" << si.m_next_vid << ",target:" << it->first; +#endif + if (CheckNodeLabel(si.m_next_vid, it->first)) { + core_2[si.m_next_vid] = it->first; + depth++; + edge_core[si.m_eid] = si.m_id_map[it->first]; + MatchRecursive(si.m_next_vid, it->first); + core_2[si.m_next_vid] = -1; + depth--; + } + } + } + } +} + +std::vector SchemaRewrite::GenCandidateStateInfo() { + std::vector candidate_state_infos; + for (size_t i = 0; i < query_size; i++) { + if (core_2[i] > -1) { + Node node = query_graph.m_nodes[i]; + for (size_t eid : node.m_outedges) { + Edge edge = query_graph.m_edges[eid]; + if (core_2[edge.m_target_id] == -1) { + std::set edge_ids = GetNextEdgeIds(&edge, core_2[i], 0); + std::map> id_map = + GetNextTVidByEdgeId(edge_ids, core_2[i]); + StateInfo si(i, edge.m_target_id, eid, 0, &id_map); + candidate_state_infos.push_back(si); + } + } + if (!candidate_state_infos.empty()) { + return candidate_state_infos; + } + for (size_t eid : node.m_inedges) { + Edge edge = query_graph.m_edges[eid]; + if (core_2[edge.m_source_id] == -1) { + std::set edge_ids = GetNextEdgeIds(&edge, core_2[i], 1); + std::map> id_map = + GetNextTVidByEdgeId(edge_ids, core_2[i]); + StateInfo si(i, edge.m_source_id, eid, 1, &id_map); + candidate_state_infos.push_back(si); + } + } + if (!candidate_state_infos.empty()) { + return candidate_state_infos; + } + for (size_t eid : node.m_undirectededges) { + Edge edge = query_graph.m_edges[eid]; + size_t next_vid = edge.m_source_id == i ? edge.m_target_id : edge.m_source_id; + if (core_2[next_vid] == -1) { + std::set edge_ids = GetNextEdgeIds(&edge, core_2[i], 2); + std::map> id_map = + GetNextTVidByEdgeId(edge_ids, core_2[i]); + StateInfo si(i, next_vid, eid, 2, &id_map); + candidate_state_infos.push_back(si); + } + } + if (!candidate_state_infos.empty()) { + return candidate_state_infos; + } + } + } + return candidate_state_infos; +} + +// //回溯 +// void SchemaRewrite::Backtrack(size_t vid){ +// map_cnt--; +// visited[vid]=false; +// core_2[vid]=-1; +// } + +// 检查目标图和查询图上当前点的label是否一致 +bool SchemaRewrite::CheckNodeLabel(size_t vid, size_t t_vid) { + Node* query_node = &query_graph.m_nodes[vid]; + Node* target_node = &target_graph.m_nodes[t_vid]; + if (query_node->m_label == -2 || query_node->m_label == target_node->m_label) { + return true; + } + return false; +} +// 根据目标图上的边id集合获取点id集合 +std::map> SchemaRewrite::GetNextTVidByEdgeId(std::set& edge_ids, + size_t t_vid) { + std::map> id2eidset; + for (auto mit = edge_ids.begin(); mit != edge_ids.end(); mit++) { + Edge& edge = target_graph.m_edges[*mit]; + size_t next_tvid = edge.m_target_id; + if (next_tvid == t_vid) { + next_tvid = edge.m_source_id; + } + auto sit = id2eidset.find(next_tvid); + if (sit != id2eidset.end()) { + sit->second.insert(*mit); + } else { + std::set ids; + ids.insert(*mit); + id2eidset[next_tvid] = ids; + } + } + return id2eidset; +} +// 根据查询图上query_edge获取目标图上可行的边id集合 +std::set SchemaRewrite::GetNextEdgeIds(Edge* query_edge, size_t t_vid, size_t direction) { + std::set edge_ids; + Node& target_node = target_graph.m_nodes[t_vid]; + if (direction == 0) { + for (size_t out_edge_id : target_node.m_outedges) { + Edge& out_edge = target_graph.m_edges[out_edge_id]; + auto out_edge_label = out_edge.m_labels.begin(); + auto it = query_edge->m_labels.find(*out_edge_label); + if (query_edge->m_labels.size() == 0 || it != query_edge->m_labels.end()) { + edge_ids.insert(out_edge_id); + } + } + } else if (direction == 1) { + for (size_t in_edge_id : target_node.m_inedges) { + Edge& in_edge = target_graph.m_edges[in_edge_id]; + auto in_edge_label = in_edge.m_labels.begin(); + auto it = query_edge->m_labels.find(*in_edge_label); + if (query_edge->m_labels.size() == 0 || it != query_edge->m_labels.end()) { + edge_ids.insert(in_edge_id); + } + } + } else if (direction == 2) { + for (size_t out_edge_id : target_node.m_outedges) { + Edge& out_edge = target_graph.m_edges[out_edge_id]; + auto out_edge_label = out_edge.m_labels.begin(); + auto it = query_edge->m_labels.find(*out_edge_label); + if (query_edge->m_labels.size() == 0 || it != query_edge->m_labels.end()) { + edge_ids.insert(out_edge_id); + } + } + for (size_t in_edge_id : target_node.m_inedges) { + Edge& in_edge = target_graph.m_edges[in_edge_id]; + auto in_edge_label = in_edge.m_labels.begin(); + auto it = query_edge->m_labels.find(*in_edge_label); + if (query_edge->m_labels.size() == 0 || it != query_edge->m_labels.end()) { + edge_ids.insert(in_edge_id); + } + } + } + return edge_ids; +} +// 根据label获取其唯一的id +int SchemaRewrite::GetLabelNum(std::string label) { + auto it = label2idx.find(label); + int label_num = -1; + if (it == label2idx.end()) { + label2idx.insert({label, label_cnt}); + label_num = label_cnt; + label_cnt++; + } else { + label_num = it->second; + } + return label_num; +} +// 将匹配的路径加入到schema graph map中 +void SchemaRewrite::AddMapping() { + cypher::SchemaNodeMap snm(*m_schema_node_map); + cypher::SchemaRelpMap srm(*m_schema_relp_map); + for (size_t j = 0; j < core_2.size(); j++) { + int core_id = core_2[j]; + std::string label = idx2label[core_id]; + auto p_id_it = vidx2pidx.find(j); + auto p_id = p_id_it->second; + snm[p_id] = label; + } + for (Edge e : query_graph.m_edges) { + // size_t src_id = e.m_source_id, tar_id = e.m_target_id; + // size_t core_src_id = core_2[src_id]; + // size_t core_tar_id = core_2[tar_id]; + + auto p_id_it = eidx2pidx.find(e.m_id); + auto p_id = p_id_it->second; + auto srm_it = srm.find(p_id); + + // parser::LinkDirection direction = std::get<3>(srm_it->second); + std::set edge_ids = edge_core[e.m_id]; + std::set edge_labels; + + for (auto it = edge_ids.begin(); it != edge_ids.end(); it++) { + Edge& edge = target_graph.m_edges[*it]; + auto lit = edge.m_labels.begin(); + edge_labels.insert(idx2label[*lit]); + } + if (srm_it != srm.end()) { + auto value = srm_it->second; + srm_it->second = std::tuple, + parser::LinkDirection>( + std::get<0>(value), std::get<1>(value), edge_labels, std::get<3>(value)); + } + } + sgm.push_back(std::make_pair(snm, srm)); +} +// 打印当前匹配的路径 +void SchemaRewrite::PrintMapping() { + FMA_LOG() << "Node Mapping:"; + for (size_t j = 0; j < query_size; j++) { + int core_id = core_2[j]; + std::string label = idx2label[core_id]; + FMA_LOG() << "(" << core_id << "[" << label << "]-" << j << ")"; + } + FMA_LOG(); + FMA_LOG() << "Edge Mapping:"; + for (Edge e : query_graph.m_edges) { + size_t src_id = e.m_source_id, tar_id = e.m_target_id; + size_t core_src_id = core_2[src_id], core_tar_id = core_2[tar_id]; + std::set edge_ids = edge_core[e.m_id]; + std::string label_str = ""; + for (auto i = edge_ids.begin(); i != edge_ids.end(); i++) { + Edge& edge = target_graph.m_edges[*i]; + int label_id = 0; + for (auto it = edge.m_labels.begin(); it != edge.m_labels.end(); it++) { + label_id = *it; + break; + } + label_str += idx2label[label_id]; + label_str += "."; + } + std::string src_label = idx2label[core_src_id]; + std::string tar_label = idx2label[core_tar_id]; + FMA_LOG() << "(" << src_id << "[" << src_label << "])-[" << label_str << "]-(" << tar_id + << "[" << tar_label << "]) "; + } +} + +void SchemaRewrite::PrintLabel2Idx() { + for (auto it : label2idx) { + FMA_LOG() << it.first << " " << it.second; + } +} + +}; // namespace cypher::rewrite diff --git a/src/cypher/execution_plan/optimization/rewrite/schema_rewrite.h b/src/cypher/execution_plan/optimization/rewrite/schema_rewrite.h new file mode 100644 index 00000000..f5389dbd --- /dev/null +++ b/src/cypher/execution_plan/optimization/rewrite/schema_rewrite.h @@ -0,0 +1,89 @@ +/** + * Copyright 2022 AntGroup CO., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +// +// Created by liyunlong2000 on 23-07-19. +// +#pragma once +#include +#include +#include +#include "cypher/execution_plan/optimization/rewrite/node.h" +#include "cypher/execution_plan/optimization/rewrite/graph.h" + +#include "cypher/execution_plan/ops/op.h" +#include "graph/common.h" +#include "parser/data_typedef.h" +#include "cypher/execution_plan/optimization/rewrite/state_info.h" +#include "cypher/execution_plan/execution_plan.h" + +namespace cypher { + +// key为pattern graph中的点id,value为label值 +typedef std::map SchemaNodeMap; +// key为pattern graph中的边id,value为(源点id,终点id,边label值,边方向,最小跳数,最大跳数)的六元组 +typedef std::map, parser::LinkDirection>> + SchemaRelpMap; +typedef std::pair SchemaGraphMap; + +namespace rewrite { + +class SchemaRewrite { + public: + std::map label2idx; // 每个label对应一个id + int label_cnt = 0; + std::vector idx2label; // id到label的对应 + + std::map vidx2pidx; // 顶点id到pattern graph中id对应 + std::map pidx2vidx; // pattern graph中id到顶点id对应 + std::map eidx2pidx; // 边id到pattern graph中id对应 + + std::vector> edge_core; // 保存查询图中边id到目标图中边id的对应关系 + std::vector sgm; + cypher::SchemaNodeMap* m_schema_node_map; + cypher::SchemaRelpMap* m_schema_relp_map; + + std::vector visited; + std::vector edge_visited; + + Graph target_graph; + Graph query_graph; + std::vector core_2; // 保存查询图中点id到目标图中点id的对应关系 + size_t query_size; + size_t target_size; + size_t map_cnt = 0; // 查询图中点已经匹配的个数 + size_t depth = 0; // 遍历的深度 + + SchemaRewrite() {} + ~SchemaRewrite() {} + + std::vector GetEffectivePath(const lgraph::SchemaInfo& schema_info, + cypher::SchemaNodeMap* schema_node_map, + cypher::SchemaRelpMap* schema_relp_map); + int GetLabelNum(std::string label); + void PrintLabel2Idx(); + void PrintMapping(); + void AddMapping(); + void Reset(); + bool CheckNodeLabel(size_t vid, size_t t_vid); + // void Backtrack(size_t vid); + std::map> GetNextTVidByEdgeId(std::set& edge_ids, + size_t t_vid); + std::set GetNextEdgeIds(Edge* edge, size_t t_vid, size_t direction); + void MatchRecursive(size_t vid, size_t t_vid); + std::vector GenCandidateStateInfo(); +}; + +}; // namespace rewrite +}; // namespace cypher diff --git a/src/cypher/execution_plan/optimization/rewrite/state_info.h b/src/cypher/execution_plan/optimization/rewrite/state_info.h new file mode 100644 index 00000000..6b2c821a --- /dev/null +++ b/src/cypher/execution_plan/optimization/rewrite/state_info.h @@ -0,0 +1,41 @@ +/** + * Copyright 2022 AntGroup CO., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +// +// Created by liyunlong2000 on 23-07-19. +// +#pragma once +#include + +namespace cypher::rewrite { + +class StateInfo { + public: + size_t m_vid; + size_t m_next_vid; + size_t m_eid; + size_t m_direction; + std::map> m_id_map; + StateInfo() {} + StateInfo(size_t vid, size_t next_vid, size_t eid, size_t direction, + std::map>* id_map) { + m_vid = vid; + m_next_vid = next_vid; + m_eid = eid; + m_direction = direction; + m_id_map = *id_map; + } +}; + +}; // namespace cypher::rewrite diff --git a/src/cypher/execution_plan/runtime_context.h b/src/cypher/execution_plan/runtime_context.h index 637d1405..9408e354 100644 --- a/src/cypher/execution_plan/runtime_context.h +++ b/src/cypher/execution_plan/runtime_context.h @@ -72,10 +72,8 @@ class RTContext : public SubmitQueryContext { bool Check(std::string &msg) const { if (!SubmitQueryContext::Check(msg)) return false; - if (ac_db_) { - msg = "Access controlled db not empty"; - return false; - } + // We removed the check for the existence of ac_db_ during execution plan execution + // since ac_db_ is already created during the execution plan optimization phase if (txn_) { msg = "Previous transaction not closed."; return false; diff --git a/src/cypher/execution_plan/scheduler.cpp b/src/cypher/execution_plan/scheduler.cpp index 4551413d..6253d5fa 100644 --- a/src/cypher/execution_plan/scheduler.cpp +++ b/src/cypher/execution_plan/scheduler.cpp @@ -56,7 +56,7 @@ void Scheduler::Eval(RTContext *ctx, const std::string &script, ElapsedTime &ela FMA_DBG_STREAM(Logger()) << sql_query.ToString(); } plan = std::make_shared(); - plan->Build(visitor.GetQuery(), visitor.CommandType()); + plan->Build(visitor.GetQuery(), visitor.CommandType(), ctx); plan->Validate(ctx); if (visitor.CommandType() == parser::CmdType::EXPLAIN) { ctx->result_info_ = std::make_unique(); diff --git a/src/cypher/graph/node.h b/src/cypher/graph/node.h index ea08c8fc..82094ddd 100644 --- a/src/cypher/graph/node.h +++ b/src/cypher/graph/node.h @@ -57,6 +57,8 @@ class Node { const std::string &Label() const; + void SetLabel(const std::string &label) { label_ = label; } + const std::string &Alias() const; bool &Visited() { return visited_; } diff --git a/src/cypher/graph/relationship.h b/src/cypher/graph/relationship.h index 7216e51e..e2fe445e 100644 --- a/src/cypher/graph/relationship.h +++ b/src/cypher/graph/relationship.h @@ -60,6 +60,8 @@ class Relationship { const std::set &Types() const; + void SetTypes(const std::set &types) { types_ = types; } + NodeID Lhs() const { return lhs_; } NodeID Rhs() const { return rhs_; } diff --git a/test/cypher_plan_validate.json b/test/cypher_plan_validate.json index ecf88c06..eaa69684 100644 --- a/test/cypher_plan_validate.json +++ b/test/cypher_plan_validate.json @@ -94,6 +94,23 @@ "plan": "Execution Plan:\nProduce Results\n Project [a,k]\n Apply\n Unwind [([b],a,+),k]\n Argument [a]\n Project [a]\n", "res": 2 } + ], + "schema_rewrite": [ + { + "query": "MATCH p=(n1)-[r1]->(n2)-[r2]->(m:Person) return count(p)", + "plan": "Execution Plan:\nProduce Results\n Aggregate [count(p)]\n Expand(All) [n2 --> m ]\n Expand(All) [n1 --> n2 ]\n Node By Label Scan [n1:Person]\n", + "res": 1 + }, + { + "query": "MATCH p1=(n1)-[r1]->(n2)-[r2]->(m1:City),p2=(n3)-[r3]->(m2:Film) return count(p1)", + "plan": "Execution Plan:\nProduce Results\n Aggregate [count(p1)]\n Cartesian Product\n Expand(All) [n2 --> m1 ]\n Expand(All) [n1 --> n2 ]\n Node By Label Scan [n1:Person]\n Expand(All) [n3 --> m2 ]\n Node By Label Scan [n3:Person]\n", + "res": 1 + }, + { + "query": "MATCH p1=(n1)-[r1]->(n2)-[r2]->(m1:City) with count(p1) as cp match p1=(n1)-[r1]->(m1:Film) return count(p1)", + "plan": "Execution Plan:\nProduce Results\n Aggregate [count(p1)]\n Apply\n Expand(All) [n1 --> m1 ]\n Node By Label Scan Dynamic [n1:Person]\n Argument [cp]\n Aggregate [cp]\n Expand(All) [n2 --> m1 ]\n Expand(All) [n1 --> n2 ]\n Node By Label Scan [n1:Person]\n", + "res": 1 + } ] } } diff --git a/test/graph_factory.h b/test/graph_factory.h index e050daf6..1cd06f03 100644 --- a/test/graph_factory.h +++ b/test/graph_factory.h @@ -24,6 +24,214 @@ #include "lgraph/lgraph.h" +static const std::map yago_data = { + {"yago.conf", R"( +{ + "schema": [ + { + "label" : "Person", + "type" : "VERTEX", + "primary" : "name", + "properties" : [ + {"name" : "name", "type":"STRING"}, + {"name" : "birthyear", "type":"INT16", "optional":true} + ] + }, + { + "label" : "City", + "type" : "VERTEX", + "primary" : "name", + "properties" : [ + {"name": "name", "type":"STRING"} + ] + }, + { + "label" : "Film", + "primary": "title", + "type" : "VERTEX", + "properties" : [ + {"name": "title", "type":"STRING"} + ] + }, + { + "label" : "HAS_CHILD", + "type" : "EDGE" + }, + { + "label" : "MARRIED", + "type" : "EDGE" + }, + { + "label" : "BORN_IN", + "type" : "EDGE", + "properties" : [ + {"name" : "reg_time", "type":"DATETIME", "optional":true}, + {"name" : "weight", "type":"FLOAT", "optional":true} + ] + }, + { + "label" : "DIRECTED", + "type" : "EDGE" + }, + { + "label" : "WROTE_MUSIC_FOR", + "type" : "EDGE" + }, + { + "label" : "ACTED_IN", + "type" : "EDGE", + "properties" : [ + {"name" : "charactername", "type":"STRING"} + ] + } + ], + "files" : [ + { + "path" : "person.csv", + "format" : "CSV", + "label" : "Person", + "columns" : ["name","birthyear"] + }, + { + "path" : "city.csv", + "format" : "CSV", + "label" : "City", + "columns" : ["name"] + }, + { + "path" : "film.csv", + "format" : "CSV", + "label" : "Film", + "columns" : ["title"] + }, + { + "path" : "has_child.csv", + "format" : "CSV", + "label" : "HAS_CHILD", + "SRC_ID" : "Person", + "DST_ID" : "Person", + "columns" : ["SRC_ID","DST_ID"] + }, + { + "path" : "married.csv", + "format" : "CSV", + "label" : "MARRIED", + "SRC_ID" : "Person", + "DST_ID" : "Person", + "columns" : ["SRC_ID","DST_ID"] + }, + { + "path" : "born_in.csv", + "format" : "CSV", + "label" : "BORN_IN", + "SRC_ID" : "Person", + "DST_ID" : "City", + "columns" : ["SRC_ID","DST_ID","reg_time","weight"] + }, + { + "path" : "directed.csv", + "format" : "CSV", + "label" : "DIRECTED", + "SRC_ID" : "Person", + "DST_ID" : "Film", + "columns" : ["SRC_ID","DST_ID"] + }, + { + "path" : "wrote.csv", + "format" : "CSV", + "label" : "WROTE_MUSIC_FOR", + "SRC_ID" : "Person", + "DST_ID" : "Film", + "columns" : ["SRC_ID","DST_ID"] + }, + { + "path" : "acted_in.csv", + "format" : "CSV", + "label" : "ACTED_IN", + "SRC_ID" : "Person", + "DST_ID" : "Film", + "columns" : ["SRC_ID","DST_ID","charactername"] + } + ] +} + )"}, + + {"person.csv", + R"(Rachel Kempson,1910 +Michael Redgrave,1908 +Vanessa Redgrave,1937 +Corin Redgrave,1939 +Liam Neeson,1952 +Natasha Richardson,1963 +Richard Harris,1930 +Dennis Quaid,1954 +Lindsay Lohan,1986 +Jemma Redgrave,1965 +Roy Redgrave,1873 +John Williams,1932 +Christopher Nolan,1970 +)"}, + + {"city.csv", + R"(New York +London +Houston +)"}, + + {"film.csv", + R"("Goodbye, Mr. Chips" +Batman Begins +Harry Potter and the Sorcerer's Stone +The Parent Trap +Camelot +)"}, + + {"has_child.csv", + R"(Rachel Kempson,Vanessa Redgrave +Rachel Kempson,Corin Redgrave +Michael Redgrave,Vanessa Redgrave +Michael Redgrave,Corin Redgrave +Corin Redgrave,Jemma Redgrave +Vanessa Redgrave,Natasha Richardson +Roy Redgrave,Michael Redgrave +)"}, + + {"married.csv", + R"(Rachel Kempson,Michael Redgrave +Michael Redgrave,Rachel Kempson +Natasha Richardson,Liam Neeson +Liam Neeson,Natasha Richardson +)"}, + + {"born_in.csv", + R"(Vanessa Redgrave,London,2023-05-01 10:00:00,20.21 +Natasha Richardson,London,2023-05-01 11:00:00,20.18 +Christopher Nolan,London,2023-05-01 12:00:00,19.93 +Dennis Quaid,Houston,2023-05-01 13:00:00,19.11 +Lindsay Lohan,New York,2023-05-01 14:00:00,20.62 +John Williams,New York,2023-05-01 15:00:00,20.55 +)"}, + + {"directed.csv", + R"(Christopher Nolan,Batman Begins +)"}, + + {"wrote.csv", + R"(John Williams,Harry Potter and the Sorcerer's Stone +John Williams,"Goodbye, Mr. Chips" +)"}, + + {"acted_in.csv", + R"(Michael Redgrave,"Goodbye, Mr. Chips",The Headmaster +Vanessa Redgrave,Camelot,Guenevere +Richard Harris,Camelot,King Arthur +Richard Harris,Harry Potter and the Sorcerer's Stone,Albus Dumbledore +Natasha Richardson,The Parent Trap,Liz James +Dennis Quaid,The Parent Trap,Nick Parker +Lindsay Lohan,The Parent Trap,Halle/Annie +Liam Neeson,Batman Begins,Henri Ducard +)"}}; + class GraphFactory { public: static void create_snapshot(const std::string& dir = "./testdb") { @@ -142,8 +350,14 @@ class GraphFactory { } static void WriteYagoFiles() { - static const std::map data = { - {"yago.conf", R"( + CreateCsvFiles(yago_data); + } + + static void WriteYagoFilesWithConstraints() { + // add constraints for yago.conf + auto yago_data_with_constraints = yago_data; + yago_data_with_constraints.at("yago.conf") = +R"( { "schema": [ { @@ -171,24 +385,42 @@ class GraphFactory { {"name": "title", "type":"STRING"} ] }, - {"label" : "HAS_CHILD", "type" : "EDGE"}, - {"label" : "MARRIED", "type" : "EDGE"}, + { + "label" : "HAS_CHILD", + "type" : "EDGE", + "constraints": [["Person", "Person"]] + }, + { + "label" : "MARRIED", + "type" : "EDGE", + "constraints": [["Person", "Person"]] + }, { "label" : "BORN_IN", "type" : "EDGE", "properties" : [ {"name" : "reg_time", "type":"DATETIME", "optional":true}, {"name" : "weight", "type":"FLOAT", "optional":true} - ] + ], + "constraints": [["Person", "City"]] + }, + { + "label" : "DIRECTED", + "type" : "EDGE", + "constraints": [["Person", "Film"]] + }, + { + "label" : "WROTE_MUSIC_FOR", + "type" : "EDGE", + "constraints": [["Person", "Film"]] }, - {"label" : "DIRECTED", "type" : "EDGE"}, - {"label" : "WROTE_MUSIC_FOR", "type" : "EDGE"}, { "label" : "ACTED_IN", "type" : "EDGE", "properties" : [ {"name" : "charactername", "type":"STRING"} - ] + ], + "constraints": [["Person", "Film"]] } ], "files" : [ @@ -260,85 +492,9 @@ class GraphFactory { } ] } - )"}, +)"; - {"person.csv", - R"(Rachel Kempson,1910 -Michael Redgrave,1908 -Vanessa Redgrave,1937 -Corin Redgrave,1939 -Liam Neeson,1952 -Natasha Richardson,1963 -Richard Harris,1930 -Dennis Quaid,1954 -Lindsay Lohan,1986 -Jemma Redgrave,1965 -Roy Redgrave,1873 -John Williams,1932 -Christopher Nolan,1970 -)"}, - - {"city.csv", - R"(New York -London -Houston -)"}, - - {"film.csv", - R"("Goodbye, Mr. Chips" -Batman Begins -Harry Potter and the Sorcerer's Stone -The Parent Trap -Camelot -)"}, - - {"has_child.csv", - R"(Rachel Kempson,Vanessa Redgrave -Rachel Kempson,Corin Redgrave -Michael Redgrave,Vanessa Redgrave -Michael Redgrave,Corin Redgrave -Corin Redgrave,Jemma Redgrave -Vanessa Redgrave,Natasha Richardson -Roy Redgrave,Michael Redgrave -)"}, - - {"married.csv", - R"(Rachel Kempson,Michael Redgrave -Michael Redgrave,Rachel Kempson -Natasha Richardson,Liam Neeson -Liam Neeson,Natasha Richardson -)"}, - - {"born_in.csv", - R"(Vanessa Redgrave,London,2023-05-01 10:00:00,20.21 -Natasha Richardson,London,2023-05-01 11:00:00,20.18 -Christopher Nolan,London,2023-05-01 12:00:00,19.93 -Dennis Quaid,Houston,2023-05-01 13:00:00,19.11 -Lindsay Lohan,New York,2023-05-01 14:00:00,20.62 -John Williams,New York,2023-05-01 15:00:00,20.55 -)"}, - - {"directed.csv", - R"(Christopher Nolan,Batman Begins -)"}, - - {"wrote.csv", - R"(John Williams,Harry Potter and the Sorcerer's Stone -John Williams,"Goodbye, Mr. Chips" -)"}, - - {"acted_in.csv", - R"(Michael Redgrave,"Goodbye, Mr. Chips",The Headmaster -Vanessa Redgrave,Camelot,Guenevere -Richard Harris,Camelot,King Arthur -Richard Harris,Harry Potter and the Sorcerer's Stone,Albus Dumbledore -Natasha Richardson,The Parent Trap,Liz James -Dennis Quaid,The Parent Trap,Nick Parker -Lindsay Lohan,The Parent Trap,Halle/Annie -Liam Neeson,Batman Begins,Henri Ducard -)"}}; - - CreateCsvFiles(data); + CreateCsvFiles(yago_data_with_constraints); } /** @@ -362,4 +518,22 @@ Liam Neeson,Batman Begins,Henri Ducard import_v3::Importer importer(config); importer.DoImportOffline(); } + + // add edge constraints for yago + static void create_yago_with_constraints(const std::string& dir = "./lgraph_db") { + using namespace lgraph; + WriteYagoFilesWithConstraints(); + import_v3::Importer::Config config; + config.config_file = "./yago.conf"; // the config file specifying which files to import + config.db_dir = dir; // db data dir to use + config.delete_if_exists = true; + config.graph = "default"; + + config.parse_block_threads = 1; + config.parse_file_threads = 1; + config.generate_sst_threads = 1; + + import_v3::Importer importer(config); + importer.DoImportOffline(); + } }; diff --git a/test/test_cypher.cpp b/test/test_cypher.cpp index d368867b..424e7f98 100644 --- a/test/test_cypher.cpp +++ b/test/test_cypher.cpp @@ -1,4 +1,4 @@ -/** +/** * Copyright 2022 AntGroup CO., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -66,7 +66,7 @@ int test_file_script(const std::string &file, cypher::RTContext *ctx) { for (auto &p : s.parts) p.symbol_table.DumpTable(); } cypher::ExecutionPlan execution_plan; - execution_plan.Build(stmt, visitor.CommandType()); + execution_plan.Build(stmt, visitor.CommandType(), ctx); execution_plan.Validate(ctx); execution_plan.DumpGraph(); execution_plan.DumpPlan(0, false); @@ -91,7 +91,7 @@ int test_interactive(cypher::RTContext *ctx) { parser.addErrorListener(&CypherErrorListener::INSTANCE); visitor.visit(parser.oC_Cypher()); cypher::ExecutionPlan execution_plan; - execution_plan.Build(visitor.GetQuery(), visitor.CommandType()); + execution_plan.Build(visitor.GetQuery(), visitor.CommandType(), ctx); execution_plan.Validate(ctx); execution_plan.Execute(ctx); UT_LOG() << "Result:\n" << ctx->result_->Dump(false); @@ -115,7 +115,7 @@ void eval_scripts_check(cypher::RTContext *ctx, const std::vector & parser.addErrorListener(&CypherErrorListener::INSTANCE); CypherBaseVisitor visitor(ctx, parser.oC_Cypher()); cypher::ExecutionPlan execution_plan; - execution_plan.Build(visitor.GetQuery(), visitor.CommandType()); + execution_plan.Build(visitor.GetQuery(), visitor.CommandType(), ctx); execution_plan.Validate(ctx); execution_plan.DumpGraph(); execution_plan.DumpPlan(0, false); @@ -285,8 +285,14 @@ int test_query(cypher::RTContext *ctx) { {"MATCH (n:Person) WHERE n.birthyear > 1900 AND n.birthyear < 2000 RETURN count(n)", 1}, {"MATCH (n:Person) RETURN n.birthyear, count(n)", 13}, {"MATCH (f:Film)<-[:ACTED_IN]-(p:Person)-[:BORN_IN]->(c:City) " - "RETURN c.name, count(f) AS sum ORDER BY sum DESC", 3}, - }; + "RETURN c.name, count(f) AS sum ORDER BY sum DESC", + 3}, + /* test schema rewrite optimization */ + {"MATCH p=(n1)-[r1]->(n2)-[r2]->(m:Person) return count(p)", 1}, + {"MATCH p1=(n1)-[r1]->(n2)-[r2]->(m1:City),p2=(n3)-[r3]->(m2:Film) return count(p1)", 1}, + {"MATCH p1=(n1)-[r1]->(n2)-[r2]->(m1:City) with count(p1) as cp match " + "p1=(n1)-[r1]->(m1:Film) return count(p1)", + 1}}; std::vector scripts; std::vector check; for (auto &s : script_check) { @@ -2396,7 +2402,8 @@ TEST_P(TestCypher, Cypher) { char **argv = _ut_argv; fma_common::Configuration config; config.Add(test_case, "tc", true).Comment(str); - config.Add(database, "d", true).Comment("Select database: 0-current, 1-new yago, 2-empty"); + config.Add(database, "d", true) + .Comment("Select database: 0-current, 1-new yago, 2-empty, 3-yago with constraints"); config.Add(file, "f", true).Comment("File path"); config.Add(verbose, "v", true).Comment("Verbose: 0-WARNING, 1-INFO, 2-DEBUG"); config.ExitAfterHelp(); @@ -2415,8 +2422,11 @@ TEST_P(TestCypher, Cypher) { } else if (database == 1) { fma_common::FileSystem::GetFileSystem("./testdb").RemoveDir("./testdb"); GraphFactory::create_yago("./testdb"); + } else if (database == 2) { + fma_common::FileSystem::GetFileSystem("./testdb").RemoveDir("./testdb"); } else { fma_common::FileSystem::GetFileSystem("./testdb").RemoveDir("./testdb"); + GraphFactory::create_yago_with_constraints("./testdb"); } lgraph::Galaxy::Config gconf; gconf.dir = "./testdb"; @@ -2560,7 +2570,7 @@ using namespace ::testing; INSTANTIATE_TEST_CASE_P( TestCypher, TestCypher, - Values(ParamCypher{3, 1}, ParamCypher{4, 1}, ParamCypher{5, 1}, ParamCypher{6, 1}, + Values(ParamCypher{3, 1}, ParamCypher{4, 3}, ParamCypher{5, 1}, ParamCypher{6, 1}, ParamCypher{7, 1}, ParamCypher{8, 1}, ParamCypher{9, 1}, ParamCypher{10, 1}, ParamCypher{11, 1}, ParamCypher{12, 1}, ParamCypher{13, 1}, ParamCypher{14, 1}, ParamCypher{15, 1}, ParamCypher{16, 1}, ParamCypher{18, 1}, ParamCypher{101, 1}, diff --git a/test/test_cypher_plan.cpp b/test/test_cypher_plan.cpp index ea19a091..d030537f 100644 --- a/test/test_cypher_plan.cpp +++ b/test/test_cypher_plan.cpp @@ -60,7 +60,7 @@ void eval_query_check(cypher::RTContext *ctx, const std::string &query, double t0, t1, t2; t0 = fma_common::GetTime(); - execution_plan.Build(visitor.GetQuery(), visitor.CommandType()); + execution_plan.Build(visitor.GetQuery(), visitor.CommandType(), ctx); execution_plan.DumpGraph(); std::string res_plan = execution_plan.DumpPlan(0, false); @@ -83,7 +83,7 @@ int test_cypher_plan(const nlohmann::json &conf) { dataset = el["dataset"]; if (dataset == "yago") { UT_LOG() << "test on dataset:" << dataset; - GraphFactory::create_yago("./testdb"); + GraphFactory::create_yago_with_constraints("./testdb"); lgraph::Galaxy::Config gconf; gconf.dir = "./testdb"; lgraph::Galaxy galaxy(gconf, true, nullptr); diff --git a/test/test_edge_constraint.cpp b/test/test_edge_constraint.cpp index 6ae8f8de..af26dcd5 100644 --- a/test/test_edge_constraint.cpp +++ b/test/test_edge_constraint.cpp @@ -98,7 +98,7 @@ static void eval_scripts(cypher::RTContext *ctx, const std::vector parser.addErrorListener(&CypherErrorListener::INSTANCE); CypherBaseVisitor visitor(ctx, parser.oC_Cypher()); cypher::ExecutionPlan execution_plan; - execution_plan.Build(visitor.GetQuery(), visitor.CommandType()); + execution_plan.Build(visitor.GetQuery(), visitor.CommandType(), ctx); execution_plan.Validate(ctx); execution_plan.DumpGraph(); execution_plan.DumpPlan(0, false);