Skip to content

Commit

Permalink
feat(analytical): support use int32_t as vid_t to save memory for sma…
Browse files Browse the repository at this point in the history
…ll graphs (#3063)

Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com>
  • Loading branch information
sighingnow committed Jul 28, 2023
1 parent f590fd1 commit 33576bf
Show file tree
Hide file tree
Showing 14 changed files with 154 additions and 38 deletions.
28 changes: 16 additions & 12 deletions analytical_engine/core/loader/dynamic_to_arrow_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ struct COOBuilder {};
template <typename DST_FRAG_T>
struct COOBuilder<DST_FRAG_T, int32_t> {
using oid_t = int32_t;
using vid_t = typename DST_FRAG_T::vid_t;
using src_fragment_t = DynamicFragment;
using dst_fragment_t = DST_FRAG_T;

Expand All @@ -310,7 +311,7 @@ struct COOBuilder<DST_FRAG_T, int32_t> {
continue;
}
auto u_oid = src_frag->GetId(u);
vineyard::property_graph_types::VID_TYPE u_gid;
vid_t u_gid;

CHECK(dst_vm->GetGid(fid, 0, u_oid.GetInt(), u_gid));

Expand All @@ -320,7 +321,7 @@ struct COOBuilder<DST_FRAG_T, int32_t> {
continue;
}
auto v_oid = src_frag->GetId(v);
vineyard::property_graph_types::VID_TYPE v_gid;
vid_t v_gid;

CHECK(dst_vm->GetGid(0, v_oid.GetInt(), v_gid));
ARROW_OK_OR_RAISE(src_builder.Append(u_gid));
Expand All @@ -331,7 +332,7 @@ struct COOBuilder<DST_FRAG_T, int32_t> {
auto& v = e.neighbor;
if (src_frag->IsOuterVertex(v)) {
auto v_oid = src_frag->GetId(v);
vineyard::property_graph_types::VID_TYPE v_gid;
vid_t v_gid;

CHECK(dst_vm->GetGid(0, v_oid.GetInt(), v_gid));
ARROW_OK_OR_RAISE(src_builder.Append(v_gid));
Expand All @@ -355,6 +356,7 @@ struct COOBuilder<DST_FRAG_T, int32_t> {
template <typename DST_FRAG_T>
struct COOBuilder<DST_FRAG_T, int64_t> {
using oid_t = int64_t;
using vid_t = typename DST_FRAG_T::vid_t;
using src_fragment_t = DynamicFragment;
using dst_fragment_t = DST_FRAG_T;

Expand All @@ -374,7 +376,7 @@ struct COOBuilder<DST_FRAG_T, int64_t> {
continue;
}
auto u_oid = src_frag->GetId(u);
vineyard::property_graph_types::VID_TYPE u_gid;
vid_t u_gid;

CHECK(dst_vm->GetGid(fid, 0, u_oid.GetInt64(), u_gid));

Expand All @@ -384,7 +386,7 @@ struct COOBuilder<DST_FRAG_T, int64_t> {
continue;
}
auto v_oid = src_frag->GetId(v);
vineyard::property_graph_types::VID_TYPE v_gid;
vid_t v_gid;

CHECK(dst_vm->GetGid(0, v_oid.GetInt64(), v_gid));
ARROW_OK_OR_RAISE(src_builder.Append(u_gid));
Expand All @@ -395,7 +397,7 @@ struct COOBuilder<DST_FRAG_T, int64_t> {
auto& v = e.neighbor;
if (src_frag->IsOuterVertex(v)) {
auto v_oid = src_frag->GetId(v);
vineyard::property_graph_types::VID_TYPE v_gid;
vid_t v_gid;

CHECK(dst_vm->GetGid(0, v_oid.GetInt64(), v_gid));
ARROW_OK_OR_RAISE(src_builder.Append(v_gid));
Expand All @@ -419,6 +421,7 @@ struct COOBuilder<DST_FRAG_T, int64_t> {
template <typename DST_FRAG_T>
struct COOBuilder<DST_FRAG_T, std::string> {
using oid_t = int64_t;
using vid_t = typename DST_FRAG_T::vid_t;
using src_fragment_t = DynamicFragment;
using dst_fragment_t = DST_FRAG_T;

Expand All @@ -438,7 +441,7 @@ struct COOBuilder<DST_FRAG_T, std::string> {
continue;
}
auto u_oid = src_frag->GetId(u);
vineyard::property_graph_types::VID_TYPE u_gid;
vid_t u_gid;

CHECK(dst_vm->GetGid(fid, 0, u_oid.GetString(), u_gid));

Expand All @@ -448,7 +451,7 @@ struct COOBuilder<DST_FRAG_T, std::string> {
continue;
}
auto v_oid = src_frag->GetId(v);
vineyard::property_graph_types::VID_TYPE v_gid;
vid_t v_gid;

CHECK(dst_vm->GetGid(0, v_oid.GetString(), v_gid));
ARROW_OK_OR_RAISE(src_builder.Append(u_gid));
Expand All @@ -459,7 +462,7 @@ struct COOBuilder<DST_FRAG_T, std::string> {
auto& v = e.neighbor;
if (src_frag->IsOuterVertex(v)) {
auto v_oid = src_frag->GetId(v);
vineyard::property_graph_types::VID_TYPE v_gid;
vid_t v_gid;

CHECK(dst_vm->GetGid(0, v_oid.GetString(), v_gid));
ARROW_OK_OR_RAISE(src_builder.Append(v_gid));
Expand Down Expand Up @@ -606,14 +609,15 @@ class VertexMapConverter<vineyard::ArrowVertexMap<
*
* @tparam OID_T OID type
*/
template <typename OID_T, typename VERTEX_MAP_T, bool COMPACT = false>
template <typename OID_T, typename VID_T, typename VERTEX_MAP_T,
bool COMPACT = false>
class DynamicToArrowConverter {
using src_fragment_t = DynamicFragment;
using oid_t = OID_T;
using vid_t = typename src_fragment_t::vid_t;
using vertex_map_t = VERTEX_MAP_T;
using dst_fragment_t =
vineyard::ArrowFragment<oid_t, vid_t, vertex_map_t, COMPACT>;
vineyard::ArrowFragment<OID_T, VID_T, vertex_map_t, COMPACT>;
using oid_array_t = typename vineyard::ConvertToArrowType<oid_t>::ArrayType;

public:
Expand Down Expand Up @@ -769,7 +773,7 @@ class DynamicToArrowConverter {
std::vector<std::shared_ptr<arrow::Field>> schema_vector = {
std::make_shared<arrow::Field>("src", arrow::uint64()),
std::make_shared<arrow::Field>("dst", arrow::uint64())};
COOBuilder<dst_fragment_t, oid_t> builder;
COOBuilder<dst_fragment_t, OID_T> builder;
BOOST_LEAF_AUTO(src_dst_array, builder.Build(src_frag, dst_vm));
std::shared_ptr<arrow::Array> src_array = src_dst_array.first,
dst_array = src_dst_array.second;
Expand Down
13 changes: 9 additions & 4 deletions analytical_engine/frame/property_graph_frame.cc
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,14 @@ ToArrowFragment(vineyard::Client& client, const grape::CommSpec& comm_spec,
std::shared_ptr<gs::IFragmentWrapper>& wrapper_in,
const std::string& dst_graph_name) {
#ifdef NETWORKX
static_assert(std::is_same<vid_t, gs::DynamicFragment::vid_t>::value,
"The type of ArrowFragment::vid_t does not match with the "
"DynamicFragment::vid_t");
if (!std::is_same<vid_t, gs::DynamicFragment::vid_t>::value) {
RETURN_GS_ERROR(vineyard::ErrorCode::kInvalidValueError,
"The type of vid_t '" + vineyard::type_name<vid_t>() +
"' does not match with the "
"DynamicFragment::vid_t '" +
vineyard::type_name<gs::DynamicFragment::vid_t>() +
"'");
}

if (wrapper_in->graph_def().graph_type() !=
gs::rpc::graph::DYNAMIC_PROPERTY) {
Expand Down Expand Up @@ -254,7 +259,7 @@ ToArrowFragment(vineyard::Client& client, const grape::CommSpec& comm_spec,
std::string(vineyard::type_name<oid_t>()));
}

gs::DynamicToArrowConverter<oid_t, vertex_map_t, compact_v> converter(
gs::DynamicToArrowConverter<oid_t, vid_t, vertex_map_t, compact_v> converter(
comm_spec, client);
BOOST_LEAF_AUTO(arrow_frag, converter.Convert(dynamic_frag));
VINEYARD_CHECK_OK(client.Persist(arrow_frag->id()));
Expand Down
2 changes: 1 addition & 1 deletion analytical_engine/test/test_convert.cc
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ int main(int argc, char** argv) {

BOOST_LEAF_AUTO(dynamic_frag, a2d_converter.Convert(arrow_frag));
LOG(INFO) << "ArrowFragment->DynamicFragment done.";
gs::DynamicToArrowConverter<oid_t, vertex_map_t> d2a_converter(
gs::DynamicToArrowConverter<oid_t, vid_t, vertex_map_t> d2a_converter(
comm_spec, client);
BOOST_LEAF_AUTO(arrow_frag1, d2a_converter.Convert(dynamic_frag));
LOG(INFO) << "DynamicFragment->ArrowFragment done.";
Expand Down
2 changes: 1 addition & 1 deletion coordinator/gscoordinator/op_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def _generate_runstep_request(session_id, dag_def, dag_bodies):
# TODO: make the stacktrace separated from normal error messages
# Too verbose.
if len(e.details()) > 3072: # 3k bytes
msg = f"{e.details()[:256]} ... [truncated]"
msg = f"{e.details()[:1024]} ... [truncated]"
else:
msg = e.details()
raise AnalyticalEngineInternalError(msg)
Expand Down
2 changes: 2 additions & 0 deletions coordinator/gscoordinator/template/CMakeLists.template
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ if (CYTHON_PREGEL_APP)
add_library(${FRAME_NAME} SHARED ${FILES_NEED_COMPILE}
${ANALYTICAL_ENGINE_FRAME_DIR}/cython_app_frame.cc)
target_compile_definitions(${FRAME_NAME} PRIVATE _OID_TYPE=$_oid_type
_VID_TYPE=$_vid_type
_VD_TYPE=$_vd_type
_MD_TYPE=$_md_type
_MODULE_NAME=$_module_name
Expand All @@ -353,6 +354,7 @@ elseif (CYTHON_PIE_APP)
add_library(${FRAME_NAME} SHARED ${FILES_NEED_COMPILE}
${ANALYTICAL_ENGINE_FRAME_DIR}/cython_pie_app_frame.cc)
target_compile_definitions(${FRAME_NAME} PRIVATE _OID_TYPE=$_oid_type
_VID_TYPE=$_vid_type
_VD_TYPE=$_vd_type
_MD_TYPE=$_md_type
_MODULE_NAME=$_module_name
Expand Down
17 changes: 11 additions & 6 deletions coordinator/gscoordinator/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def get_app_sha256(attr, java_class_path: str):
java_jar_path,
java_app_class,
) = _codegen_app_info(attr, DEFAULT_GS_CONFIG_FILE, java_class_path)
graph_header, graph_type, _ = _codegen_graph_info(attr)
graph_header, graph_type, _, _ = _codegen_graph_info(attr)
logger.info(
"app type: %s (%s), graph type: %s (%s)",
app_class,
Expand Down Expand Up @@ -241,7 +241,7 @@ def get_app_sha256(attr, java_class_path: str):


def get_graph_sha256(attr):
_, graph_class, _ = _codegen_graph_info(attr)
_, graph_class, _, _ = _codegen_graph_info(attr)
return hashlib.sha256(graph_class.encode("utf-8", errors="ignore")).hexdigest()


Expand Down Expand Up @@ -445,7 +445,7 @@ def compile_app(
str(java_app_class),
)

graph_header, graph_type, graph_oid_type = _codegen_graph_info(attr)
graph_header, graph_type, graph_oid_type, graph_vid_type = _codegen_graph_info(attr)
if app_type == "java_pie":
logger.info(
"Check consistent between java app %s and graph %s",
Expand Down Expand Up @@ -556,6 +556,7 @@ def compile_app(
_analytical_engine_home=ANALYTICAL_ENGINE_HOME,
_frame_name=library_name,
_oid_type=graph_oid_type,
_vid_type=graph_vid_type,
_vd_type=vd_type,
_md_type=md_type,
_graph_type=graph_type,
Expand Down Expand Up @@ -599,7 +600,7 @@ def compile_graph_frame(
None: for consistency with compile_app.
"""
logger.info("Building graph library ...")
_, graph_class, _ = _codegen_graph_info(attr)
_, graph_class, _, _ = _codegen_graph_info(attr)

library_dir = os.path.join(workspace, library_name)
os.makedirs(library_dir, exist_ok=True)
Expand Down Expand Up @@ -782,7 +783,11 @@ def _pre_process_for_bind_app_op(op, op_result_pool, key_to_op, **kwargs):
)
)
op.attr[types_pb2.VID_TYPE].CopyFrom(
utils.s_to_attr(utils.data_type_to_cpp(vy_info.vid_type))
utils.s_to_attr(
utils.normalize_data_type_str(
utils.data_type_to_cpp(vy_info.vid_type)
)
)
)
op.attr[types_pb2.V_DATA_TYPE].CopyFrom(
utils.s_to_attr(utils.data_type_to_cpp(vy_info.vdata_type))
Expand Down Expand Up @@ -1735,7 +1740,7 @@ def compact_edges():
raise ValueError(
f"Unknown graph type: {graph_def_pb2.GraphTypePb.Name(graph_type)}"
)
return graph_header, graph_fqn, oid_type()
return graph_header, graph_fqn, oid_type(), vid_type()


def create_single_op_dag(op_type, config=None):
Expand Down
17 changes: 13 additions & 4 deletions docs/analytical_engine/performance_tuning.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ its property graphs. Basically, the `ArrowFragment` has the following members:
first the neighbor vertex id and the second is the index points to the
corresponding edge table.

By default, the type of `neighbor_vertex_id` is `uint64_t` and the type of
`edge_table_index` is `size_t`.
By default, the type of `neighbor_vertex_id` is `uint64_t` or `uint32_t` and
the type of `edge_table_index` is `size_t`.

The size of the `indptr` array is `num_edges`.

Expand All @@ -70,8 +70,8 @@ its property graphs. Basically, the `ArrowFragment` has the following members:
first the neighbor vertex id and the second is the index points to the
corresponding edge table.

By default, the type of `neighbor_vertex_id` is `uint64_t` and the type of
`edge_table_index` is `size_t`.
By default, the type of `neighbor_vertex_id` is `uint64_t` or `uint32_t` and
the type of `edge_table_index` is `size_t`.

The size of the `indptr` array is `num_edges`.

Expand Down Expand Up @@ -152,6 +152,15 @@ footprint as follows:

- Optimizing topologies:

- GraphScope uses `uint64_t` as the `VID_T` (internal vertex id) by default to support
large-scale graphs. However, as the analysis above shows, the type of `VID_T` is one of
the key factors that affect the memory footprint of the topology part.

If you are sure your graph is fairly small (fewer than `10^8` vertices; the exact
limit depends on the number of labels and the number of partitions), you can use `int32_t`
as the `VID_T` to reduce memory usage by passing the `vid_type="int32_t"` option to
`graphscope.g()` and `graphscope.load_from()`.

- GraphScope supports options `compact_edges=True` in `graphscope.g()` and `graphscope.load_from()`
to compact the `ie_lists` and `oe_lists` arrays using delta and varint encoding. Such compression
can half the memory footprint of the topology part, but has overhead in computation during
Expand Down
24 changes: 24 additions & 0 deletions python/graphscope/client/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -1251,13 +1251,34 @@ def g(
self,
incoming_data=None,
oid_type="int64",
vid_type="uint64",
directed=True,
generate_eid=True,
retain_oid=True,
vertex_map="global",
compact_edges=False,
use_perfect_hash=False,
) -> Union[Graph, GraphDAGNode]:
"""Construct a GraphScope graph object on the default session.
It will launch and set a session to default when there is no default session found.
See params detail in :class:`graphscope.framework.graph.GraphDAGNode`
Returns:
:class:`graphscope.framework.graph.GraphDAGNode`: Evaluated in eager mode.
Examples:
.. code:: python
>>> import graphscope
>>> g = graphscope.g()
>>> import graphscope
>>> sess = graphscope.session()
>>> g = sess.g() # creating graph on the session "sess"
"""
if (
isinstance(incoming_data, vineyard.ObjectID)
and repr(incoming_data) in self._vineyard_object_mapping_table
Expand All @@ -1270,6 +1291,7 @@ def g(
self,
incoming_data,
oid_type,
vid_type,
directed,
generate_eid,
retain_oid,
Expand Down Expand Up @@ -1698,6 +1720,7 @@ def get_controller(self, default):
def g(
incoming_data=None,
oid_type="int64",
vid_type="uint64",
directed=True,
generate_eid=True,
retain_oid=True,
Expand Down Expand Up @@ -1729,6 +1752,7 @@ def g(
return get_default_session().g(
incoming_data,
oid_type,
vid_type,
directed,
generate_eid,
retain_oid,
Expand Down
6 changes: 3 additions & 3 deletions python/graphscope/framework/dag_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,12 +203,12 @@ def add_labels_to_graph(graph, loader_op):
types_pb2.GRAPH_TYPE: utils.graph_type_to_attr(graph._graph_type),
types_pb2.DIRECTED: utils.b_to_attr(graph._directed),
types_pb2.OID_TYPE: utils.s_to_attr(graph._oid_type),
types_pb2.VID_TYPE: utils.s_to_attr(graph._vid_type),
types_pb2.GENERATE_EID: utils.b_to_attr(graph._generate_eid),
types_pb2.RETAIN_OID: utils.b_to_attr(graph._retain_oid),
types_pb2.VERTEX_MAP_TYPE: utils.i_to_attr(graph._vertex_map),
types_pb2.COMPACT_EDGES: utils.b_to_attr(graph._compact_edges),
types_pb2.USE_PERFECT_HASH: utils.b_to_attr(graph._use_perfect_hash),
types_pb2.VID_TYPE: utils.s_to_attr("uint64_t"),
types_pb2.IS_FROM_VINEYARD_ID: utils.b_to_attr(False),
types_pb2.IS_FROM_GAR: utils.b_to_attr(False),
}
Expand Down Expand Up @@ -250,12 +250,12 @@ def consolidate_columns(
types_pb2.GRAPH_TYPE: utils.graph_type_to_attr(graph._graph_type),
types_pb2.DIRECTED: utils.b_to_attr(graph._directed),
types_pb2.OID_TYPE: utils.s_to_attr(graph._oid_type),
types_pb2.VID_TYPE: utils.s_to_attr(graph._vid_type),
types_pb2.GENERATE_EID: utils.b_to_attr(graph._generate_eid),
types_pb2.RETAIN_OID: utils.b_to_attr(graph._retain_oid),
types_pb2.VERTEX_MAP_TYPE: utils.i_to_attr(graph._vertex_map),
types_pb2.COMPACT_EDGES: utils.b_to_attr(graph._compact_edges),
types_pb2.USE_PERFECT_HASH: utils.b_to_attr(graph._use_perfect_hash),
types_pb2.VID_TYPE: utils.s_to_attr("uint64_t"),
types_pb2.IS_FROM_VINEYARD_ID: utils.b_to_attr(False),
types_pb2.IS_FROM_GAR: utils.b_to_attr(False),
types_pb2.CONSOLIDATE_COLUMNS_LABEL: utils.s_to_attr(label),
Expand Down Expand Up @@ -1094,7 +1094,7 @@ def archive_graph(graph, path):
config = {
types_pb2.GRAPH_TYPE: utils.graph_type_to_attr(graph._graph_type),
types_pb2.OID_TYPE: utils.s_to_attr(graph._oid_type),
types_pb2.VID_TYPE: utils.s_to_attr("uint64_t"),
types_pb2.VID_TYPE: utils.s_to_attr(graph._vid_type),
types_pb2.VERTEX_MAP_TYPE: utils.i_to_attr(graph._vertex_map),
types_pb2.COMPACT_EDGES: utils.b_to_attr(graph._compact_edges),
types_pb2.USE_PERFECT_HASH: utils.b_to_attr(graph._use_perfect_hash),
Expand Down
Loading

0 comments on commit 33576bf

Please sign in to comment.