Skip to content

Commit

Permalink
Rewrite graph loading API. (#193)
Browse files Browse the repository at this point in the history
- Support building graphs iteratively with `add_vertices` and `add_edges`, and partially support lazy loading until the graph is used.

- Also add constrained versions of `remove_vertices` and `remove_edges`.

- Rewrite most test cases

- Update relevant notebooks and docs
  • Loading branch information
siyuan0322 committed Mar 22, 2021
1 parent 7a8f3f6 commit d180191
Show file tree
Hide file tree
Showing 59 changed files with 2,595 additions and 3,237 deletions.
55 changes: 22 additions & 33 deletions README-zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,39 +132,28 @@ GraphScope 以属性图(property graph)建模图数据。属性图中,点
请下载数据并将其解压缩到本地的挂载目录(在本例中为`~/test_data`)。

```python
g = sess.load_from(
vertices={
"paper": "/testingdata/ogbn_mag_small/paper.csv",
"author": "/testingdata/ogbn_mag_small/author.csv",
"institution": "/testingdata/ogbn_mag_small/institution.csv",
"field_of_study": "/testingdata/ogbn_mag_small/field_of_study.csv",
},
edges={
"affiliated": (
"/testingdata/ogbn_mag_small/author_affiliated_with_institution.csv",
[],
("src_id", "author"),
("dst_id", "institution"),
),
"cites": (
"/testingdata/ogbn_mag_small/paper_cites_paper.csv",
[],
("src_id", "paper"),
("dst_id", "paper"),
),
"hasTopic": (
"/testingdata/ogbn_mag_small/paper_has_topic_field_of_study.csv",
[],
("src_id", "paper"),
("dst_id", "field_of_study"),
),
"writes": (
"/testingdata/ogbn_mag_small/author_writes_paper.csv",
[],
("src_id", "author"),
("dst_id", "paper"),
),
},
g = graphscope.Graph(sess)
g = (
g.add_vertices("/testingdata/ogbn_mag_small/paper.csv", label="paper")
.add_vertices("/testingdata/ogbn_mag_small/author.csv", label="author")
.add_vertices("/testingdata/ogbn_mag_small/institution.csv", label="institution")
.add_vertices("/testingdata/ogbn_mag_small/field_of_study.csv", label="field_of_study")
.add_edges(
"/testingdata/ogbn_mag_small/author_affiliated_with_institution.csv",
label="affiliated", src_label="author", dst_label="institution",
)
.add_edges(
"/testingdata/ogbn_mag_small/paper_has_topic_field_of_study.csv",
label="hasTopic", src_label="paper", dst_label="field_of_study",
)
.add_edges(
"/testingdata/ogbn_mag_small/paper_cites_paper.csv",
label="cites", src_label="paper", dst_label="paper",
)
.add_edges(
"/testingdata/ogbn_mag_small/author_writes_paper.csv",
label="writes", src_label="author", dst_label="paper",
)
)
```

Expand Down
55 changes: 22 additions & 33 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,39 +123,28 @@ To load this graph to GraphScope, one may use the code below with the [data file


```python
g = sess.load_from(
vertices={
"paper": "/testingdata/ogbn_mag_small/paper.csv",
"author": "/testingdata/ogbn_mag_small/author.csv",
"institution": "/testingdata/ogbn_mag_small/institution.csv",
"field_of_study": "/testingdata/ogbn_mag_small/field_of_study.csv",
},
edges={
"affiliated": (
"/testingdata/ogbn_mag_small/author_affiliated_with_institution.csv",
[],
("src_id", "author"),
("dst_id", "institution"),
),
"cites": (
"/testingdata/ogbn_mag_small/paper_cites_paper.csv",
[],
("src_id", "paper"),
("dst_id", "paper"),
),
"hasTopic": (
"/testingdata/ogbn_mag_small/paper_has_topic_field_of_study.csv",
[],
("src_id", "paper"),
("dst_id", "field_of_study"),
),
"writes": (
"/testingdata/ogbn_mag_small/author_writes_paper.csv",
[],
("src_id", "author"),
("dst_id", "paper"),
),
},
g = graphscope.Graph(sess)
g = (
g.add_vertices("/testingdata/ogbn_mag_small/paper.csv", label="paper")
.add_vertices("/testingdata/ogbn_mag_small/author.csv", label="author")
.add_vertices("/testingdata/ogbn_mag_small/institution.csv", label="institution")
.add_vertices("/testingdata/ogbn_mag_small/field_of_study.csv", label="field_of_study")
.add_edges(
"/testingdata/ogbn_mag_small/author_affiliated_with_institution.csv",
label="affiliated", src_label="author", dst_label="institution",
)
.add_edges(
"/testingdata/ogbn_mag_small/paper_has_topic_field_of_study.csv",
label="hasTopic", src_label="paper", dst_label="field_of_study",
)
.add_edges(
"/testingdata/ogbn_mag_small/paper_cites_paper.csv",
label="cites", src_label="paper", dst_label="paper",
)
.add_edges(
"/testingdata/ogbn_mag_small/author_writes_paper.csv",
label="writes", src_label="author", dst_label="paper",
)
)
```

Expand Down
73 changes: 38 additions & 35 deletions demo/node_classification_on_citation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -48,51 +48,54 @@
"outputs": [],
"source": [
"# Loading graph\n",
"\n",
"def load_ogbn_mag(sess, prefix):\n",
" \"\"\"Load ogbn_mag dataset as a ArrowProperty Graph.\n",
" \"\"\"Load ogbn_mag graph.\n",
" The ogbn-mag dataset is a heterogeneous network composed of a subset of the Microsoft Academic Graph (MAG).\n",
" See more details here:\n",
" https://ogb.stanford.edu/docs/nodeprop/#ogbn-mag\n",
"\n",
" Args:\n",
" sess (graphscope.Session): Load graph within the session.\n",
" sess (:class:`graphscope.Session`): Load graph within the session.\n",
" prefix (str): Data directory.\n",
" directed (bool, optional): Determine to load a directed or undirected graph.\n",
" Defaults to True.\n",
"\n",
" Returns:\n",
" graphscope.Graph: A Graph object which graph type is ArrowProperty\n",
" :class:`graphscope.Graph`: A Graph object which graph type is ArrowProperty\n",
" \"\"\"\n",
" vertices={\n",
" \"paper\": os.path.join(prefix, \"paper.csv\"),\n",
" \"author\": os.path.join(prefix, \"author.csv\"),\n",
" \"institution\": os.path.join(prefix, \"institution.csv\"),\n",
" \"field_of_study\": os.path.join(prefix, \"field_of_study.csv\"),\n",
" }\n",
" edges={\n",
" \"affiliated\": (\n",
" graph = Graph(sess)\n",
" graph = (\n",
" graph.add_vertices(os.path.join(prefix, \"paper.csv\"), \"paper\")\n",
" .add_vertices(os.path.join(prefix, \"author.csv\"), \"author\")\n",
" .add_vertices(os.path.join(prefix, \"institution.csv\"), \"institution\")\n",
" .add_vertices(os.path.join(prefix, \"field_of_study.csv\"), \"field_of_study\")\n",
" .add_edges(\n",
" os.path.join(prefix, \"author_affiliated_with_institution.csv\"),\n",
" [],\n",
" (\"src_id\", \"author\"),\n",
" (\"dst_id\", \"institution\"),\n",
" ),\n",
" \"cites\": (\n",
" os.path.join(prefix, \"paper_cites_paper.csv\"),\n",
" [],\n",
" (\"src_id\", \"paper\"),\n",
" (\"dst_id\", \"paper\"),\n",
" ),\n",
" \"hasTopic\": (\n",
" \"affiliated\",\n",
" src_label=\"author\",\n",
" dst_label=\"institution\",\n",
" )\n",
" .add_edges(\n",
" os.path.join(prefix, \"paper_has_topic_field_of_study.csv\"),\n",
" [],\n",
" (\"src_id\", \"paper\"),\n",
" (\"dst_id\", \"field_of_study\"),\n",
" ),\n",
" \"writes\": (\n",
" \"hasTopic\",\n",
" src_label=\"paper\",\n",
" dst_label=\"field_of_study\",\n",
" )\n",
" .add_edges(\n",
" os.path.join(prefix, \"paper_cites_paper.csv\"),\n",
" \"cites\",\n",
" src_label=\"paper\",\n",
" dst_label=\"paper\",\n",
" )\n",
" .add_edges(\n",
" os.path.join(prefix, \"author_writes_paper.csv\"),\n",
" [],\n",
" (\"src_id\", \"author\"),\n",
" (\"dst_id\", \"paper\"),\n",
" ),\n",
" }\n",
" return sess.load_from(edges, vertices)\n",
" \"writes\",\n",
" src_label=\"author\",\n",
" dst_label=\"paper\",\n",
" )\n",
" )\n",
"\n",
" return graph\n",
"\n",
"# load ogbn_mag graph\n",
"graph = load_ogbn_mag(sess, \"/testingdata/ogbn_mag_small\")"
Expand Down Expand Up @@ -721,4 +724,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
4 changes: 2 additions & 2 deletions docs/analytics_engine.rst
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ To run your own algorithms, you may trigger it in place where you defined it.
import graphscope
sess = graphscope.session()
g = sess.load_from("...")
g = graphscope.Graph(sess)
# load my algorithm
my_app = SSSP_Pregel()
Expand All @@ -327,7 +327,7 @@ Later, you can load your own algorithm from the gar package.
import graphscope
sess = graphscope.session()
g = sess.load_from("...")
g = graphscope.Graph(sess)
# load my algorithm from a gar package
my_app = load_app('SSSP_Pregel', 'file:///var/graphscope/udf/my_sssp_pregel.gar')
Expand Down
63 changes: 30 additions & 33 deletions docs/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -112,39 +112,36 @@ To load this graph to GraphScope, one may use the code below.

.. code:: python
g = sess.load_from(
vertices={
"paper": "paper.csv",
"author": "author.csv",
"institution": "institution.csv",
"field_of_study": "field_of_study.csv",
},
edges={
"affiliated": (
"author_affiliated_with_institution.csv",
[],
("src_id", "author"),
("dst_id", "institution"),
),
"cites": (
"paper_cites_paper.csv",
[],
("src_id", "paper"),
("dst_id", "paper"),
),
"hasTopic": (
"paper_has_topic_field_of_study.csv",
[],
("src_id", "paper"),
("dst_id", "field_of_study"),
),
"writes": (
"author_writes_paper.csv",
[],
("src_id", "author"),
("dst_id", "paper"),
),
}
g = graphscope.Graph(sess)
g = (
g.add_vertices("paper.csv", label="paper")
.add_vertices("author.csv", label="author")
.add_vertices("institution.csv", label="institution")
.add_vertices("field_of_study.csv", label="field_of_study")
.add_edges(
"author_affiliated_with_institution.csv",
label="affiliated",
src_label="author",
dst_label="institution",
)
.add_edges(
"paper_has_topic_field_of_study.csv",
label="hasTopic",
src_label="paper",
dst_label="field_of_study",
)
.add_edges(
"paper_cites_paper.csv",
label="cites",
src_label="paper",
dst_label="paper",
)
.add_edges(
"author_writes_paper.csv",
label="writes",
src_label="author",
dst_label="paper",
)
)
Alternatively, we provide a function to load this graph for convenience.
Expand Down
Loading

0 comments on commit d180191

Please sign in to comment.