In [1]:
import logging
import os
import sys

import nest_asyncio; nest_asyncio.apply()

import graphscope

# Route INFO-and-above log records to stdout so they show up in the cell output.
# `sys` must be imported with the other modules at the top of this cell;
# without it this line raises NameError on a fresh kernel.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# Volume specification passed to graphscope.session() in the next cell.
# A single volume named "data": the host directory ~/test_data/ (expanded to an
# absolute path here) is declared as a hostPath volume with mount path
# /testingdata, which is the prefix used later when loading the dataset.
k8s_volumes = {
    "data": {
        "type": "hostPath",
        "field": {
            "path": os.path.expanduser("~/test_data/"),
            "type": "Directory",
        },
        "mounts": {
            "mountPath": "/testingdata",
        },
    }
}

In [2]:
# Create session
# The volume spec built in the first cell is forwarded unchanged via the
# `k8s_volumes` keyword; all later cells (loading, querying, learning) go
# through this `sess` object.

sess = graphscope.session(k8s_volumes=k8s_volumes)


In [3]:
# Loading graph

def load_ogbn_mag(sess, prefix):
    """Load the ogbn_mag dataset as an ArrowProperty graph.

    Every vertex label and every edge label is backed by one CSV file located
    directly under ``prefix``. The vertex and edge descriptions are assembled
    here and handed to ``sess.load_from(edges, vertices)``.

    Args:
        sess (graphscope.Session): Session in which the graph is loaded.
        prefix (str): Directory containing the ogbn_mag CSV files.

    Returns:
        graphscope.Graph: The value returned by ``sess.load_from``; a graph
        whose graph type is ArrowProperty.
    """

    def data_file(stem):
        # All dataset files share the same directory and the ".csv" suffix.
        return os.path.join(prefix, stem + ".csv")

    # Each vertex label is also the stem of its CSV file.
    vertex_labels = ("paper", "author", "institution", "field_of_study")
    vertices = {label: data_file(label) for label in vertex_labels}

    # (edge label, CSV file stem, source vertex label, destination vertex label)
    edge_specs = (
        ("affiliated", "author_affiliated_with_institution", "author", "institution"),
        ("cites", "paper_cites_paper", "paper", "paper"),
        ("hasTopic", "paper_has_topic_field_of_study", "paper", "field_of_study"),
        ("writes", "author_writes_paper", "author", "paper"),
    )
    # Each entry is (file, [], (source column, label), (destination column, label)).
    # The empty list is passed through as-is; presumably it is the list of edge
    # properties to load -- TODO confirm against the load_from documentation.
    edges = {
        label: (data_file(stem), [], ("src_id", src), ("dst_id", dst))
        for label, stem, src, dst in edge_specs
    }

    return sess.load_from(edges, vertices)


# load ogbn_mag graph
# The "/testingdata" prefix is the mountPath declared in k8s_volumes, so the
# files are expected under ~/test_data/ogbn_mag_small on the host.
graph = load_ogbn_mag(sess, "/testingdata/ogbn_mag_small")

In [4]:
# Interactive query

# Obtain the entrypoint for submitting Gremlin queries against `graph`.
interactive = sess.gremlin(graph)

# Count the papers that the authors with ids 2 and 4307 have co-authored:
# start from author 2, follow 'writes' to their papers, and keep only those
# papers that also have an incoming 'writes' edge from author 4307.
papers = interactive.execute(
    "g.V().has('author', 'id', 2).out('writes')"
    ".where(__.in('writes').has('id', 4307)).count()"
).one()
print("result", papers)


result [1]


In [5]:
# Graph analytics

# Extract a subgraph of publications within a time range: vertices whose
# 'year' lies inside (2014, 2020), together with their outgoing 'cites' edges.
sub_graph = interactive.subgraph("g.V().has('year', inside(2014, 2020)).outE('cites')")

# Project the subgraph to a simple graph made of papers and their citations.
simple_g = sub_graph.project_to_simple(v_label="paper", e_label="cites")

# Run k-core (k=5) and triangle counting on the simple graph.
kc_result = graphscope.k_core(simple_g, k=5)
tc_result = graphscope.triangles(simple_g)

# Attach both results to the citation subgraph as new columns "kcore" and
# "tc"; each is taken from the result selector "r".
sub_graph = sub_graph.add_column(kc_result, {"kcore": "r"}).add_column(
    tc_result, {"tc": "r"}
)

In [6]:
# Graph neural networks (GNNs)

# Features used for learning: the 128 columns feat_0 .. feat_127 followed by
# the two analytics columns ("kcore", "tc") added to sub_graph earlier.
paper_features = ["feat_" + str(index) for index in range(128)] + ["kcore", "tc"]

# Launch a learning engine on the subgraph.
# NOTE(review): the gen_labels tuples look like (split name, vertex label,
# total parts, (start, end)) giving a 75/10/15 train/val/test split --
# confirm against the GraphScope learning documentation.
lg = sess.learning(
    sub_graph,
    nodes=[("paper", paper_features)],
    edges=[("paper", "cites", "paper")],
    gen_labels=[
        ("train", "paper", 100, (0, 75)),
        ("val", "paper", 100, (75, 85)),
        ("test", "paper", 100, (85, 100)),
    ],
)

Instructions for updating:
non-resource variables are not supported in the long term


In [7]:
# define the training function.

from graphscope.learning.examples import GCN
from graphscope.learning.graphlearn.python.model.tf.trainer import LocalTFTrainer
from graphscope.learning.graphlearn.python.model.tf.optimizer import get_tf_optimizer

# supervised GCN.
def train(config, graph):
    """Train and evaluate a supervised GCN model on ``graph``.

    Args:
        config (dict): Hyper-parameters. Keys read here: "class_num",
            "features_num", "batch_size", "val_batch_size",
            "test_batch_size", "categorical_attrs_desc", "hidden_dim",
            "in_drop_rate", "neighs_num", "hops_num", "node_type",
            "edge_type", "full_graph_mode", "epoch", "learning_algo",
            "learning_rate" and "weight_decay".
        graph: Learning graph handed to the GCN constructor as its first
            argument.

    Returns:
        None. Training and evaluation are run via
        ``LocalTFTrainer.train_and_evaluate()``.
    """
    # Options forwarded to GCN as keyword arguments under the same name.
    keyword_options = (
        "val_batch_size",
        "test_batch_size",
        "categorical_attrs_desc",
        "hidden_dim",
        "in_drop_rate",
        "neighs_num",
        "hops_num",
        "node_type",
        "edge_type",
        "full_graph_mode",
    )

    def model_fn():
        # Factory given to the trainer; the model is built only when called.
        forwarded = {name: config[name] for name in keyword_options}
        return GCN(
            graph,
            config["class_num"],
            config["features_num"],
            config["batch_size"],
            **forwarded
        )

    epoch_count = config["epoch"]
    optimizer = get_tf_optimizer(
        config["learning_algo"],
        config["learning_rate"],
        config["weight_decay"],
    )
    trainer = LocalTFTrainer(model_fn, epoch=epoch_count, optimizer=optimizer)
    trainer.train_and_evaluate()

In [8]:

# Hyper-parameters consumed by train().
# NOTE(review): "agg_type" is defined here but train() never reads it.
config = dict(
    class_num=349,  # output dimension
    features_num=130,  # 128 dimension + kcore + triangle count
    batch_size=500,
    val_batch_size=100,
    test_batch_size=100,
    categorical_attrs_desc="",
    hidden_dim=256,
    in_drop_rate=0.5,
    hops_num=2,
    neighs_num=[5, 10],
    full_graph_mode=False,
    agg_type="gcn",  # mean, sum
    learning_algo="adam",
    learning_rate=0.0005,
    weight_decay=0.000005,
    epoch=20,
    node_type="paper",
    edge_type="cites",
)

# Run training and evaluation with the settings above on `lg`, the learning
# graph created earlier by sess.learning().
train(config, lg)

Instructions for updating:
This is a deprecated API that should only be used in TF 1 graph mode and legacy TF 2 graph mode available through `tf.compat.v1`. In all other situations -- namely, eager mode and inside `tf.function` -- you can consume dataset elements using `for elem in dataset: ...` or by explicitly creating iterator via `iterator = iter(dataset)` and fetching its elements via `values = next(iterator)`. Furthermore, this API is not available in TF 2. During the transition from TF 1 to TF 2 you can use `tf.compat.v1.data.make_initializable_iterator(dataset)` to create a TF 1 graph mode style iterator for a dataset created through TF 2 APIs. Note that this should be a transient state of your code base as there are in general no guarantees about the interoperability of TF 1 and TF 2 code.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
training...
Epoch 00, Iteration 0, Time(s) 2.3594, Loss 6.52750
Epoch 00, I

In [9]:
# Close the session created by graphscope.session() at the top of the notebook.
# NOTE(review): objects obtained through it (graph, interactive, lg) are
# presumably unusable afterwards -- confirm before adding cells below this one.
sess.close()