From bf1fef4d122ae7e072f4746958e84d0c17101f73 Mon Sep 17 00:00:00 2001 From: shirly121 Date: Mon, 10 Jul 2023 20:39:42 +0800 Subject: [PATCH 1/7] [GIE Doc] refine docs for cypher --- .../templates/frontend/statefulset.yaml | 6 +++++- charts/gie-standalone/templates/frontend/svc.yaml | 11 +++++++++++ charts/gie-standalone/values.yaml | 4 ++++ docs/interactive_engine/deployment.md | 2 +- docs/interactive_engine/dev_and_test.md | 11 +++++++---- docs/overview/getting_started.md | 10 +++++++--- docs/overview/graph_interactive_workloads.md | 13 ++++++++++++- interactive_engine/compiler/set_properties.sh | 6 +++++- 8 files changed, 52 insertions(+), 11 deletions(-) diff --git a/charts/gie-standalone/templates/frontend/statefulset.yaml b/charts/gie-standalone/templates/frontend/statefulset.yaml index 9303b274ec40..333722fa5479 100644 --- a/charts/gie-standalone/templates/frontend/statefulset.yaml +++ b/charts/gie-standalone/templates/frontend/statefulset.yaml @@ -81,7 +81,7 @@ spec: done $GRAPHSCOPE_HOME/bin/giectl start_frontend ${GRAPHSCOPE_RUNTIME} ${object_id} \ - $json_file $runtime_hosts $GREMLIN_SERVER_PORT $EXTRA_CONFIG + $json_file $runtime_hosts $GREMLIN_SERVER_PORT $CYPHER_SERVER_PORT $EXTRA_CONFIG exit_code=$? 
while [ $exit_code -eq 0 ] @@ -103,6 +103,8 @@ spec: value: {{ .Values.executor.service.gaiaRpc | quote }} - name: GREMLIN_SERVER_PORT value: {{ .Values.frontend.service.gremlinPort | quote }} + - name: CYPHER_SERVER_PORT + value: {{ .Values.frontend.service.cypherPort | quote }} - name: DNS_NAME_PREFIX_STORE value: {{ $storeFullname }}-{}.{{ $storeFullname }}-headless.{{ $releaseNamespace }}.svc.{{ $clusterDomain }} - name: SERVERSSIZE @@ -124,6 +126,8 @@ spec: ports: - name: gremlin containerPort: {{ .Values.frontend.service.gremlinPort }} + - name: cypher + containerPort: {{ .Values.frontend.service.cypherPort }} {{- if .Values.frontend.readinessProbe.enabled }} readinessProbe: tcpSocket: diff --git a/charts/gie-standalone/templates/frontend/svc.yaml b/charts/gie-standalone/templates/frontend/svc.yaml index d2d9a8ec95f3..710c0391f181 100644 --- a/charts/gie-standalone/templates/frontend/svc.yaml +++ b/charts/gie-standalone/templates/frontend/svc.yaml @@ -36,5 +36,16 @@ spec: nodePort: null {{- end }} {{- end }} + - name: cypher + port: {{ .Values.frontend.service.cypherPort }} + protocol: TCP + targetPort: cypher + {{- if and (or (eq .Values.frontend.service.type "NodePort") (eq .Values.frontend.service.type "LoadBalancer")) (not (empty .Values.frontend.service.nodePorts.cypher)) }} + {{- if (not (empty .Values.frontend.service.nodePorts.cypher)) }} + nodePort: {{ .Values.frontend.service.nodePorts.cypher }} + {{- else if eq .Values.frontend.service.type "ClusterIP" }} + nodePort: null + {{- end }} + {{- end }} selector: {{ include "graphscope-store.selectorLabels" . 
| nindent 4 }} app.kubernetes.io/component: frontend \ No newline at end of file diff --git a/charts/gie-standalone/values.yaml b/charts/gie-standalone/values.yaml index f7380a1a3cba..abe69f885c54 100644 --- a/charts/gie-standalone/values.yaml +++ b/charts/gie-standalone/values.yaml @@ -363,12 +363,16 @@ frontend: ## gremlinPort: 8182 + ## Cypher server port + cypherPort: 7687 + ## Specify the nodePort value for the LoadBalancer and NodePort service types. ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport ## nodePorts: service: "" gremlin: "" + cypher: "" ## Service clusterIP ## # clusterIP: None diff --git a/docs/interactive_engine/deployment.md b/docs/interactive_engine/deployment.md index cde2452a519f..5736c28d9d38 100644 --- a/docs/interactive_engine/deployment.md +++ b/docs/interactive_engine/deployment.md @@ -74,7 +74,7 @@ deployment and management of applications. To deploy GIE standalone using Helm, kubectl describe svc [YOUR_RELEASE_NAME]-gie-standalone-frontend \ | grep "Endpoints:" | awk -F' ' '{print $2}' ``` - You should see the GIE Frontend service endpoint as `:`. + You should see two exposed endpoints for GIE Frontend service, one is `:` for gremlin queries, another is `:` for cypher queries. - Connect to the GIE frontend service using the Tinkerpop's official SDKs or Gremlin console, which can be found [here](./tinkerpop_gremlin.md). 
diff --git a/docs/interactive_engine/dev_and_test.md b/docs/interactive_engine/dev_and_test.md index d94ed78ac288..7385f90468d1 100644 --- a/docs/interactive_engine/dev_and_test.md +++ b/docs/interactive_engine/dev_and_test.md @@ -117,8 +117,11 @@ pegasus.hosts = localhost:1234 # graph schema path graph.schema = /tmp/.json -## Frontend Config -frontend.service.port = 8182 +## Gremlin Server Port +gremlin.server.port = 8182 + +## Bolt Server Port +neo4j.bolt.server.port = 7687 # disable authentication if username or password is not set # auth.username = default @@ -131,9 +134,9 @@ java -cp ".:$GIE_TEST_HOME/lib/*" -Djna.library.path=$GIE_TEST_HOME/lib com.alib ``` With the frontend service, you can open the gremlin console and set the endpoint to -`localhost:8182`, as given [here](./tinkerpop_gremlin.md#gremlin-console). +`localhost:8182`, as given [here](./tinkerpop_gremlin.md#gremlin-console). Similarly, you can open the cypher-shell and set the url to `neo4j://localhost:7687` by using `-a` option. -7. Kill the services of `vineyardd`, `gaia_executor` and `frontend`: +1. Kill the services of `vineyardd`, `gaia_executor` and `frontend`: ``` pkill -f vineyardd pkill -f gaia_executor diff --git a/docs/overview/getting_started.md b/docs/overview/getting_started.md index fbfe9b693481..62f92f9e00b4 100644 --- a/docs/overview/getting_started.md +++ b/docs/overview/getting_started.md @@ -238,7 +238,7 @@ print(ret.to_dataframe(selector={'id': 'v.id', 'distance': 'r'}) ## Graph Interactive Query Quick Start With the `graphscope` package already installed, you can effortlessly engage with a graph on your local machine. -You simply need to create the `gremlin` instance to serve as the conduit for submitting all Gremlin queries. +You simply need to create the `interactive` instance to serve as the conduit for submitting all Gremlin or Cypher queries. 
````{dropdown} Example: Run Interactive Queries in GraphScope ```python @@ -252,14 +252,18 @@ gs.set_option(show_log=True) #(modern graph is an example property graph for Gremlin queries given by Apache at https://tinkerpop.apache.org/docs/current/tutorials/getting-started/) graph = load_modern_graph() -# Hereafter, you can use the `graph` object to create an `gremlin` query session -g = gs.gremlin(graph) +# Hereafter, you can use the `graph` object to create an `interactive` query session, which will start one Gremlin service and one Cypher service simultaneously on the backend. +g = gs.interactive(graph) # then `execute` any supported gremlin query. q1 = g.execute('g.V().count()') print(q1.all().result()) # should print [6] q2 = g.execute('g.V().hasLabel(\'person\')') print(q2.all().result()) # should print [[v[2], v[3], v[0], v[1]]] + +# or `execute` any supported cypher query. +q3 = g.execute("MATCH (n:person) RETURN count(n)", lang="cypher", routing_=RoutingControl.READ) +print(q3.records[0][0]) # should print 6 ``` ```` diff --git a/docs/overview/graph_interactive_workloads.md b/docs/overview/graph_interactive_workloads.md index 448d3b8dd52a..ee635363f26e 100644 --- a/docs/overview/graph_interactive_workloads.md +++ b/docs/overview/graph_interactive_workloads.md @@ -9,7 +9,7 @@ Graph interactive workloads primarily focus on exploring complex graph structure all occurrences (or instances) of the pattern in the graph. Pattern matching often involves relational operations to project, order and group the matched instances. In GraphScope, the Graph Interactive Engine (GIE) has been developed to handle such interactive workloads, -which provides widely used query languages, such as Gremlin, that allow users to easily +which provides widely used query languages, such as Gremlin or Cypher, that allow users to easily express both graph traversal and pattern matching queries. 
These queries will be executed with massive parallelism in a cluster of machines, providing efficient and scalable solutions to graph interactive workloads. @@ -87,3 +87,14 @@ g.V().match( The pattern matching query is declarative in the sense that users only describes the pattern using the `match()` step, while the engine determine how to execute the query (i.e. the execution plan) at runtime according to a pre-defined cost model. For example, a [worst-case optimal](https://vldb.org/pvldb/vol12/p1692-mhedhbi.pdf) execution plan may first compute the matches of `v1` and `v2`, and then intersect the neighbors of `v1` and `v2` as the matches of `v3`. +## Neo4j and Cypher +Neo4j is a popular graph database management system known for its native graph processing capabilities. It provides an efficient and scalable solution for storing, querying, and analyzing graph data. One of the key components of Neo4j is the query language Cypher, which is specifically designed for working with graph data. We have fully embraced the power of Neo4j by implementing essential and impactful operators in Cypher, which enables users to leverage the expressive capabilities of Cypher for querying and manipulating graph data. Additionally, we have integrated Neo4j's Bolt server into our system, allowing Cypher users to submit their queries using the open SDK. As a result, Cypher users can easily get started with GIE through the existing [Neo4j ecosystem](../interactive_engine/neo4j_eco.md), including the language wrappers of Python and Cypher-Shell. + +### Pattern Matching + The `MATCH` operator in Cypher provides a declarative syntax that allows you to express graph patterns in a concise and intuitive manner. The pattern-based approach aligns well with the structure of graph data, making it easier to understand and write queries. This helps both beginners and experienced users to quickly grasp and work with complex graph patterns. 
Moreover, The `MATCH` operator allows you to combine multiple patterns, optional patterns, and logical operators to create complex queries, which empowers you to express complex relationships and conditions within a single query. It can be written in Cypher for the above `Triangle` example: +```bash +Match (v1)-[:Knows]-(v2), + (v1)-[:Purchases]->(v3), + (v2)-[:Purchases]->(v3) +Return DISTINCT v1, v2, v3; +``` diff --git a/interactive_engine/compiler/set_properties.sh b/interactive_engine/compiler/set_properties.sh index 0933f11479ab..2e176b0f6773 100755 --- a/interactive_engine/compiler/set_properties.sh +++ b/interactive_engine/compiler/set_properties.sh @@ -26,6 +26,10 @@ hosts="pegasus.hosts: $DNS_NAME_PREFIX_STORE:$GAIA_RPC_PORT"; hosts="${hosts/"{}"/0}"; +gremlin_server_port="gremlin.server.port: $GREMLIN_SERVER_PORT"; + +cypher_server_port="neo4j.bolt.server.port: $CYPHER_SERVER_PORT"; + count=1; while (($count<$SERVERSSIZE)) do @@ -37,6 +41,6 @@ done graph_schema="graph.schema: $GRAPH_SCHEMA" -properties="$worker_num\n$timeout\n$batch_size\n$output_capacity\n$hosts\n$server_num\n$graph_schema" +properties="$worker_num\n$timeout\n$batch_size\n$output_capacity\n$hosts\n$server_num\n$graph_schema\n$gremlin_server_port\n$cypher_server_port" echo -e $properties > ./conf/ir.compiler.properties From 0967b2732f7aa39a15e2fd011781838a0b83ea99 Mon Sep 17 00:00:00 2001 From: shirly121 Date: Mon, 10 Jul 2023 20:43:00 +0800 Subject: [PATCH 2/7] add sleep for debug --- .github/workflows/k8s-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/k8s-ci.yml b/.github/workflows/k8s-ci.yml index 030f5d067bd7..2adb8b83b005 100644 --- a/.github/workflows/k8s-ci.yml +++ b/.github/workflows/k8s-ci.yml @@ -702,6 +702,7 @@ jobs: cd ${GITHUB_WORKSPACE}/charts # create local persistent volume which contains graph data for test kubectl apply -f ./gie-standalone/tools/pvc.yaml + sleep 80000s # create gie instance (compiler & executor & exp storage) helm install 
test ./gie-standalone \ --set frontend.image.repository=graphscope/interactive-experimental \ From f4f57cab06233bdb0536044a21616b0377800b90 Mon Sep 17 00:00:00 2001 From: shirly121 Date: Tue, 11 Jul 2023 13:05:02 +0800 Subject: [PATCH 3/7] [GIE Doc] update shell to get endpoints of frontend service --- .github/workflows/k8s-ci.yml | 1 - docs/interactive_engine/getting_started.md | 2 +- interactive_engine/compiler/ir_k8s_failover_ci.sh | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/k8s-ci.yml b/.github/workflows/k8s-ci.yml index 2adb8b83b005..030f5d067bd7 100644 --- a/.github/workflows/k8s-ci.yml +++ b/.github/workflows/k8s-ci.yml @@ -702,7 +702,6 @@ jobs: cd ${GITHUB_WORKSPACE}/charts # create local persistent volume which contains graph data for test kubectl apply -f ./gie-standalone/tools/pvc.yaml - sleep 80000s # create gie instance (compiler & executor & exp storage) helm install test ./gie-standalone \ --set frontend.image.repository=graphscope/interactive-experimental \ diff --git a/docs/interactive_engine/getting_started.md b/docs/interactive_engine/getting_started.md index 62052aaf39d6..a33ee7bbca70 100644 --- a/docs/interactive_engine/getting_started.md +++ b/docs/interactive_engine/getting_started.md @@ -102,7 +102,7 @@ You could pass additional key-value pairs to customize the startup configuration ```python # Set the timeout value to 10 min -g = gs.gremlin(graph, params={'pegasus.timeout': 600000}) +g = gs.gremlin(graph, params={'query.execution.timeout.ms': 600000}) ``` ## What's the Next diff --git a/interactive_engine/compiler/ir_k8s_failover_ci.sh b/interactive_engine/compiler/ir_k8s_failover_ci.sh index 99c83c6a1dfb..1b73707935d9 100755 --- a/interactive_engine/compiler/ir_k8s_failover_ci.sh +++ b/interactive_engine/compiler/ir_k8s_failover_ci.sh @@ -48,7 +48,7 @@ wait_role_pods_to_run store ${store_total} sleep 5 -node_port=$(kubectl --namespace=${namespace} get svc ${role_prefix}-frontend -o 
go-template='{{range.spec.ports}}{{if .nodePort}}{{.nodePort}}{{"\n"}}{{end}}{{end}}') +node_port=$(kubectl --namespace=${namespace} get svc ${role_prefix}-frontend -o go-template='{{range.spec.ports}}{{if .nodePort}}{{.nodePort}}{{"\n"}}{{end}}{{end}}' | head -1) hostname=$(minikube ip) python3 ./submit_query.py $hostname:${node_port} From b2256b11c1c6f2689609c56080debf6db5d5dd42 Mon Sep 17 00:00:00 2001 From: shirly121 Date: Tue, 11 Jul 2023 13:38:31 +0800 Subject: [PATCH 4/7] [GIE Doc] add and fix links in gie doc --- docs/interactive_engine/deployment.md | 7 ++++--- docs/interactive_engine/dev_and_test.md | 2 +- docs/interactive_engine/neo4j/cypher_sdk.md | 3 ++- docs/interactive_engine/tinkerpop/tinkerpop_gremlin.md | 4 ++-- docs/overview/graph_interactive_workloads.md | 2 +- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/docs/interactive_engine/deployment.md b/docs/interactive_engine/deployment.md index 5736c28d9d38..d4d401ab52ae 100644 --- a/docs/interactive_engine/deployment.md +++ b/docs/interactive_engine/deployment.md @@ -74,10 +74,11 @@ deployment and management of applications. To deploy GIE standalone using Helm, kubectl describe svc [YOUR_RELEASE_NAME]-gie-standalone-frontend \ | grep "Endpoints:" | awk -F' ' '{print $2}' ``` - You should see two exposed endpoints for GIE Frontend service, one is `:` for gremlin queries, another is `:` for cypher queries. + You should see two exposed endpoints for GIE Frontend service, one is `:` for gremlin querying, another is `:` for cypher querying. -- Connect to the GIE frontend service using the Tinkerpop's official SDKs or Gremlin console, which -can be found [here](./tinkerpop_gremlin.md). +- Connect to the GIE frontend service by the following two ways: + 1. using the Tinkerpop's official SDKs or Gremlin console, which can be found [here](./tinkerpop/tinkerpop_gremlin.md). + 2. using the Neo4j's official SDKs or Cypher-Shell, which can be found [here](./neo4j/cypher_sdk.md). 
## Remove the GIE Service ```bash diff --git a/docs/interactive_engine/dev_and_test.md b/docs/interactive_engine/dev_and_test.md index 7385f90468d1..306a2e2292be 100644 --- a/docs/interactive_engine/dev_and_test.md +++ b/docs/interactive_engine/dev_and_test.md @@ -134,7 +134,7 @@ java -cp ".:$GIE_TEST_HOME/lib/*" -Djna.library.path=$GIE_TEST_HOME/lib com.alib ``` With the frontend service, you can open the gremlin console and set the endpoint to -`localhost:8182`, as given [here](./tinkerpop_gremlin.md#gremlin-console). Similarly, you can open the cypher-shell and set the url to `neo4j://localhost:7687` by using `-a` option. +`localhost:8182`, as given [here](./tinkerpop/tinkerpop_gremlin.md#connecting-via-gremlin-console). Similarly, you can open the cypher-shell and set the url to `neo4j://localhost:7687` by using `-a` option, as given [here](./neo4j/cypher_sdk.md#connecting-via-cypher-shell). 1. Kill the services of `vineyardd`, `gaia_executor` and `frontend`: ``` diff --git a/docs/interactive_engine/neo4j/cypher_sdk.md b/docs/interactive_engine/neo4j/cypher_sdk.md index b96f5f9ac5ba..8d0c0e8bd404 100644 --- a/docs/interactive_engine/neo4j/cypher_sdk.md +++ b/docs/interactive_engine/neo4j/cypher_sdk.md @@ -3,7 +3,8 @@ This document will provide you with step-by-step guidance on how to connect your FrontEnd service, which offers functionalities similar to the official Tinkerpop service. Your first step is to obtain the Bolt Connector of GIE Frontend service: -- Follow the [instruction](./dev_and_test.md#manually-start-the-gie-services) while starting GIE on a local machine. +- Follow the [instruction](../deployment.md#deploy-your-first-gie-service) while deploying GIE in a K8s cluster, +- Follow the [instruction](../dev_and_test.md#manually-start-the-gie-services) while starting GIE on a local machine. 
## Connecting via Python Driver diff --git a/docs/interactive_engine/tinkerpop/tinkerpop_gremlin.md b/docs/interactive_engine/tinkerpop/tinkerpop_gremlin.md index 12f70de30861..0991159520f3 100644 --- a/docs/interactive_engine/tinkerpop/tinkerpop_gremlin.md +++ b/docs/interactive_engine/tinkerpop/tinkerpop_gremlin.md @@ -3,8 +3,8 @@ This document will provide you with step-by-step guidance on how to connect your FrontEnd service, which offers functionalities similar to the official Tinkerpop service. Your first step is to obtain the endpoint of GIE Frontend service: -- Follow the [instruction](./deployment.md#deploy-your-first-gie-service) while deploying GIE in a K8s cluster, -- Follow the [instruction](./dev_and_test.md#manually-start-the-gie-services) while starting GIE on a local machine. +- Follow the [instruction](../deployment.md#deploy-your-first-gie-service) while deploying GIE in a K8s cluster, +- Follow the [instruction](../dev_and_test.md#manually-start-the-gie-services) while starting GIE on a local machine. ## Connecting via Python SDK diff --git a/docs/overview/graph_interactive_workloads.md b/docs/overview/graph_interactive_workloads.md index ee635363f26e..4622d83c07a4 100644 --- a/docs/overview/graph_interactive_workloads.md +++ b/docs/overview/graph_interactive_workloads.md @@ -88,7 +88,7 @@ g.V().match( The pattern matching query is declarative in the sense that users only describes the pattern using the `match()` step, while the engine determine how to execute the query (i.e. the execution plan) at runtime according to a pre-defined cost model. For example, a [worst-case optimal](https://vldb.org/pvldb/vol12/p1692-mhedhbi.pdf) execution plan may first compute the matches of `v1` and `v2`, and then intersect the neighbors of `v1` and `v2` as the matches of `v3`. ## Neo4j and Cypher -Neo4j is a popular graph database management system known for its native graph processing capabilities. 
It provides an efficient and scalable solution for storing, querying, and analyzing graph data. One of the key components of Neo4j is the query language Cypher, which is specifically designed for working with graph data. We have fully embraced the power of Neo4j by implementing essential and impactful operators in Cypher, which enables users to leverage the expressive capabilities of Cypher for querying and manipulating graph data. Additionally, we have integrated Neo4j's Bolt server into our system, allowing Cypher users to submit their queries using the open SDK. As a result, Cypher users can easily get started with GIE through the existing [Neo4j ecosystem](../interactive_engine/neo4j_eco.md), including the language wrappers of Python and Cypher-Shell. +[Neo4j](https://neo4j.com/docs/) is a popular graph database management system known for its native graph processing capabilities. It provides an efficient and scalable solution for storing, querying, and analyzing graph data. One of the key components of Neo4j is the query language [Cypher](https://neo4j.com/docs/cypher-manual/current/introduction/), which is specifically designed for working with graph data. We have fully embraced the power of Neo4j by implementing essential and impactful operators in Cypher, which enables users to leverage the expressive capabilities of Cypher for querying and manipulating graph data. Additionally, we have integrated Neo4j's Bolt server into our system, allowing Cypher users to submit their queries using the open SDK. As a result, Cypher users can easily get started with GIE through the existing [Neo4j ecosystem](../interactive_engine/neo4j_eco.md), including the language wrappers of Python and Cypher-Shell. ### Pattern Matching The `MATCH` operator in Cypher provides a declarative syntax that allows you to express graph patterns in a concise and intuitive manner. 
The pattern-based approach aligns well with the structure of graph data, making it easier to understand and write queries. This helps both beginners and experienced users to quickly grasp and work with complex graph patterns. Moreover, The `MATCH` operator allows you to combine multiple patterns, optional patterns, and logical operators to create complex queries, which empowers you to express complex relationships and conditions within a single query. It can be written in Cypher for the above `Triangle` example: From b03b3e75acff332e586421eef8b93d122f3ee402 Mon Sep 17 00:00:00 2001 From: shirly121 Date: Tue, 11 Jul 2023 19:11:05 +0800 Subject: [PATCH 5/7] [GIE Doc] add commands to get endpoints for gremlin and cypher --- docs/interactive_engine/deployment.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/interactive_engine/deployment.md b/docs/interactive_engine/deployment.md index d4d401ab52ae..87c7468b6c68 100644 --- a/docs/interactive_engine/deployment.md +++ b/docs/interactive_engine/deployment.md @@ -70,11 +70,16 @@ deployment and management of applications. To deploy GIE standalone using Helm, You should see the `[YOUR_RELEASE_NAME]-gie-standalone-frontend-0` and `[YOUR_RELEASE_NAME]-gie-standalone-store-0` pods running. - Get the endpoint of the GIE Frontend service: - ```bash - kubectl describe svc [YOUR_RELEASE_NAME]-gie-standalone-frontend \ - | grep "Endpoints:" | awk -F' ' '{print $2}' - ``` - You should see two exposed endpoints for GIE Frontend service, one is `:` for gremlin querying, another is `:` for cypher querying. + 1. get `:` for gremlin querying + ```bash + kubectl describe svc [YOUR_RELEASE_NAME]-gie-standalone-frontend \ + | grep "Endpoints:" | awk -F' ' '{print $2}' | head -1 + ``` + 2. 
get `:` for cypher querying + ```bash + kubectl describe svc [YOUR_RELEASE_NAME]-gie-standalone-frontend \ + | grep "Endpoints:" | awk -F' ' '{print $2}' | tail -1 + ``` - Connect to the GIE frontend service by the following two ways: 1. using the Tinkerpop's official SDKs or Gremlin console, which can be found [here](./tinkerpop/tinkerpop_gremlin.md). From 765f51301ccd7aab7c22c8dbb29fb015f245d224 Mon Sep 17 00:00:00 2001 From: "longbin.lailb" Date: Wed, 12 Jul 2023 10:59:48 +0800 Subject: [PATCH 6/7] refine cypher docs --- docs/interactive_engine/dev_and_test.md | 2 +- docs/interactive_engine/getting_started.md | 28 ++++++++----------- docs/interactive_engine/neo4j/cypher_sdk.md | 19 ++++++++----- .../tinkerpop/tinkerpop_gremlin.md | 8 +++--- docs/overview/getting_started.md | 26 +++++++++++------ 5 files changed, 45 insertions(+), 38 deletions(-) diff --git a/docs/interactive_engine/dev_and_test.md b/docs/interactive_engine/dev_and_test.md index 306a2e2292be..f1a1929a3fa2 100644 --- a/docs/interactive_engine/dev_and_test.md +++ b/docs/interactive_engine/dev_and_test.md @@ -136,7 +136,7 @@ java -cp ".:$GIE_TEST_HOME/lib/*" -Djna.library.path=$GIE_TEST_HOME/lib com.alib With the frontend service, you can open the gremlin console and set the endpoint to `localhost:8182`, as given [here](./tinkerpop/tinkerpop_gremlin.md#connecting-via-gremlin-console). Similarly, you can open the cypher-shell and set the url to `neo4j://localhost:7687` by using `-a` option, as given [here](./neo4j/cypher_sdk.md#connecting-via-cypher-shell). -1. Kill the services of `vineyardd`, `gaia_executor` and `frontend`: +7. 
Kill the services of `vineyardd`, `gaia_executor` and `frontend`: ``` pkill -f vineyardd pkill -f gaia_executor diff --git a/docs/interactive_engine/getting_started.md b/docs/interactive_engine/getting_started.md index a33ee7bbca70..c2e29e07ad09 100644 --- a/docs/interactive_engine/getting_started.md +++ b/docs/interactive_engine/getting_started.md @@ -65,14 +65,18 @@ gs.set_option(show_log=True) # load the modern graph as example. graph = load_modern_graph() -# Hereafter, you can use the `graph` object to create an `gremlin` query session -g = gs.gremlin(graph) -# then `execute` any supported gremlin query. +# Hereafter, you can use the `graph` object to create an `interactive` query session +g = gs.interactive(graph) +# then `execute` any supported gremlin query (by default) q1 = g.execute('g.V().count()') print(q1.all().result()) # should print [6] q2 = g.execute('g.V().hasLabel(\'person\')') print(q2.all().result()) # should print [[v[2], v[3], v[0], v[1]]] + +# or `execute` any supported Cypher query, by passing `lang="cypher"` +q3 = g.execute("MATCH (n:person) RETURN count(n)", lang="cypher", routing_=RoutingControl.READ) +print(q3.records[0][0]) # should print 6 ``` You may see something like: @@ -87,31 +91,21 @@ You may see something like: The number 6 is printed, which is the number of vertices in modern graph. 
-### Retrieve the gremlin client - -The `g` returned by `gs.gremlin()` is a wrapper around `Client` of `gremlinpython`, you could get the `Client` by - -```python -client = g.gremlin_client -print(client.submit('g.V()').all().result()) -``` - ### Customize Configurations for GIE instance You could pass additional key-value pairs to customize the startup configuration of GIE, for example: ```python # Set the timeout value to 10 min -g = gs.gremlin(graph, params={'query.execution.timeout.ms': 600000}) +g = gs.interactive(graph, params={'query.execution.timeout.ms': 600000}) ``` ## What's the Next -As shown in the above example, it is very easy to use GraphScope to interactively query a graph using the gremlin query language on your local machine. You may find more tutorials [here](https://tinkerpop.apache.org/docs/current/tutorials/getting-started/) for the basic Gremlin usage, in which most read-only queries can be seamlessly executed with the above `g.execute()` function. +As shown in the above example, it is very easy to use GraphScope to interactively query a graph using both the Gremlin and Cypher query languages on your local machine. In addition to the above local-machine entr\'ee, we have prepared the following topics for your reference. -- GIE can handle much complex cases, for example, the complex LDBC - business intelligence workloads. [A walk-through tutorial is here!](./guide_and_examples) - GIE can be deployed in a distributed environment to process very large graph. [How to do that?](./deployment) -- GIE has supported a lot of standard Gremlin steps, together with many useful syntactic sugars. [Please look into the details!](./supported_gremlin_steps) +- GIE has been designed to integrate with the Tinkerpop ecosystem, with necessary extensions such as some syntactic sugars to facilitate the use of Gremlin. [Please look into the details!](./tinkerpop/tinkerpop_gremlin.md) +- GIE has been designed to integrate with the Neo4j ecosystem. 
[Please look into the details!](./neo4j/cypher_sdk.md) - Want to learn more about the technique details of GIE. [This is the design and architecture of GIE!](./design_of_gie) diff --git a/docs/interactive_engine/neo4j/cypher_sdk.md b/docs/interactive_engine/neo4j/cypher_sdk.md index 8d0c0e8bd404..ea698275b352 100644 --- a/docs/interactive_engine/neo4j/cypher_sdk.md +++ b/docs/interactive_engine/neo4j/cypher_sdk.md @@ -1,10 +1,9 @@ # GIE for Cypher -This document will provide you with step-by-step guidance on how to connect your Cypher applications to the GIE's -FrontEnd service, which offers functionalities similar to the official Tinkerpop service. +We have implemented Neo4j's [Bolt](https://neo4j.com/docs/bolt/current/bolt/) protocol for you to connect your Neo4j applications to the GIE's Frontend service. -Your first step is to obtain the Bolt Connector of GIE Frontend service: -- Follow the [instruction](../deployment.md#deploy-your-first-gie-service) while deploying GIE in a K8s cluster, -- Follow the [instruction](../dev_and_test.md#manually-start-the-gie-services) while starting GIE on a local machine. +Your first step is to obtain the Cypher endpoint for the [Bolt](https://neo4j.com/docs/bolt/current/bolt/) connector: +- Follow the [instruction](../deployment.md) while deploying GIE in a K8s cluster, +- Follow the [instruction](../dev_and_test.md) while starting GIE on a local machine. 
## Connecting via Python Driver @@ -20,7 +19,7 @@ Then connect to the service and run queries: ```Python from neo4j import GraphDatabase, RoutingControl -URI = "neo4j://localhost:7687" # the bolt connector you've obtained +URI = "neo4j://localhost:7687" # neo4j:// + Cypher endpoint you've obtained AUTH = ("", "") # We have not implemented authentication yet def print_top_10(driver): @@ -36,6 +35,12 @@ with GraphDatabase.driver(URI, auth=AUTH) as driver: print_top_10(driver) ``` +````{hint} +A simpler option is to use the `interactive` object for submitting Cypher queries through +[GraphScope's python SDK](../getting_started.md), which is a wrapper that encompasses Neo4j's +Python Driver and will automatically acquire the endpoint. +```` + ## Connecting via Cypher-Shell 1. Download and extract `cypher-shell` @@ -43,7 +48,7 @@ with GraphDatabase.driver(URI, auth=AUTH) as driver: wget https://dist.neo4j.org/cypher-shell/cypher-shell-4.4.19.zip unzip cypher-shell-4.4.19.zip && cd cypher-shell ``` -2. Connect to the Bolt Connector +2. Connect to the Bolt connector with the Cypher endpoint you've obtained ```bash ./cypher-shell -a neo4j://localhost:7687 ``` diff --git a/docs/interactive_engine/tinkerpop/tinkerpop_gremlin.md b/docs/interactive_engine/tinkerpop/tinkerpop_gremlin.md index 0991159520f3..6ab83f1ade71 100644 --- a/docs/interactive_engine/tinkerpop/tinkerpop_gremlin.md +++ b/docs/interactive_engine/tinkerpop/tinkerpop_gremlin.md @@ -3,8 +3,8 @@ This document will provide you with step-by-step guidance on how to connect your FrontEnd service, which offers functionalities similar to the official Tinkerpop service. Your first step is to obtain the endpoint of GIE Frontend service: -- Follow the [instruction](../deployment.md#deploy-your-first-gie-service) while deploying GIE in a K8s cluster, -- Follow the [instruction](../dev_and_test.md#manually-start-the-gie-services) while starting GIE on a local machine. 
+- Follow the [instruction](../deployment.md) while deploying GIE in a K8s cluster, +- Follow the [instruction](../dev_and_test.md) while starting GIE on a local machine. ## Connecting via Python SDK @@ -35,8 +35,8 @@ Then connect to the service and run queries: ``` ````{hint} -A simpler option is to use the `gremlin` object for submitting Gremlin queries through -[GraphScope's python SDK](./getting_started.md), which is a wrapper that encompasses Tinkerpop's +A simpler option is to use the `interactive` object for submitting Gremlin queries through +[GraphScope's python SDK](../getting_started.md), which is a wrapper that encompasses Tinkerpop's Gremlin-Python and will automatically acquire the endpoint. ```` diff --git a/docs/overview/getting_started.md b/docs/overview/getting_started.md index 62f92f9e00b4..a3e2cef35810 100644 --- a/docs/overview/getting_started.md +++ b/docs/overview/getting_started.md @@ -70,20 +70,28 @@ g = load_ogbn_mag() ``` ```` -Interactive queries enable users to explore, examine, and present graph data in a flexible and in-depth manner, allowing them to find specific information quickly. GraphScope utilizes Gremlin, a high-level graph traversal language, for interactive queries and offers efficient execution at scale. +Interactive queries enable users to explore, examine, and present graph data in a flexible and in-depth manner, allowing them to find specific information quickly. GraphScope enhances the presentation of interactive queries and ensures efficient execution of these queries on a large scale by providing support for the popular query languages [Gremlin](https://tinkerpop.apache.org/gremlin.html) and [Cypher](https://opencypher.org/). -````{dropdown} Run interactive queries with Gremlin +````{dropdown} Run interactive queries with Gremlin and Cypher In this example, we use graph traversal to count the number of papers two given authors have co-authored. 
To simplify the query, we assume the authors can be uniquely identified by ID 2 and 4307, respectively. ```python -# get the endpoint for submitting Gremlin queries on graph g. -interactive = graphscope.gremlin(g) +# get the endpoint for submitting interactive queries on graph g. +interactive = graphscope.interactive(g) -# count the number of papers two authors (with id 2 and 4307) have co-authored +# Gremlin query for counting the number of papers two authors (with id 2 and 4307) have co-authored papers = interactive.execute("g.V().has('author', 'id', 2).out('writes').where(__.in('writes').has('id', 4307)).count()").one() + +# Cypher query for counting the number of papers two authors (with id 2 and 4307) have co-authored +# Note that for Cypher query, the parameter of lang="cypher" is mandatory +papers = interactive.execute( \ + "MATCH (n1:author) -[:writes]->(p:paper) <-[:writes]-(n2:author) \ + WHERE n1.id = 2 AND n2.id = 4307 \ + RETURN count(p)", \ + lang="cypher", routing_=RoutingControl.READ) ``` ```` @@ -218,7 +226,7 @@ from graphscope.dataset.modern_graph import load_modern_graph gs.set_option(show_log=True) # load the modern graph as example. -#(modern graph is an example property graph for Gremlin queries given by Apache at https://tinkerpop.apache.org/docs/current/tutorials/getting-started/) +#(modern graph is an example property graph given by Apache at https://tinkerpop.apache.org/docs/current/tutorials/getting-started/) graph = load_modern_graph() # triggers label propagation algorithm(LPA) @@ -238,7 +246,7 @@ print(ret.to_dataframe(selector={'id': 'v.id', 'distance': 'r'}) ## Graph Interactive Query Quick Start With the `graphscope` package already installed, you can effortlessly engage with a graph on your local machine. -You simply need to create the `interactive` instance to serve as the conduit for submitting all Gremlin or Cypher queries. 
+You simply need to create the `interactive` instance to serve as the conduit for submitting Gremlin or Cypher queries. ````{dropdown} Example: Run Interactive Queries in GraphScope ```python @@ -249,7 +257,7 @@ from graphscope.dataset.modern_graph import load_modern_graph gs.set_option(show_log=True) # load the modern graph as example. -#(modern graph is an example property graph for Gremlin queries given by Apache at https://tinkerpop.apache.org/docs/current/tutorials/getting-started/) +#(modern graph is an example property graph given by Apache at https://tinkerpop.apache.org/docs/current/tutorials/getting-started/) graph = load_modern_graph() # Hereafter, you can use the `graph` object to create an `interactive` query session, which will start one Gremlin service and one Cypher service simultaneously on the backend. @@ -261,7 +269,7 @@ print(q1.all().result()) # should print [6] q2 = g.execute('g.V().hasLabel(\'person\')') print(q2.all().result()) # should print [[v[2], v[3], v[0], v[1]]] -# or `execute` any supported cypher query. 
+# or `execute` any supported Cypher query q3 = g.execute("MATCH (n:person) RETURN count(n)", lang="cypher", routing_=RoutingControl.READ) print(q3.records[0][0]) # should print 6 ``` From 86fc6acad01e43ca0e35d105bf25cb39d922a8f8 Mon Sep 17 00:00:00 2001 From: shirly121 Date: Wed, 12 Jul 2023 15:19:29 +0800 Subject: [PATCH 7/7] [GIE Doc] minor fix --- docs/overview/getting_started.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/overview/getting_started.md b/docs/overview/getting_started.md index a3e2cef35810..867e58bccf7a 100644 --- a/docs/overview/getting_started.md +++ b/docs/overview/getting_started.md @@ -88,9 +88,9 @@ papers = interactive.execute("g.V().has('author', 'id', 2).out('writes').where(_ # Cypher query for counting the number of papers two authors (with id 2 and 4307) have co-authored # Note that for Cypher query, the parameter of lang="cypher" is mandatory papers = interactive.execute( \ - "MATCH (n1:author) -[:writes]->(p:paper) <-[:writes]-(n2:author) \ + "MATCH (n1:author)-[:writes]->(p:paper)<-[:writes]-(n2:author) \ WHERE n1.id = 2 AND n2.id = 4307 \ - RETURN count(p)", \ + RETURN count(DISTINCT p)", \ lang="cypher", routing_=RoutingControl.READ) ``` ````